# Creating Mock Rotational Churn Data

In [1]:
import numpy as np
import pandas as pd
from random import seed
from random import randint
import datetime

# My python script for creating mock data
import mock_data_script as mds

In [2]:
# Draw how many rows the first mock user will contribute, to simulate a user
# with multiple unique call entries.
# NOTE(review): the original note gave the range as 1-15, but the recorded
# output of this cell shows 16 -- confirm the bounds inside mds.instances().
inst = mds.instances()

instances_message = 'We will be generating {} instances of the user.'.format(inst)
print(instances_message)

We will be generating 16 instances of the user.


# Creating Unique Entries

In [3]:
def unique_entry(ins):
    """Generate `ins` mock activity rows for one randomly generated user.

    A single IMEI, IMSI and MSISDN are drawn once through the `mds` helpers and
    repeated on every row; all other fields are drawn afresh for each row.

    Parameters
    ----------
    ins : int
        Number of rows to generate (used as ``range(ins)``).

    Returns
    -------
    tuple
        Fifteen lists, each of length ``ins``, in this order:
        user_imei, user_imsi, user_msisdn (``'639'`` + MSISDN),
        user_phone_num (``'09'`` + MSISDN), contacted_nums, called_nums,
        texted_nums, call_site, text_site, contact_date, user_apps,
        net_hrs_spent, load, md_usage, active_time.

    Notes
    -----
    The contacted number is redrawn until it differs from the user's own phone
    number, and the value that passed that check is the one that is stored.
    The comparison assumes ``mds.generate_PN()`` returns numbers in the same
    ``'09...'`` string format as the user's phone number -- TODO confirm.
    """
    user_imei = []
    user_imsi = []
    user_msisdn = []
    user_phone_num = []
    contacted_nums = []
    called_nums = []
    texted_nums = []
    call_site = []
    text_site = []
    contact_date = []
    user_apps = []
    net_hrs_spent = []
    load = []
    md_usage = []
    active_time = []

    # Per-user identifiers: drawn once, repeated on every row of this user.
    uni_imei = mds.generate_IMEI()
    uni_imsi = mds.generate_IMSI()
    uni_msisdn = mds.generate_MSISDN()
    uni_pn = '09' + uni_msisdn

    for _ in range(ins):
        user_imei.append(uni_imei)
        user_imsi.append(uni_imsi)
        user_msisdn.append('639' + uni_msisdn)
        user_phone_num.append(uni_pn)

        # The call/text flags are passed to the cell-tower generator so the
        # site value can depend on whether a call/text took place.
        uni_called = mds.generate_YN()
        uni_texted = mds.generate_YN()
        called_nums.append(uni_called)
        texted_nums.append(uni_texted)
        call_site.append(mds.generate_cell_tower(uni_called))
        text_site.append(mds.generate_cell_tower(uni_texted))
        contact_date.append(mds.generate_date())
        user_apps.append(mds.generate_apps())

        net_hrs_spent.append(mds.generate_num_of_hrs())
        load.append(mds.generate_avg_load())
        md_usage.append(mds.generate_md_usage())
        active_time.append(mds.generate_active_time())

        # A user must not contact their own number: redraw until the number
        # differs, then store that validated number (not a fresh, unchecked one).
        contacted = mds.generate_PN()
        while contacted == uni_pn:
            contacted = mds.generate_PN()
        contacted_nums.append(contacted)

    return user_imei, user_imsi, user_msisdn, user_phone_num, contacted_nums, called_nums, texted_nums, call_site, text_site, contact_date, user_apps, net_hrs_spent, load, md_usage, active_time

In [4]:
# Generate the rows for the first mock user; each name receives one column of values.
(
    IMEIs,
    IMSIs,
    MSISDNs,
    PNs,
    Contacted,
    Called,
    Texted,
    cSite,
    tSite,
    con_date,
    apps_used,
    net_hrs_avg,
    load_amt,
    mobile_data,
    active_time_day_night,
) = unique_entry(inst)

In [5]:
# Map each output column name to the list of values generated for the first user.
mock = {
    'IMEIs': IMEIs,
    'IMSIs': IMSIs,
    'MSISDNs': MSISDNs,
    'Phone_Num': PNs,
    'Contacted_Nums': Contacted,
    'Called': Called,
    'Texted': Texted,
    'call_site': cSite,
    'text_site': tSite,
    'contact_date': con_date,
    'user_apps': apps_used,
    'net_hrs_spent(hrs)': net_hrs_avg,
    'load_amt(php)': load_amt,
    'mobile_data_usage(gb)': mobile_data,
    'active_time': active_time_day_night,
}

In [6]:
# Number of additional mock users generated on top of the first user's `mock` dict.
N_EXTRA_USERS = 500

# Collect one frame per user and concatenate once at the end. Calling pd.concat
# inside the loop copies the whole accumulated frame on every iteration, which
# makes the build quadratic in the number of rows.
user_frames = [pd.DataFrame(data=mock)]

# The loop variable is deliberately not `_`: at notebook top level that name is
# IPython's "last output" variable.
for i in range(N_EXTRA_USERS):
    # Each user gets their own random number of rows.
    inst = mds.instances()
    IMEIs, IMSIs, MSISDNs, PNs, Contacted, Called, Texted, cSite, tSite, con_date, apps_used, net_hrs_avg, load_amt, mobile_data, active_time_day_night = unique_entry(inst)
    mock = {
        'IMEIs':IMEIs, 
        'IMSIs':IMSIs, 
        'MSISDNs':MSISDNs, 
        'Phone_Num':PNs, 
        'Contacted_Nums':Contacted, 
        'Called':Called, 
        'Texted':Texted, 
        'call_site':cSite, 
        'text_site':tSite, 
        'contact_date':con_date, 
        'user_apps':apps_used, 
        'net_hrs_spent(hrs)':net_hrs_avg, 
        'load_amt(php)':load_amt, 
        'mobile_data_usage(gb)':mobile_data, 
        'active_time':active_time_day_night
        }

    x = pd.DataFrame(data=mock)
    user_frames.append(x)

# Single concatenation; ignore_index gives the combined frame a fresh 0..n-1 index.
df = pd.concat(user_frames, ignore_index=True)

df.head()

Unnamed: 0,IMEIs,IMSIs,MSISDNs,Phone_Num,Contacted_Nums,Called,Texted,call_site,text_site,contact_date,user_apps,net_hrs_spent(hrs),load_amt(php),mobile_data_usage(gb),active_time
0,219742189102274,515022095584684,639904771101,9904771101,9254175505,no,yes,NS,T599,2019-08-09,"paypal, viber, YT Music, lazada, twitch, discord",1.3,96,4.51,day
1,219742189102274,515022095584684,639904771101,9904771101,9241347050,yes,yes,T2129,T1056,2020-11-07,"paymaya, messenger, gcash, reddit, shopee, twi...",3.5,260,3.15,night
2,219742189102274,515022095584684,639904771101,9904771101,9914781099,yes,yes,T10193,T7592,2020-10-02,"chrome, twitch, discord, viber, youtube, paypa...",11.4,83,4.82,night
3,219742189102274,515022095584684,639904771101,9904771101,9538031002,yes,no,T3938,NS,2020-02-03,"discord, grab, viber, paypal, paymaya, youtube...",19.6,208,1.53,night
4,219742189102274,515022095584684,639904771101,9904771101,9251415391,yes,no,T2028,NS,2019-09-27,"discord, chrome, YT Music, netflix, spotify, y...",7.1,347,2.24,night


In [7]:
# Total number of generated rows across all mock users.
df.shape[0]

6677

In [8]:
# Export the generated dataset to both Excel and CSV, without the index column.
# The explicit astype('str') casts on the identifier columns (IMEIs, IMSIs,
# MSISDNs, Phone_Num, Contacted_Nums) stay disabled; the read-back cells pass
# dtype='str' when loading instead.
excel_path = 'data/mock/rotational_churn_mock_data.xlsx'
csv_path = 'data/mock/rotational_churn_mock_data.csv'

df.to_excel(excel_path, index=False)
df.to_csv(csv_path, index=False)

In [9]:
# Read the Excel export back with dtype='str' applied to every column, then preview it.
excel_path = 'data/mock/rotational_churn_mock_data.xlsx'
data = pd.read_excel(excel_path, dtype='str')
data.head()

Unnamed: 0,IMEIs,IMSIs,MSISDNs,Phone_Num,Contacted_Nums,Called,Texted,call_site,text_site,contact_date,user_apps,net_hrs_spent(hrs),load_amt(php),mobile_data_usage(gb),active_time
0,219742189102274,515022095584684,639904771101,9904771101,9254175505,no,yes,NS,T599,2019-08-09 00:00:00,"paypal, viber, YT Music, lazada, twitch, discord",1.3,96,4.51,day
1,219742189102274,515022095584684,639904771101,9904771101,9241347050,yes,yes,T2129,T1056,2020-11-07 00:00:00,"paymaya, messenger, gcash, reddit, shopee, twi...",3.5,260,3.15,night
2,219742189102274,515022095584684,639904771101,9904771101,9914781099,yes,yes,T10193,T7592,2020-10-02 00:00:00,"chrome, twitch, discord, viber, youtube, paypa...",11.4,83,4.82,night
3,219742189102274,515022095584684,639904771101,9904771101,9538031002,yes,no,T3938,NS,2020-02-03 00:00:00,"discord, grab, viber, paypal, paymaya, youtube...",19.6,208,1.53,night
4,219742189102274,515022095584684,639904771101,9904771101,9251415391,yes,no,T2028,NS,2019-09-27 00:00:00,"discord, chrome, YT Music, netflix, spotify, y...",7.1,347,2.24,night


In [10]:
# Read the CSV export back with dtype='str' applied to every column, then preview it.
csv_path = 'data/mock/rotational_churn_mock_data.csv'
data = pd.read_csv(csv_path, dtype='str')
data.head()

Unnamed: 0,IMEIs,IMSIs,MSISDNs,Phone_Num,Contacted_Nums,Called,Texted,call_site,text_site,contact_date,user_apps,net_hrs_spent(hrs),load_amt(php),mobile_data_usage(gb),active_time
0,219742189102274,515022095584684,639904771101,9904771101,9254175505,no,yes,NS,T599,2019-08-09,"paypal, viber, YT Music, lazada, twitch, discord",1.3,96,4.51,day
1,219742189102274,515022095584684,639904771101,9904771101,9241347050,yes,yes,T2129,T1056,2020-11-07,"paymaya, messenger, gcash, reddit, shopee, twi...",3.5,260,3.15,night
2,219742189102274,515022095584684,639904771101,9904771101,9914781099,yes,yes,T10193,T7592,2020-10-02,"chrome, twitch, discord, viber, youtube, paypa...",11.4,83,4.82,night
3,219742189102274,515022095584684,639904771101,9904771101,9538031002,yes,no,T3938,NS,2020-02-03,"discord, grab, viber, paypal, paymaya, youtube...",19.6,208,1.53,night
4,219742189102274,515022095584684,639904771101,9904771101,9251415391,yes,no,T2028,NS,2019-09-27,"discord, chrome, YT Music, netflix, spotify, y...",7.1,347,2.24,night
