# Mock Data Creation

In [1]:
import pandas as pd
from mock_data_script import mock_data_generation as mds

## To create mock data for rotational churners, we first need data for churners to use as a basis for the "rotational churner" records. We take the values from the churner data and add or subtract small amounts, so that the rotational churners show similar behavior with slight variations.

In [2]:
# Generate 20 mock churners. Each generator call returns nine values that become
# the nine columns of one DataFrame.
d = []

for _ in range(20):
    # The unpacked names stay at cell scope on purpose: a later cell reads `times`.
    (weeks, IMEIs, phone_nums, credits_loaded, num_hrs,
     mdu, promos, times, active) = mds.mock_churner_data()

    # Column label paired with the values that fill it, in display order.
    column_data = [
        ('weeks', weeks),
        ('IMEI', IMEIs),
        ('phone_nums', phone_nums),
        ('credits_loaded_per_week', credits_loaded),
        ('num_hrs_spend_on_net', num_hrs),
        ('mobile_data_usage(gb)', mdu),
        ('promos_used', promos),
        ('active_times', times),
        ('activity_status', active),
    ]
    d.append(pd.DataFrame(dict(column_data)))

In [3]:
# Sample output: preview only the first 10 weekly records of the first mock churner
# instead of rendering the whole frame.
d[0].head(10)

Unnamed: 0,weeks,IMEI,phone_nums,credits_loaded_per_week,num_hrs_spend_on_net,mobile_data_usage(gb),promos_used,active_times,activity_status
0,2018-04-17,939022743589095,9334425708,0,11.1,8.45,0,1,0
1,2018-04-24,939022743589095,9334425708,0,6.5,5.73,6,1,0
2,2018-05-01,939022743589095,9334425708,0,17.7,8.28,5,2,0
3,2018-05-08,939022743589095,9334425708,0,22.3,7.34,3,0,0
4,2018-05-15,939022743589095,9334425708,0,15.4,8.98,4,1,0
5,2018-05-22,939022743589095,9334425708,100,19.2,5.61,8,1,0
6,2018-05-29,939022743589095,9334425708,70,16.4,6.06,0,1,0
7,2018-06-05,939022743589095,9334425708,0,16.2,8.39,7,0,0
8,2018-06-12,939022743589095,9334425708,70,14.3,5.42,3,2,0
9,2018-06-19,939022743589095,9334425708,0,2.6,6.02,0,1,0


In [4]:
# Number of weekly records each user has before the churn was confirmed.
for frame in d:
    print(len(frame), 'weeks')

50 weeks
39 weeks
20 weeks
56 weeks
24 weeks
30 weeks
73 weeks
31 weeks
30 weeks
53 weeks
66 weeks
61 weeks
23 weeks
12 weeks
75 weeks
49 weeks
29 weeks
29 weeks
13 weeks
29 weeks


In [5]:
len(d) # number of users (one DataFrame per generated churner)

20

### Putting the Data in Separate Sheets of an Excel File

In [6]:
# Write every churner frame to its own worksheet (d0, d1, ...) of a single workbook.
with pd.ExcelWriter('data/mock/sample_mock_c_data.xlsx') as writer:
    for position, frame in enumerate(d):
        sheet = 'd{}'.format(position)
        frame.to_excel(writer, sheet_name=sheet, index=False)

# Generating Rotational Churners

In [7]:
import numpy as np
import pandas as pd
from mock_data_script import mock_data_generation as mdg

# Reload the first five churner sheets from the workbook written earlier.
# The workbook is opened as a context manager so its file handle is released as soon
# as the sheets are parsed; `dfs` is therefore closed once this cell finishes.
with pd.ExcelFile('data/mock/sample_mock_c_data.xlsx') as dfs:
    sheets = dfs.sheet_names
    # `d` is rebound here: from this point on it holds only these reloaded frames.
    d = [dfs.parse(sheet_name=sheet) for sheet in sheets[:5]]

In [8]:
# Preview the first rows of the first sheet reloaded from the workbook.
d[0].head()

Unnamed: 0,weeks,IMEI,phone_nums,credits_loaded_per_week,num_hrs_spend_on_net,mobile_data_usage(gb),promos_used,active_times,activity_status
0,2018-04-17,939022743589095,9334425708,0,11.1,8.45,0,1,0
1,2018-04-24,939022743589095,9334425708,0,6.5,5.73,6,1,0
2,2018-05-01,939022743589095,9334425708,0,17.7,8.28,5,2,0
3,2018-05-08,939022743589095,9334425708,0,22.3,7.34,3,0,0
4,2018-05-15,939022743589095,9334425708,0,15.4,8.98,4,1,0


In [9]:
# NOTE(review): `times` is not assigned in this cell or in the reload cell above. The only
# earlier assignment is the tuple unpacking inside the churner-generation loop, so this
# reports the length of whatever that loop's final iteration left behind — confirm this
# check is still wanted.
len(times)

29

In [10]:
# Build one rotational-churner frame per reloaded churner frame in `d`.
# NOTE(review): the three arguments of mock_rc_data are presumably (date string, number of
# weeks, base churner data) — confirm against mock_data_generation.mock_rc_data.
rc = []

for churner in d:
    # Base history: everything except the churner's last 4 recorded weeks.
    history = churner.iloc[:-4]
    # Date of the 4th-from-last recorded week, selected by position rather than by index label.
    week_str = churner['weeks'].iloc[-4].strftime('%Y-%m-%d')

    # The week count and base history now come from the same churner as the date.
    # Previously both were hard-coded to d[0], so every frame was derived from churner 0.
    weeks, IMEIs, phone_nums, credits_loaded, num_hrs, mdu, promos, times, active = mdg.mock_rc_data(
        week_str, len(churner) - 4, history
    )
    rc.append(
        pd.DataFrame({
            'weeks': weeks,
            'IMEI': IMEIs,
            'phone_nums': phone_nums,
            'credits_loaded_per_week': credits_loaded,
            'num_hrs_spend_on_net': num_hrs,
            'mobile_data_usage(gb)': mdu,
            'promos_used': promos,
            'active_times': times,
            'activity_status': active,
        })
    )

active_times
active_times
active_times
active_times
active_times


In [11]:
# Preview only the first 10 weekly records of the first rotational churner
# instead of rendering the whole frame.
rc[0].head(10)

Unnamed: 0,weeks,IMEI,phone_nums,credits_loaded_per_week,num_hrs_spend_on_net,mobile_data_usage(gb),promos_used,active_times,activity_status
0,2019-04-09,916288205654472,9667616603,0,11.1,7.2,0,0,0
1,2019-04-16,916288205654472,9667616603,0,6.5,5.7,4,0,0
2,2019-04-23,916288205654472,9667616603,20,18.2,7.3,4,1,0
3,2019-04-30,916288205654472,9667616603,0,22.3,6.4,3,1,0
4,2019-05-07,916288205654472,9667616603,30,15.4,9.0,5,1,0
5,2019-05-14,916288205654472,9667616603,90,19.2,6.5,9,1,0
6,2019-05-21,916288205654472,9667616603,50,15.1,5.6,0,1,0
7,2019-05-28,916288205654472,9667616603,20,16.2,8.4,7,1,0
8,2019-06-04,916288205654472,9667616603,90,13.5,5.4,1,2,0
9,2019-06-11,916288205654472,9667616603,30,3.2,6.9,0,2,0


In [12]:
# Write every rotational-churner frame to its own worksheet (rc0, rc1, ...) of a single workbook.
with pd.ExcelWriter('data/mock/sample_mock_rc_data.xlsx') as writer:
    for position, frame in enumerate(rc):
        sheet = 'rc{}'.format(position)
        frame.to_excel(writer, sheet_name=sheet, index=False)