# Mock Data Creation

In [1]:
import pandas as pd
from mock_data_script import mock_data_generation as mds

In [2]:
# Column labels for each per-user frame, listed in the same order as the
# values unpacked from mds.mock_churner_data() below.
CHURNER_COLUMNS = (
    'weeks',
    'IMEI',
    'phone_nums',
    'credits_loaded_per_week',
    'num_hrs_spend_on_net',
    'mobile_data_usage(gb)',
    'promos_used',
    'active_times',
    'activity_status',
)

# `d` collects one DataFrame per generated mock user (20 in total).
d = []

for _ in range(20):
    # The unpacked names are kept at notebook scope on purpose: a later cell
    # inspects `times` from the final iteration.
    (weeks, IMEIs, phone_nums, credits_loaded, num_hrs,
     mdu, promos, times, active) = mds.mock_churner_data()

    column_values = (weeks, IMEIs, phone_nums, credits_loaded, num_hrs,
                     mdu, promos, times, active)
    d.append(pd.DataFrame(dict(zip(CHURNER_COLUMNS, column_values))))

In [3]:
# Display the frame built for the first generated user (d[0]) as a sanity check.
d[0] # sample output

Unnamed: 0,weeks,IMEI,phone_nums,credits_loaded_per_week,num_hrs_spend_on_net,mobile_data_usage(gb),promos_used,active_times,activity_status
0,2017-12-05,479233277369616,09379867474,0,21.1,6.24,2,1,0
1,2017-12-12,479233277369616,09379867474,1000,8.5,7.55,4,1,0
2,2017-12-19,479233277369616,09379867474,0,14.9,6.46,10,0,0
3,2017-12-26,479233277369616,09379867474,0,4.2,6.34,10,1,0
4,2018-01-02,479233277369616,09379867474,0,20.8,5.72,2,1,0
...,...,...,...,...,...,...,...,...,...
64,2019-02-26,479233277369616,09379867474,500,6.7,5.53,6,0,0
65,2019-03-05,479233277369616,09379867474,0,0.0,0.00,0,0,1
66,2019-03-12,479233277369616,09379867474,0,0.0,0.00,0,0,1
67,2019-03-19,479233277369616,09379867474,0,0.0,0.00,0,0,1


In [4]:
# Number of weekly records held for each user, i.e. the weeks recorded
# before that user was confirmed as churned.
for user_frame in d:
    print(len(user_frame), 'weeks')

69 weeks
52 weeks
56 weeks
28 weeks
42 weeks
30 weeks
36 weeks
16 weeks
29 weeks
72 weeks
49 weeks
63 weeks
51 weeks
73 weeks
49 weeks
23 weeks
56 weeks
38 weeks
55 weeks
50 weeks


In [5]:
# `d` holds one DataFrame per user, so its length is the user count.
len(d) # number of users

20

### Writing Each User's Data to a Separate Sheet of an Excel File

In [6]:
# Save every user's frame to its own worksheet (d0, d1, ...) in one workbook.
# The row index is not written (index=False).
with pd.ExcelWriter('data/mock/sample_mock_c_data.xlsx') as writer:
    for sheet_no, user_frame in enumerate(d):
        user_frame.to_excel(writer, sheet_name='d{}'.format(sheet_no), index=False)

# Generating Rotational Churners

In [7]:
import numpy as np
import pandas as pd
from mock_data_script import mock_data_generation as mdg

# Re-load the first five churner sheets from the workbook written earlier.
# NOTE: this rebinds `d` -- from here on it holds the five frames parsed from
# the Excel file, not the frames generated in the first section.
#
# The workbook is opened in a `with` block so the underlying file handle is
# closed once the sheets are parsed (the bare pd.ExcelFile(...) call left it
# open). `dfs` and `sheets` stay bound afterwards, but `dfs` is closed.
with pd.ExcelFile('data/mock/sample_mock_c_data.xlsx') as dfs:
    sheets = dfs.sheet_names
    # Indexing (rather than slicing) keeps the original behaviour of failing
    # loudly if the workbook has fewer than five sheets.
    d = [dfs.parse(sheet_name=sheets[i]) for i in range(5)]

In [8]:
# Show the first rows of the first frame parsed back from the workbook.
d[0].head()

Unnamed: 0,weeks,IMEI,phone_nums,credits_loaded_per_week,num_hrs_spend_on_net,mobile_data_usage(gb),promos_used,active_times,activity_status
0,2020-02-11,882906772575586,9459593810,30,3.1,8.0,10,2,0
1,2020-02-18,882906772575586,9459593810,90,6.0,5.1,6,2,0
2,2020-02-25,882906772575586,9459593810,150,20.3,10.0,4,2,0
3,2020-03-03,882906772575586,9459593810,70,20.3,9.3,7,1,0
4,2020-03-10,882906772575586,9459593810,1000,14.3,5.1,3,0,0


In [9]:
# NOTE(review): `times` is not assigned anywhere in this section before this
# cell. The only earlier assignment is the tuple unpacking inside the
# mock-churner generation loop, so this reads leftover kernel state from that
# loop's last iteration. Confirm this is intended; it will raise NameError if
# this section is run on its own.
len(times)

50

In [10]:
# Build one rotational-churner frame for each of the five loaded churner
# frames in `d`; `rc` collects the results.
#
# Fix: the original call took the start date from d[i] but the week count and
# the history frame from d[0], so every rotational churner was generated from
# the first user's history. All three arguments now come from the same d[i].
#
# NOTE(review): mdg.mock_rc_data is defined outside this notebook. The
# argument meanings (start date string, week count, history frame) are
# inferred from the call site -- confirm against its definition. The repeated
# "active_times" lines in the saved output are not printed by this cell and
# presumably come from inside that function.
rc = []

for i in range(5):
    source = d[i]

    # Fourth-from-last week of this user, formatted as YYYY-MM-DD. Positional
    # lookup is equivalent to the former label lookup [len(d[i]) - 4] on the
    # default 0..n-1 index and does not depend on the index labels.
    start_week = source['weeks'].iloc[-4].strftime('%Y-%m-%d')

    # Everything except the last four rows of this user's records.
    history = source.iloc[:-4]

    (weeks, IMEIs, phone_nums, credits_loaded, num_hrs,
     mdu, promos, times, active) = mdg.mock_rc_data(
        start_week, len(source) - 4, history)

    rc.append(
        pd.DataFrame({
            'weeks': weeks,
            'IMEI': IMEIs,
            'phone_nums': phone_nums,
            'credits_loaded_per_week': credits_loaded,
            'num_hrs_spend_on_net': num_hrs,
            'mobile_data_usage(gb)': mdu,
            'promos_used': promos,
            'active_times': times,
            'activity_status': active
        })
    )

active_times
active_times
active_times
active_times
active_times


In [11]:
# Display the first generated rotational-churner frame as a sanity check.
rc[0]

Unnamed: 0,weeks,IMEI,phone_nums,credits_loaded_per_week,num_hrs_spend_on_net,mobile_data_usage(gb),promos_used,active_times,activity_status
0,2020-12-08,5873123835126710,9675871650,20,1.1,6.4,10,2,0
1,2020-12-15,5873123835126710,9675871650,1000,6.9,5.1,8,2,0
2,2020-12-22,5873123835126710,9675871650,1000,20.3,11.2,2,1,0
3,2020-12-29,5873123835126710,9675871650,1000,18.8,7.6,8,1,0
4,2021-01-05,5873123835126710,9675871650,800,14.3,5.1,3,0,0
5,2021-01-12,5873123835126710,9675871650,100,1.9,6.4,0,2,0
6,2021-01-19,5873123835126710,9675871650,70,9.5,9.9,8,1,0
7,2021-01-26,5873123835126710,9675871650,0,1.1,4.8,8,0,0
8,2021-02-02,5873123835126710,9675871650,0,17.2,7.1,4,1,0
9,2021-02-09,5873123835126710,9675871650,1000,7.5,10.9,3,0,0


In [12]:
# Save each rotational-churner frame to its own worksheet (rc0, rc1, ...) in
# one workbook. The row index is not written (index=False).
with pd.ExcelWriter('data/mock/sample_mock_rc_data.xlsx') as writer:
    for sheet_no, rc_frame in enumerate(rc):
        rc_frame.to_excel(writer, sheet_name='rc{}'.format(sheet_no), index=False)