# Mock Data Creation

In [1]:
import pandas as pd
from mock_data_script import mock_data_generation as mds

In [2]:
d = []

for _ in range(10):
    weeks, IMEIs, credits_loaded, num_hrs, mdu, promos, times, active = mds.mock_churner_data()
    d.append(pd.DataFrame({
        'weeks':weeks, 
        'IMEI':IMEIs,
        'credits_loaded_per_week':credits_loaded,
        'num_hrs_spend_on_net':num_hrs,
        'mobile_data_usage(gb)':mdu,
        'promos_used':promos,
        'active_times':times,
        'activity_status':active
        }))

In [3]:
d[0] # sample output

Unnamed: 0,weeks,IMEI,credits_loaded_per_week,num_hrs_spend_on_net,mobile_data_usage(gb),promos_used,active_times,activity_status
0,2019-07-17,823865145970266,100,5.3,8.07,0,1,0
1,2019-07-24,823865145970266,0,17.4,8.6,0,0,0
2,2019-07-31,823865145970266,200,15.5,5.23,2,1,0
3,2019-08-07,823865145970266,20,13.2,6.87,2,0,0
4,2019-08-14,823865145970266,0,2.7,5.59,3,0,0
5,2019-08-21,823865145970266,30,14.4,6.87,0,2,0
6,2019-08-28,823865145970266,0,14.0,7.15,3,0,0
7,2019-09-04,823865145970266,0,4.3,6.63,2,2,0
8,2019-09-11,823865145970266,30,3.2,6.84,4,1,0
9,2019-09-18,823865145970266,800,5.1,6.39,1,2,0


In [4]:
# records of the weeks prior to confirming that each user has churned
for i in range(len(d)):
    print(len(d[i]),'weeks')

30 weeks
53 weeks
59 weeks
26 weeks
62 weeks
43 weeks
42 weeks
33 weeks
39 weeks
51 weeks


In [5]:
len(d) # number of users

10

### Putting Datas in Separate Sheets in an Excel File

In [6]:
with pd.ExcelWriter('data/mock/sample_mock_rc_data.xlsx') as writer:
    for i in range(len(d)):
        d[i].to_excel(writer, sheet_name=('d{}'.format(i)), index=False)

# Generating Rotational Churners

In [1]:
import numpy as np
import pandas as pd
from mock_data_script import mock_data_generation as mdg

dfs = pd.ExcelFile('data/mock/sample_mock_rc_data.xlsx')
sheets = dfs.sheet_names
d = []
for i in range(10):
    d.append(dfs.parse(sheet_name=sheets[i]))

In [7]:
d[0].head()

Unnamed: 0,weeks,IMEI,credits_loaded_per_week,num_hrs_spend_on_net,mobile_data_usage(gb),promos_used,active_times,activity_status
0,2019-07-17,823865145970266,100,5.3,8.07,0,1,0
1,2019-07-24,823865145970266,0,17.4,8.6,0,0,0
2,2019-07-31,823865145970266,200,15.5,5.23,2,1,0
3,2019-08-07,823865145970266,20,13.2,6.87,2,0,0
4,2019-08-14,823865145970266,0,2.7,5.59,3,0,0


In [3]:
weeks, IMEIs, credits_loaded, num_hrs, mdu, promos, times, active = mdg.mock_rc_data(d[0]['weeks'][25].strftime('%Y-%m-%d'), len(d[0])-4)

In [4]:
len(weeks)

26

In [34]:
l = d[0]['credits_loaded_per_week'].tolist()
uni = d[0]['credits_loaded_per_week'].unique().tolist()
x = d[0]['credits_loaded_per_week'].value_counts().reset_index().to_numpy().tolist()

In [60]:
d[0].columns[2]

'credits_loaded_per_week'

In [45]:
x[0]

[0, 18]

In [55]:
import random
pota = [i[1] for i in x]
random.shuffle(pota)
pota

[1, 2, 1, 2, 1, 18, 1, 2, 1, 1]

In [57]:
for j in range(len(x)):
    x[j][1] = pota[j]

x

[[0, 1],
 [30, 2],
 [20, 1],
 [100, 2],
 [90, 1],
 [500, 18],
 [50, 1],
 [200, 2],
 [800, 1],
 [1000, 1]]

In [61]:
n = []

while True:
    r = 0
    t = x[r][0]

    m = x[r][1]
    for i in range(m):
        n.append(t)
    
    r+=1

    if r >= len(x):
        break 

[100,
 0,
 200,
 20,
 0,
 30,
 0,
 0,
 30,
 800,
 0,
 0,
 20,
 0,
 90,
 0,
 0,
 0,
 0,
 50,
 500,
 0,
 100,
 0,
 1000,
 0,
 0,
 0,
 0,
 0]

In [66]:
h = d[0]['credits_loaded_per_week'].values.tolist()