# Mock Data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Locations of the raw input files, relative to the notebook's working directory.
ibm_data = 'data/kaggle/IBM_telco_churn_data.csv'
# Not read in the cells shown here; kept for later use.
sample_data = 'data/archive/telecom_sampledata.csv'

# Load the IBM telco churn export and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=ibm_data)
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,CustomerID,Count,Country,State,City,Zip Code,Lat Long,Latitude,Longitude,...,Contract,Paperless Billing,Payment Method,Monthly Charges,Total Charges,Churn Label,Churn Value,Churn Score,CLTV,Churn Reason
0,0,3668-QPYBK,1,United States,California,Los Angeles,90003,"33.964131, -118.272783",33.964131,-118.272783,...,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,86,3239,Competitor made better offer
1,1,9237-HQITU,1,United States,California,Los Angeles,90005,"34.059281, -118.30742",34.059281,-118.30742,...,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,67,2701,Moved
2,2,9305-CDSKC,1,United States,California,Los Angeles,90006,"34.048013, -118.293953",34.048013,-118.293953,...,Month-to-month,Yes,Electronic check,99.65,820.5,Yes,1,86,5372,Moved
3,3,7892-POOKP,1,United States,California,Los Angeles,90010,"34.062125, -118.315709",34.062125,-118.315709,...,Month-to-month,Yes,Electronic check,104.8,3046.05,Yes,1,84,5003,Moved
4,4,0280-XJGEX,1,United States,California,Los Angeles,90015,"34.039224, -118.266293",34.039224,-118.266293,...,Month-to-month,Yes,Bank transfer (automatic),103.7,5036.3,Yes,1,89,5340,Competitor had better devices


In [3]:
# List every column label; the head() preview elides the middle columns with '...'.
df.columns

Index(['Unnamed: 0', 'CustomerID', 'Count', 'Country', 'State', 'City',
       'Zip Code', 'Lat Long', 'Latitude', 'Longitude', 'Gender',
       'Senior Citizen', 'Partner', 'Dependents', 'Tenure Months',
       'Phone Service', 'Multiple Lines', 'Internet Service',
       'Online Security', 'Online Backup', 'Device Protection', 'Tech Support',
       'Streaming TV', 'Streaming Movies', 'Contract', 'Paperless Billing',
       'Payment Method', 'Monthly Charges', 'Total Charges', 'Churn Label',
       'Churn Value', 'Churn Score', 'CLTV', 'Churn Reason'],
      dtype='object')

In [4]:
# Number of rows (one per record) in the loaded frame.
df.shape[0]

7043

In [5]:
# Remove the leftover 'Unnamed: 0' column (the first column reported by
# df.columns). It is dropped by name rather than by position so that
# re-running this cell is a no-op; dropping df.columns[0] a second time
# would silently remove 'CustomerID'.
df = df.drop(columns='Unnamed: 0', errors='ignore')
df.head()

Unnamed: 0,CustomerID,Count,Country,State,City,Zip Code,Lat Long,Latitude,Longitude,Gender,...,Contract,Paperless Billing,Payment Method,Monthly Charges,Total Charges,Churn Label,Churn Value,Churn Score,CLTV,Churn Reason
0,3668-QPYBK,1,United States,California,Los Angeles,90003,"33.964131, -118.272783",33.964131,-118.272783,Male,...,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,86,3239,Competitor made better offer
1,9237-HQITU,1,United States,California,Los Angeles,90005,"34.059281, -118.30742",34.059281,-118.30742,Female,...,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,67,2701,Moved
2,9305-CDSKC,1,United States,California,Los Angeles,90006,"34.048013, -118.293953",34.048013,-118.293953,Female,...,Month-to-month,Yes,Electronic check,99.65,820.5,Yes,1,86,5372,Moved
3,7892-POOKP,1,United States,California,Los Angeles,90010,"34.062125, -118.315709",34.062125,-118.315709,Female,...,Month-to-month,Yes,Electronic check,104.8,3046.05,Yes,1,84,5003,Moved
4,0280-XJGEX,1,United States,California,Los Angeles,90015,"34.039224, -118.266293",34.039224,-118.266293,Male,...,Month-to-month,Yes,Bank transfer (automatic),103.7,5036.3,Yes,1,89,5340,Competitor had better devices


### Establishing fake dates to create the illusion of time-series data



In [6]:
# Build a mock "date of churn" value for every row of df.
# The legacy global NumPy seed is fixed first so the draw is repeatable.
np.random.seed(0)
# Pool of candidate calendar days between the two endpoint dates.
candidate_days = pd.date_range('2018-01-01', '2020-08-01')
# One draw per row; np.random.choice samples with replacement by default.
# NOTE(review): every row gets a date, whatever its 'Churn Value' is.
churn_dates = np.random.choice(candidate_days, len(df))
churn_dates

array(['2019-11-16T00:00:00.000000000', '2019-07-14T00:00:00.000000000',
       '2019-09-22T00:00:00.000000000', ...,
       '2019-09-24T00:00:00.000000000', '2019-08-31T00:00:00.000000000',
       '2018-02-15T00:00:00.000000000'], dtype='datetime64[ns]')

In [7]:
# Insert the mock dates into the dataframe, immediately before 'Churn Label'.
# The position is looked up by name instead of hard-coding 28, so it stays
# correct if the column layout changes. The insert is skipped when the column
# already exists, because DataFrame.insert raises ValueError on a duplicate
# column name, which made re-running this cell fail.
if 'churn_date' not in df.columns:
    df.insert(
        loc=df.columns.get_loc('Churn Label'),
        column='churn_date',
        value=churn_dates,
    )

In [8]:
# Preview the first five rows to confirm 'churn_date' sits before 'Churn Label'.
df.head(n=5)

Unnamed: 0,CustomerID,Count,Country,State,City,Zip Code,Lat Long,Latitude,Longitude,Gender,...,Paperless Billing,Payment Method,Monthly Charges,Total Charges,churn_date,Churn Label,Churn Value,Churn Score,CLTV,Churn Reason
0,3668-QPYBK,1,United States,California,Los Angeles,90003,"33.964131, -118.272783",33.964131,-118.272783,Male,...,Yes,Mailed check,53.85,108.15,2019-11-16,Yes,1,86,3239,Competitor made better offer
1,9237-HQITU,1,United States,California,Los Angeles,90005,"34.059281, -118.30742",34.059281,-118.30742,Female,...,Yes,Electronic check,70.7,151.65,2019-07-14,Yes,1,67,2701,Moved
2,9305-CDSKC,1,United States,California,Los Angeles,90006,"34.048013, -118.293953",34.048013,-118.293953,Female,...,Yes,Electronic check,99.65,820.5,2019-09-22,Yes,1,86,5372,Moved
3,7892-POOKP,1,United States,California,Los Angeles,90010,"34.062125, -118.315709",34.062125,-118.315709,Female,...,Yes,Electronic check,104.8,3046.05,2018-07-12,Yes,1,84,5003,Moved
4,0280-XJGEX,1,United States,California,Los Angeles,90015,"34.039224, -118.266293",34.039224,-118.266293,Male,...,Yes,Bank transfer (automatic),103.7,5036.3,2020-04-15,Yes,1,89,5340,Competitor had better devices


In [9]:
# Order the rows chronologically by the mock date (earliest first) and
# preview the first five.
df = df.sort_values('churn_date', ascending=True)
df.head(n=5)

Unnamed: 0,CustomerID,Count,Country,State,City,Zip Code,Lat Long,Latitude,Longitude,Gender,...,Paperless Billing,Payment Method,Monthly Charges,Total Charges,churn_date,Churn Label,Churn Value,Churn Score,CLTV,Churn Reason
627,1623-NLDOT,1,United States,California,Lancaster,93534,"34.727529, -118.153098",34.727529,-118.153098,Female,...,No,Mailed check,33.55,1445.3,2018-01-01,Yes,1,82,2044,Competitor had better devices
1549,0733-VUNUW,1,United States,California,San Francisco,94114,"37.758085, -122.434801",37.758085,-122.434801,Male,...,Yes,Bank transfer (automatic),55.65,1400.55,2018-01-01,Yes,1,97,4126,Competitor had better devices
4597,1137-DGOWI,1,United States,California,Coachella,92236,"33.680031, -116.171678",33.680031,-116.171678,Female,...,No,Bank transfer (automatic),70.25,2198.9,2018-01-01,No,0,66,5417,
1048,2860-RANUS,1,United States,California,Mc Kittrick,93251,"35.38381, -119.730885",35.38381,-119.730885,Female,...,Yes,Credit card (automatic),85.8,1727.5,2018-01-01,Yes,1,75,3336,Price too high
3012,9957-YODKZ,1,United States,California,Canby,96015,"41.486953, -120.913975",41.486953,-120.913975,Male,...,Yes,Electronic check,80.8,457.1,2018-01-01,No,0,68,5611,
