## Generation of churn dummy data using Faker
This notebook generates synthetic customer churn data with Faker for use in data visualization demos.

In [1]:
import datetime
import locale
import random
from datetime import date
from pathlib import Path

import numpy as np
import pandas as pd
from faker import Faker

### Parameters ###

In [2]:
# Locale for randomly generated data (names, addresses, phone numbers, ...)
faker = Faker('da_DK')

# Country code used for the generated locations
location_country = 'DK'

# Number of records
records = 10

# Minimum customer age in years
min_age = 18

# Company start date
start_date = datetime.date(2010, 1, 1)

# Churn rate in percent (0-100): chance that a customer is flagged as churned
churn_rate = 25

# Optional random seed: set to an integer to get the same data on every run,
# or leave as None to get new random data each time
seed = None

if seed is not None:
    # Seed every random source used when generating the records
    Faker.seed(seed)
    random.seed(seed)
    np.random.seed(seed)

In [3]:
# Fictional competitor names; one is picked at random for each customer
competitors = [
    'Insuralux',
    'Arrowhead',
    'Beacon',
    'WeProtectYou',
    'Unity',
    'Capital Protectors',
    'Smart Protect',
    'Insurance For You',
    'Smart Life Insurance',
    'MutuTrust',
    'InsCap',
]

### Generate data

In [39]:
# Generating dummy data: one dictionary per customer, keyed by row number
customer = {}

for n in range(records):
    record = {}
    # NOTE: ids are drawn independently per row, so duplicates are possible
    record['id'] = faker.random_number(digits=5)
    record['name'] = faker.name()
    record['address'] = faker.address()
    record['city'] = faker.city()
    record['post_code'] = faker.postcode()
    # Danish locations on land
    record['location'] = faker.local_latlng(location_country)
    record['email'] = faker.email()
    record['phone'] = faker.phone_number()
    # Only adult customers (age is measured today, not at start_date)
    record['birth_date'] = faker.date_of_birth(minimum_age=min_age)
    # Customer start date: any day from the company start date until today
    record['start_date'] = faker.date_between(start_date)
    # % of churned customers
    record['churn'] = faker.boolean(chance_of_getting_true=churn_rate)
    # Draw the churn date from the customer's own start date onwards, so a
    # customer can never churn before becoming a customer
    record['churn_date'] = faker.date_between(record['start_date'])
    record['churn_probability'] = faker.pyfloat(min_value=0, max_value=1)
    record['competitor'] = random.choice(competitors)
    # Customer value assuming a normal distribution
    record['customer_value'] = np.random.normal(loc=25000, scale=10000)
    customer[n] = record

In [40]:
# Build the DataFrame: each inner dictionary becomes a row, the outer keys become the index
churn_df = pd.DataFrame.from_dict(data=customer, orient='index')

### A little cleanup

In [41]:
# Customers that have not churned must not carry a churn date
churn_df.loc[~churn_df['churn'], 'churn_date'] = np.nan

In [42]:
def calculate_age(born, today=None):
    """Return the age in completed years on a reference date.

    Parameters
    ----------
    born : datetime.date
        Date of birth.
    today : datetime.date, optional
        Date on which the age is evaluated. Defaults to the current date.

    Returns
    -------
    int
        Number of completed years between ``born`` and ``today``.
    """
    if today is None:
        today = date.today()
    # True (counts as 1) when this year's birthday has not yet been reached
    birthday_pending = (today.month, today.day) < (born.month, born.day)
    return today.year - born.year - birthday_pending

In [43]:
# Derive each customer's current age from the birth date
churn_df['age'] = churn_df['birth_date'].map(calculate_age)

In [44]:
# Preview the first five rows
churn_df.head(n=5)

Unnamed: 0,id,name,address,city,post_code,location,email,phone,birth_date,start_date,churn,churn_date,churn_probability,competitor,customer_value,age
0,40535,Univ.Prof. Malene Mathiasen,Kjeldsgårds Allé 923\n4937 Bogø By,Brønderslev,6458,"(55.67938, 12.53463, Frederiksberg, DK, Europe...",bolsen@example.org,27902379,1997-09-23,2013-06-05,False,,0.120037,Arrowhead,46270.495575,25
1,90389,Prof. Kirstine Paulsen,Ringholm Allé 712\n3146 Ulstrup,Trustrup,2959,"(55.67938, 12.53463, Frederiksberg, DK, Europe...",knudsentina@example.com,6947 9222,1963-07-27,2014-05-29,False,,0.5,WeProtectYou,21144.347073,59
2,89911,Prof. Line Madsen,Southamptongade 478\n2272 Tikøb,Vejle,1922,"(55.67938, 12.53463, Frederiksberg, DK, Europe...",lundfrode@example.net,+45 9891 2596,1940-04-06,2017-12-10,False,,0.826438,MutuTrust,32980.883229,82
3,19881,Sara Lind,Knabrovej 889\n7118 Stokkemarke,Bandholm,4789,"(55.67938, 12.53463, Frederiksberg, DK, Europe...",henriksenragnar@example.org,+45 36 74 28 29,1950-01-01,2021-03-17,False,,0.990467,WeProtectYou,14534.883861,73
4,11459,Bettina Lassen,Rysensteensgade 7\n1570 København,Strøby,7614,"(55.67938, 12.53463, Frederiksberg, DK, Europe...",uchristiansen@example.net,1004 8464,1934-07-30,2011-05-09,False,,0.13135,Insuralux,41816.922214,88


### Export to csv

In [45]:
# Export to csv in the current user's home directory, so the notebook also
# runs on machines where /Users/lars does not exist
output_path = Path.home() / 'churn.csv'
churn_df.to_csv(output_path, header=True)