In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import scipy as sc
import uuid

In [2]:
#read in population distribution source: https://www.populationpyramid.net/northern-europe/1969/
# NOTE(review): the URL above points at Northern Europe 1969 while the file loaded
# here is AFRICA-1970 -- confirm which dataset is intended.
# create_initial_population() reads its own `region` CSV into a local variable, so
# this module-level frame is not the one that function samples from.
population = pd.read_csv('data/AFRICA-1970.csv')

In [3]:
# Vocabularies and sampling weights shared by the population helpers below.
genders = ['male', 'female']
location = ['city', 'suburbs', 'rural']
locationw = [0.7, 0.2, 0.1]  # sampling weights, listed in the same order as `location`
saleschannels = ['online', 'phone', 'shop']
players = np.array(['telcom', 'comline', 'fastsat'])


# Probability tables of the population using a channel, based on income & location:
# one row per income label, one column per location.
ruralwdata = [
    [0.1, 0.3, 0.5],  # low
    [0.1, 0.2, 0.8],  # med
    [0.1, 0.2, 0.7],  # high
    [0.1, 0, 0.3],    # v-high
]
rw = pd.DataFrame(
    ruralwdata,
    index=['low', 'med', 'high', 'v-high'],
    columns=['city', 'suburbs', 'rural'],
)

radiodata = [
    [0.2, 0.1, 0.1],  # low
    [0.3, 0.4, 0.2],  # med
    [0.5, 0.4, 0.3],  # high
    [0.5, 0.5, 0.5],  # v-high
]
radio = pd.DataFrame(
    radiodata,
    index=['low', 'med', 'high', 'v-high'],
    columns=['city', 'suburbs', 'rural'],
)

tvdata = [
    [0.1, 0, 0],      # low
    [0.2, 0.1, 0],    # med
    [0.3, 0.1, 0],    # high
    [0.3, 0.2, 0.1],  # v-high
]
tv = pd.DataFrame(
    tvdata,
    index=['low', 'med', 'high', 'v-high'],
    columns=['city', 'suburbs', 'rural'],
)

 
def create_initial_population(popsize, region, year=1970):
    """Build the starting population, one row per simulated person.

    Parameters
    ----------
    popsize : int
        Number of people to generate.
    region : str
        Path of a CSV with one row per age bin and numeric columns ``M`` and
        ``F`` (male / female counts).
    year : int, default 1970
        Simulation start year; ``bdate`` is computed as ``year - age``.

    Returns
    -------
    pandas.DataFrame
        ``popsize`` rows with demographic columns (``income``, ``age``,
        ``gender``, ``bdate``, ``uuid``, ``location``), the calling need
        (``need-mcalling``) and the bookkeeping columns used by the later
        steps (``died``, ``channel*``, ``satisfaction``, ``sample-awareness``,
        ``source``, ``closed-*``).
    """
    df = pd.DataFrame(columns=['closed-mcalling-minutes', 'closed-mcalling-price', 'closed-mcalling-delta'])

    population = pd.read_csv(region)
    # Label every row with the lower bound of its five-year age bin (0, 5, 10, ...).
    # Derived from the row count instead of a fixed 21-entry list so files with a
    # different number of bins still load.
    # Assumes rows are ordered youngest first -- TODO confirm against the CSV.
    population['Age'] = np.arange(len(population)) * 5

    population['Mp']=(population['M']/(population['M'].sum()))
    # NOTE(review): 'Fp' is computed but never used; ages for everyone are drawn
    # from the male distribution below -- confirm whether that is intended.
    population['Fp']=(population['F']/(population['F'].sum()))

    ages = population['Age']
    weights = population['Mp'].values
    # The first column assignment fixes the number of rows at popsize.
    df['income']= np.random.beta(1,6,size=popsize)*128
    # Bin lower bound plus a uniform offset of 1..5 years inside the bin.
    df['age'] = np.random.choice(ages,size=popsize,p=weights)+np.random.randint(1,6,size=popsize)
    df['gender'] = np.random.choice(genders,size=df.shape[0])
    df['bdate'] = year-df['age']
    df['uuid'] = [uuid.uuid4() for _ in range(len(df.index))]
    # Only people older than 14 get a calling need.
    # NOTE(review): the normal draw can be negative; downstream code only treats need > 0 as real.
    df['need-mcalling'] = np.where(df['age']>14, np.random.normal(500,scale=250,size=popsize), 0)
    df['died'] = 'alive'
    df['location'] = np.random.choice(location,p=locationw,size=df.shape[0])
    # Channel preference is not modelled yet: everybody uses the shop channel.
    df['channel-online'] = 0
    df['channel-phone'] = 0
    df['channel-shop'] = 1
    df['satisfaction'] = 0
    df['channel'] = 'none'
    df['sample-awareness'] = 'none'
    df['source'] = 'new'
    df['closed-company']= 'none'

    return df

def newbirths(year,growthrate,df,rw,radio,tv):
    """Create the cohort born in ``year``.

    The cohort size is ``growthrate`` births per 1000 people whose ``died``
    value is ``'alive'`` in ``df`` (truncated to an int). Newborns start at
    age 1 with no calling need, the shop channel only, and media-consumption
    probabilities looked up from ``rw`` / ``radio`` / ``tv`` by
    (income label, location).

    Returns a new DataFrame; ``df`` is only read.
    """
    living = df.loc[df['died'] == 'alive']
    numberofpeople = int(len(living)/1000*growthrate)
    print('Births:', numberofpeople)

    bf = pd.DataFrame()
    # The first assignment fixes the number of rows; later scalars are broadcast.
    bf['gender'] = np.random.choice(genders,size=numberofpeople)
    bf['age'] = 1
    bf['bdate'] = year
    bf['income'] = np.random.beta(1,6,size=numberofpeople)*128
    bf['uuid'] = [uuid.uuid4() for _ in range(len(bf.index))]

    # Constant start-up state (channel model still to do: everyone uses the shop).
    for column, value in (
        ('died', 'alive'),
        ('channel-online', 0),
        ('channel-phone', 0),
        ('channel-shop', 1),
        ('satisfaction', 0),
    ):
        bf[column] = value

    bf['location'] = np.random.choice(location,p=locationw,size=numberofpeople)

    for column, value in (
        ('need-mcalling', 0),
        ('channel', 'none'),
        ('sample-awareness', 'none'),
        ('source', 'new'),
        ('closed-company', 'none'),
    ):
        bf[column] = value

    # Income bands: >32 v-high, >8 high, >2 med, otherwise low.
    income = bf['income']
    bf['incomelabel'] = np.select(
        [income > 32, income > 8, income > 2],
        ['v-high', 'high', 'med'],
        default='low',
    )

    # Index by (income label, location) so the stacked probability tables
    # align row by row, then restore the flat index.
    bf = (
        bf.set_index(['incomelabel', 'location'])
          .assign(**{
              'consume-rw': rw.stack(),
              'consume-radio': radio.stack(),
              'consume-tv': tv.stack(),
          })
          .reset_index()
    )

    return bf

def mortality(inputage,lifeexp):
    """Survival function: ``1 - 0.0012 * exp(0.0675 * inputage - lifeexp)``.

    Works on scalars and on array-likes (element-wise). The result is not
    clipped, so it falls below zero once the exponential term exceeds 1.
    """
    death_term = np.exp(0.0675*inputage-lifeexp)*0.0012
    return 1-death_term

 
def deaths(lifeexp, year,df):
    """Mark the people who die in ``year``.

    Writes a uniform draw (``random``) and a per-person survival probability
    (``survivalprob``) into ``df``. Everybody still ``'alive'`` whose survival
    probability is below the draw gets ``died`` set to ``year``. ``df`` is
    modified in place and also returned.
    """
    df['random'] = np.random.random(df.shape[0])

    survival = mortality(df['age'].astype(float),lifeexp)
    # mortality() can go negative for high ages; those people get a fixed 0.02.
    df['survivalprob'] = np.where(survival<0, 0.02, survival)

    still_alive = df['died'] == 'alive'
    dies_now = df['survivalprob'] < df['random']
    outcome_if_alive = np.where(dies_now, year, df['died'])
    # Earlier deaths keep the value they already have.
    df['died'] = np.where(still_alive, outcome_if_alive, df['died'])

    return df

def need(df):
    """Give a calling need to people who have none yet and are older than 14.

    Rows whose ``need-mcalling`` equals 0 are re-evaluated: if ``age`` > 14 a
    need is drawn from ``normal(500, 250)``, otherwise it stays 0. Rows with a
    non-zero need are left untouched.

    The previous version computed these values and discarded them, so people
    born during the simulation never acquired a need. The result is now
    written back into ``df`` (in place) and ``df`` is returned.

    Values are written by position, so repeated index labels in ``df`` are
    harmless.
    """
    current_need = df['need-mcalling'].to_numpy(dtype=float, copy=True)
    ages = df['age'].to_numpy(dtype=float)

    without_need = current_need == 0
    draws = np.random.normal(500, scale=250, size=int(without_need.sum()))
    current_need[without_need] = np.where(ages[without_need] > 14, draws, 0)

    # Assigning the whole array also turns an all-integer column into floats.
    df['need-mcalling'] = current_need
    return df

In [4]:
# Show the `tv` probability table (rows: income label, columns: location).
tv

Unnamed: 0,city,suburbs,rural
low,0.1,0.0,0.0
med,0.2,0.1,0.0
high,0.3,0.1,0.0
v-high,0.3,0.2,0.1


In [5]:
# Show the `radio` probability table (rows: income label, columns: location).
radio

Unnamed: 0,city,suburbs,rural
low,0.2,0.1,0.1
med,0.3,0.4,0.2
high,0.5,0.4,0.3
v-high,0.5,0.5,0.5


In [6]:
#use channel probability tables to populate population table
#to-do run this after newbirths only for new people



In [7]:
# Show the `rw` probability table (rows: income label, columns: location).
rw

Unnamed: 0,city,suburbs,rural
low,0.1,0.3,0.5
med,0.1,0.2,0.8
high,0.1,0.2,0.7
v-high,0.1,0.0,0.3


In [8]:
#marketing budget tables
#(['telcom','comline','fastsat'])
def marketing():
    """Return each company's share of the marketing budget per medium.

    Rows are the media (``rw``, ``radio``, ``tv``) and columns are the
    companies in ``players``. Every row is the raw budget divided by the row
    total, so each row sums to 1.
    """
    # Raw budgets per medium, one entry per company in `players` order.
    raw_budgets = {
        'rw': np.array([500, 200, 50]),
        'radio': np.array([50, 0, 400]),
        'tv': np.array([1000, 2000, 400]),
    }
    budget_shares = [spend/spend.sum() for spend in raw_budgets.values()]

    #marketing dataframe
    return pd.DataFrame(data=budget_shares, columns=players, index=list(raw_budgets))

In [9]:
# Show the normalised budget shares (rows: medium, columns: company).
marketing()

Unnamed: 0,telcom,comline,fastsat
rw,0.666667,0.266667,0.066667
radio,0.111111,0.0,0.888889
tv,0.294118,0.588235,0.117647


In [10]:
#assign a company to each person based on awareness (or none if total awareness is too low), plus a bit of chance,
#and only if satisfaction is low (the code below requires satisfaction < 0.1)

#def awareness_loop():

def awareness(df):
    """Sample, for every person, a sales channel and the company they are aware of.

    Steps (all columns are written into ``df`` in place; nothing is returned):

    1. ``awareness-<company>``: for each medium (rw, radio, tv) the company's
       budget share times the person's consumption probability times a random
       attention factor drawn from {0, 0.5, 1}, summed over the media.
    2. ``awarenessum`` is the sum over companies; ``awareness-<company>-n`` is
       each company's share of that sum (NaN when the sum is 0).
    3. ``channel`` is drawn from ``saleschannels`` with the person's
       ``channel-*`` values as probabilities.
    4. ``sample-awareness`` is a company drawn with the normalised shares, but
       only when ``awarenessum`` > 0.4, a uniform draw exceeds 0.75 and
       ``satisfaction`` < 0.1; otherwise it is ``'none'``.

    Compared with the earlier version, the company list comes from ``players``
    instead of three hard-coded column names. A person whose shares cannot be
    sampled (for example all NaN) gets ``'none'`` rather than reusing the
    previous person's draw. Results are written back by position, so repeated
    index labels in ``df`` cannot leak one person's result onto another.
    """
    marketing_df = marketing()
    n_people = df.shape[0]

    asum = 0
    df['sample-awareness'] = 'none'
    for i in players:
        awarness_company = 'awareness-{}'.format(i)
        exposure = 0
        for medium in ('rw', 'radio', 'tv'):
            attention = np.random.choice([0,0.5,1],size=n_people)
            exposure = exposure + marketing_df.loc[medium, i]*df['consume-{}'.format(medium)]*attention
        df[awarness_company] = exposure
        asum = asum + df[awarness_company]
    df['awarenessum'] = asum

    share_columns = []
    for i in players:
        awarness_n = 'awareness-{}-n'.format(i)
        df[awarness_n] = (df['awareness-{}'.format(i)]/df['awarenessum'])
        share_columns.append(awarness_n)

    # Pull the per-person inputs out once as plain arrays for positional access.
    company_shares = df[share_columns].to_numpy(dtype=float)
    channel_probs = df[['channel-online','channel-phone','channel-shop']].to_numpy(dtype=float)
    total_awareness = df['awarenessum'].to_numpy(dtype=float)
    satisfaction = df['satisfaction'].to_numpy(dtype=float)

    channels = []
    sampled = []
    for pos in range(n_people):
        channels.append(np.random.choice(saleschannels,p=channel_probs[pos]))
        try:
            choice = np.random.choice(players,p=company_shares[pos])
        except ValueError:
            # Shares are not a valid probability vector (e.g. NaN because the
            # person has zero total awareness): nothing can be sampled.
            choice = None

        # Condition order is kept so the uniform draw happens exactly when it did before.
        if total_awareness[pos] > 0.4 and np.random.rand() > 0.75 and satisfaction[pos] < 0.1 and choice is not None:
            sampled.append(choice)
        else:
            sampled.append('none')

    df['channel'] = channels
    df['sample-awareness'] = sampled




In [11]:
#players = np.array(['telcom','comline','fastsat'])
# One row per company: number of shops, customers a single shop can serve,
# and the share of served customers that convert.
shops_df = pd.DataFrame({
    'company': players,
    'shops': np.array([1, 2, 3]),
    'shop_throughput': np.array([300, 150, 100]),
    'shop_conversionrate': np.array([0.06,0.05,0.2]),  #industry standard ~10%
})
# Total customers a company can serve across all of its shops.
shops_df['max_troughput'] = shops_df['shops']*shops_df['shop_throughput']


In [12]:
def offers():
    """Return the offer catalogue: one row per (company, minutes, price) offer."""
    #to be read from UI
    catalogue = [
        ('telcom', 50, 5),
        ('telcom', 100, 10),
        ('telcom', 1000, 3),
        ('comline', 50, 0.01),
        ('comline', 600, 2),
        ('fastsat', 200, 1),
        ('fastsat', 600, 5),
    ]
    return pd.DataFrame(catalogue, columns=['company', 'minutes', 'price'])



In [25]:
def customers_from_shops(df,shops_df):
    """Convert shop visitors into customers, company by company.

    For every company in ``players``:

    * visitors are the people whose ``sample-awareness`` is that company and
      whose ``channel`` is ``'shop'``;
    * at most ``max_troughput`` of them (from ``shops_df``) are served, chosen
      at random when there are more;
    * a served visitor is offered the company's smallest plan that strictly
      exceeds their ``need-mcalling``; they sign up when they have a positive
      need, a uniform draw is below the company's ``shop_conversionrate`` and
      their ``income`` exceeds the plan price.

    On sign-up the ``closed-mcalling-*`` columns, ``source`` (the previous
    company), ``closed-company`` and ``satisfaction`` (``1 - delta/minutes``)
    are written into ``df`` in place. One summary line is printed per company;
    nothing is returned.

    Rows are written back with ``df.at[index, ...]``, so ``df`` needs unique
    index labels for each write to reach exactly one person.

    Changes from the earlier version: a visitor whose need is not covered by
    any plan is skipped instead of silently receiving the plan picked for the
    previous visitor; satisfaction is stored as a scalar, not a one-element
    Series; the throughput check no longer relies on ``Series.bool()``.
    """
    # NOTE(review): `offer` is kept as a module-level name only because the
    # earlier version exposed it that way; it holds the last offer table evaluated.
    global offer
    for column in ('satisfaction', 'closed-mcalling-delta', 'closed-mcalling-minutes'):
        df[column] = df[column].astype(float)
    offer_df = offers()

    for i in players:
        awareness_sample = df.loc[df['sample-awareness'] == i]
        customers_in_shops = awareness_sample.loc[awareness_sample['channel'] == 'shop']
        shops = shops_df.loc[shops_df['company'] == i]
        max_throughput = int(shops['max_troughput'].iloc[0])
        shop_convert_rate = shops['shop_conversionrate'].iloc[0]

        if len(customers_in_shops) > max_throughput:
            max_customers_in_shops = customers_in_shops.sample(n=max_throughput)
        else:
            max_customers_in_shops = customers_in_shops

        # The company's plans do not change per visitor, so filter them once.
        company_offers = offer_df.loc[offer_df['company'] == i]
        conversion_counter = 0

        for index, row in max_customers_in_shops.iterrows():
            offer = company_offers.copy()
            offer['delta'] = offer['minutes'] - row['need-mcalling']

            # Drawn for every visitor, as before, so the random stream does not
            # depend on which visitors can be offered a plan.
            rand_convert = np.random.rand()

            # Only plans with more minutes than the need qualify (positive delta).
            covering = offer.loc[offer['delta'] > 0]
            if covering.empty:
                continue  # nothing to offer this visitor
            best = covering['delta'].idxmin()
            minutes = covering.at[best, 'minutes']
            price = covering.at[best, 'price']
            delta = covering.at[best, 'delta']

            if not (row['need-mcalling'] > 0 and shop_convert_rate > rand_convert):
                continue  # no need, or the visit did not convert
            if not (row['income'] > price):
                continue  # insufficient income

            df.at[index, 'closed-mcalling-minutes'] = minutes
            df.at[index, 'closed-mcalling-price'] = price
            df.at[index, 'closed-mcalling-delta'] = delta
            df.at[index, 'source'] = df.at[index, 'closed-company']
            df.at[index, 'closed-company'] = i
            # The closer the plan is to the need, the higher the satisfaction.
            df.at[index, 'satisfaction'] = 1-(delta/minutes)
            conversion_counter = conversion_counter+1

        print(i,'- customers in shops:',len(customers_in_shops),'- max:',len(max_customers_in_shops),'- new customers:',conversion_counter)

# NOTE(review): in the visible part of this notebook `df` is first created in a
# later cell (via create_initial_population), so on a fresh kernel with
# Restart & Run All this call would fail with a NameError unless `df` is defined
# somewhere not shown here.
customers_from_shops(df,shops_df)

telcom - customers in shops: 248 - max: 248 - new customers: 9
comline - customers in shops: 147 - max: 147 - new customers: 7
fastsat - customers in shops: 517 - max: 300 - new customers: 40


In [14]:
#customers_from_shops(df,shops_df)

In [15]:
#awareness_sample = df.loc[df['sample-awareness'] == 'telco'] 
#customers_in_shops = awareness_sample.loc[awareness_sample['channel'] == 'shop']        


In [16]:
df = create_initial_population(10000,'data/Northern Europe-1970.csv')

# Income bands: >32 v-high, >8 high, >2 med, otherwise low.
df['incomelabel'] = np.select(
    [df['income'] > 32, df['income'] > 8, df['income'] > 2],
    ['v-high', 'high', 'med'],
    default='low',
)

# Index by (income label, location) so the stacked probability tables align
# with each person, then go back to a flat index.
df = (
    df.set_index(['incomelabel', 'location'])
      .assign(**{
          'consume-rw': rw.stack(),
          'consume-radio': radio.stack(),
          'consume-tv': tv.stack(),
      })
      .reset_index()
)

In [17]:
from IPython.display import display, HTML
# Render only a preview: converting the whole frame to HTML emits one table row
# per simulated person and bloats the saved notebook.
display(HTML(df.head(10).to_html()))

Unnamed: 0,incomelabel,location,closed-mcalling-minutes,closed-mcalling-price,closed-mcalling-delta,income,age,gender,bdate,uuid,need-mcalling,died,channel-online,channel-phone,channel-shop,satisfaction,channel,sample-awareness,source,closed-company,consume-rw,consume-radio,consume-tv
0,v-high,city,,,,51.21649,49,male,1921,d1f0574c-a8ec-4e4a-9979-150ce084b31c,252.604551,alive,0,0,1,0,none,none,new,none,0.1,0.5,0.3
1,med,suburbs,,,,7.043637,27,female,1943,06bd0fc1-a3b9-482a-86cc-58df2527bce5,690.007803,alive,0,0,1,0,none,none,new,none,0.2,0.4,0.1
2,low,city,,,,1.194322,10,female,1960,a74809bc-21bb-48f9-8f03-2b2bbdeba902,0.0,alive,0,0,1,0,none,none,new,none,0.1,0.2,0.1
3,low,city,,,,0.152751,17,male,1953,c31b8802-4a23-4627-8614-1c2a2dc9f28f,602.34882,alive,0,0,1,0,none,none,new,none,0.1,0.2,0.1
4,high,city,,,,24.800697,47,male,1923,b451de0c-c2c9-4607-b7b3-44eea907a43e,-143.72392,alive,0,0,1,0,none,none,new,none,0.1,0.5,0.3
5,med,suburbs,,,,6.732332,61,male,1909,8af7467c-de01-4f44-96e4-d080006bbdc6,628.270222,alive,0,0,1,0,none,none,new,none,0.2,0.4,0.1
6,med,city,,,,7.837658,94,female,1876,39c7f74d-dc41-4a7d-b673-be7373e44c50,481.416435,alive,0,0,1,0,none,none,new,none,0.1,0.3,0.2
7,high,city,,,,25.032361,3,male,1967,3daa8555-3c5d-468d-b374-6efdf3ffb88a,0.0,alive,0,0,1,0,none,none,new,none,0.1,0.5,0.3
8,high,city,,,,28.572738,54,female,1916,1a8f1142-e927-450d-8ea8-294b93253fe5,1087.416566,alive,0,0,1,0,none,none,new,none,0.1,0.5,0.3
9,high,suburbs,,,,21.071225,2,female,1968,5bd3ffb5-a2c9-4acb-9e40-92d3fc33c892,0.0,alive,0,0,1,0,none,none,new,none,0.2,0.4,0.1


In [19]:
# Initial round on the starting population.
awareness(df)
customers_from_shops(df,shops_df)

# One iteration per simulated year: births, deaths, ageing, then a marketing and sales round.
for i in np.arange(1970, 1975):

    # Births are sized from the people alive before this year's deaths (15 per 1000).
    birthdf = newbirths(i,15,df,rw,radio,tv)

    life_exp = 0.04*i-78.8
    deaths(life_exp,i,df)

    print('died:', len(df.loc[df['died'] != 'alive']))
    # .copy() so the age update below works on an independent frame, not a
    # slice of the previous one.
    df = df[df['died'] == 'alive'].copy()

    df['age'] = df['age']+1

    # pd.concat replaces DataFrame.append (removed in pandas 2.0). ignore_index
    # gives every person a unique row label; awareness() and
    # customers_from_shops() write results back by label.
    df = pd.concat([df, birthdf], ignore_index=True)
    need(df)

    awareness(df)
    customers_from_shops(df,shops_df)

    print('###',i, 'customers:')
    print(df['closed-company'].value_counts())


telcom - customers in shops: 279 - max: 279 - new customers: 7
comline - customers in shops: 167 - max: 167 - new customers: 5
fastsat - customers in shops: 620 - max: 300 - new customers: 39
Births: 150
died: 365
telcom - customers in shops: 239 - max: 239 - new customers: 8
comline - customers in shops: 201 - max: 201 - new customers: 7
fastsat - customers in shops: 635 - max: 300 - new customers: 37
### 1970 customers:
none       9684
fastsat      76
telcom       15
comline      10
Name: closed-company, dtype: int64
Births: 146
died: 341
telcom - customers in shops: 264 - max: 264 - new customers: 9
comline - customers in shops: 184 - max: 184 - new customers: 6
fastsat - customers in shops: 578 - max: 300 - new customers: 33
### 1971 customers:
none       9443
fastsat     106
telcom       26
comline      15
Name: closed-company, dtype: int64
Births: 143
died: 287
telcom - customers in shops: 258 - max: 258 - new customers: 10
comline - customers in shops: 176 - max: 176 - new custo