In [2]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from kneed import KneeLocator
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.linear_model import LinearRegression
from sklearn import model_selection
import matplotlib.pyplot as plt
# import psycopg2
import seaborn as sns
import pickle
import os
import sys

In [3]:
sys.path.append(os.path.abspath(os.path.join('../scripts')))
from preprocessing import PreProcess
from overview import Overview
from plot import Plot
from loggers import Logger

In [4]:
loggers = Logger("userSatisfaction.log").get_app_logger()
loggers.info("Initialized user satisfaction logger")

In [5]:
df_org = pd.read_csv('../data/cleaned_data_outliers.csv')
df = df_org.copy()

In [6]:
# Import user experience data
user_exp = pd.read_csv('../data/user_exp.csv')

# Import user engagement data
user_eng = pd.read_csv('../data/user_eng.csv')

In [7]:
user_exp.head()

Unnamed: 0,MSISDN/Number,cluster,avg_rtt_total,avg_tp_total,total_avg_tcp_total
0,24.237822,0,8.11439,7.27448,23.201031
1,24.237822,0,3.401197,7.803027,23.201031
2,24.237822,0,5.4161,7.763021,23.201031
3,24.237822,1,6.942157,9.10231,16.921539
4,24.237822,1,8.11439,27.135851,49.213729


In [8]:
user_eng.head()

Unnamed: 0,MSISDN/Number,cluster,sessions,Dur. (ms),total_data
0,24.237822,0,1,11.667533,6.778433
1,24.237822,1,1,12.107522,5.055351
2,24.237822,2,1,11.8128,6.390184
3,24.237822,2,1,10.817335,6.045765
4,24.237822,0,2,19.656666,13.143197


In [9]:
with open("../models/user_eng.pkl", "rb") as f:
    kmeans1 = pickle.load(f)
    
with open("../models/user_exp.pkl", "rb") as f:
    kmeans2 = pickle.load(f)

In [10]:
user_eng_df = user_eng.set_index('MSISDN/Number')[
    ['sessions','Dur. (ms)', 'total_data']]
scaler = StandardScaler()
scaled_array = scaler.fit_transform(user_eng_df)
pd.DataFrame(scaled_array).head(5)

Unnamed: 0,0,1,2
0,-0.12024,-0.116165,-0.083693
1,-0.12024,-0.104045,-0.16851
2,-0.12024,-0.112164,-0.102804
3,-0.12024,-0.139586,-0.119758
4,0.177556,0.103914,0.229608


In [11]:
data_normalized = normalize(scaled_array)
pd.DataFrame(data_normalized).head(5)

Unnamed: 0,0,1,2
0,-0.643111,-0.621314,-0.447635
1,-0.518978,-0.449075,-0.727319
2,-0.620034,-0.578384,-0.530122
3,-0.547204,-0.635243,-0.545008
4,0.575933,0.337065,0.744774


In [12]:
distance = kmeans1.fit_transform(data_normalized)
distance_from_less_engagement = list(
    map(lambda x: x[3], distance))
user_eng['engagement_score'] = distance_from_less_engagement
user_eng.head(5)

Unnamed: 0,MSISDN/Number,cluster,sessions,Dur. (ms),total_data,engagement_score
0,24.237822,0,1,11.667533,6.778433,0.14854
1,24.237822,1,1,12.107522,5.055351,0.313709
2,24.237822,2,1,11.8128,6.390184,0.140515
3,24.237822,2,1,10.817335,6.045765,0.058808
4,24.237822,0,2,19.656666,13.143197,1.955241


In [24]:
len(user_expr)

106855

In [14]:
user_expr = user_exp.copy()
user_expr.drop([106855], axis=0, inplace=True)

In [41]:
distance = kmeans2.fit_transform(data_normalized)
distance_from_worest_experiance = list(
    map(lambda x: x[0], distance))
user_expr['experience_score'] = distance_from_worest_experiance
user_expr.head(5)

ValueError: Length of values (106856) does not match length of index (106855)

In [40]:
user_eng_id = user_eng['MSISDN/Number'].values
user_expr_id = user_expr['MSISDN/Number'].values
user_intersection = list(
    set(user_eng_id).intersection(user_expr_id))
user_intersection[:5]

[24.242603733828183,
 24.238122894935568,
 24.23986256243128,
 24.23819064464601,
 24.237825910629]

In [34]:
user_exper_df = user_expr[user_expr['MSISDN/Number'].isin(
    user_intersection)]

In [35]:


user_eng_df.shape



(106856, 3)

In [36]:
user_exper_df = user_expr[user_expr['MSISDN/Number'].isin(
    user_intersection)]

In [37]:
user_exper_df.shape

(106855, 5)

In [38]:
user_df = pd.merge(user_eng_df, user_exper_df, on='MSISDN/Number')
user_df['satisfaction_score'] = (
    user_df['engagement_score'] + user_df['experience_score'])/2
user_df.head(5)

KeyError: 'engagement_score'

In [27]:
satisfaction_df = user_df[['MSISDN/Number', 'engagement_score',
                        'experience_score', 'satisfaction_score']]
satisfaction_df = satisfaction_df.set_index('msisdn/number')

KeyError: "['engagement_score', 'experience_score', 'satisfaction_score'] not in index"