# Satisfaction Analysis

Assuming that the satisfaction of a user is dependent on user engagement and experience, you’re expected in this section to analyze customer satisfaction in depth.

In [1]:
#importing libraries
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from statistics import *
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances

# Based on the engagement analysis + the experience analysis you conducted above ,

Write a python program to assign:

engagement score to each user. Consider the engagement score as the Euclidean distance between the user data point & the less engaged cluster (use the first clustering for this) (Euclidean Distance)
experience score to each user. Consider the experience score as the Euclidean distance between the user data point & the worst experience’s cluster.

# Engagement score

In [4]:
# centroid file from previous outputs
enga_centr = pd.read_csv('../data/centero_engagment.csv')
enga_centr = enga_centr.iloc[:,1:]
enga_centr

Unnamed: 0,Total UL and DL,Dur. (ms),sessions frequency
0,-0.018266,5.431628,0.046955
1,-0.009948,1.332984,-0.210176
2,1.256253,-0.241265,-0.218358
3,-1.266658,-0.253828,-0.220158
4,0.010122,-0.050115,2.807734
5,-0.002853,-0.416422,-0.232556


In [6]:
# Import standardized engagement metric values 
enga_metr = pd.read_csv('../data/std_engagement.csv')
enga_metr.index = enga_metr['MSISDN/Number']
enga_metr = enga_metr.iloc[:,1:4]
enga_metr.head()

Unnamed: 0_level_0,Total UL and DL,Dur. (ms),sessions frequency
MSISDN/Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
33601000000.0,-1.158862,20.571127,-0.495197
33601000000.0,0.921507,15.109232,-0.495197
33601000000.0,-0.575403,15.069424,-0.495197
33601010000.0,2.095055,14.589959,-0.495197
33601010000.0,0.701781,11.820586,0.731244


In [7]:
# calculating the distance between the metric values and the centroid of the the first k-means cluster
engagement_score = euclidean_distances(enga_metr.values.tolist(),[enga_centr.values.tolist()[0]])
engagement_score

array([[15.19208078],
       [ 9.73822996],
       [ 9.66909777],
       ...,
       [ 4.61464489],
       [ 4.66646744],
       [ 4.70325266]])

In [8]:
engagement_score = [val[0] for val in engagement_score.tolist()]
engagement_score[:2]

[15.192080784619563, 9.738229960336461]

# Experience score

In [10]:
# Get centroid value from the previous files
exper_centr = pd.read_csv('../data/centro_experience.csv')
exper_centr = exper_centr.iloc[:,1:]
exper_centr

Unnamed: 0,Average RTT,Average TCP,Average throughput
0,0.750116,-0.50605,-0.744422
1,-1.230701,0.827638,-0.152066
2,0.518475,-0.346097,1.408331


In [12]:
# standardized experience metric values
exper_metr = pd.read_csv('../data/std_experience.csv')
exper_metr.index = exper_metr['MSISDN/Number']
exper_metr = exper_metr.iloc[:106856,1:4]
exper_metr

Unnamed: 0_level_0,Average TCP,Average throughput,Average RTT
MSISDN/Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3.366117e+10,-1.357519,15.220434,0.510533
3.369817e+10,0.711986,11.928930,-0.596697
3.367251e+10,-1.480467,11.918292,-0.714487
3.365169e+10,-1.396600,10.960414,-0.855836
3.366856e+10,0.692157,10.681835,-0.596697
...,...,...,...
3.361648e+10,0.761222,-0.552987,-0.855836
3.362400e+10,0.761222,-0.552987,-1.303439
3.376193e+10,0.761222,-0.552987,-0.125535
3.369817e+10,0.761222,-0.552987,-1.067859


In [13]:
# Calculating the distance between the metric values and the centroid of the the first k-means cluster
experience_score = euclidean_distances(exper_metr.values.tolist(),[exper_centr.values.tolist()[0]])
experience_score

array([[15.91663653],
       [12.43591568],
       [12.62302166],
       ...,
       [ 0.62076363],
       [ 0.32701291],
       [ 1.99664168]])

In [14]:
experience_score = [val[0] for val in experience_score.tolist()]
experience_score[:2]

[15.916636529999163, 12.4359156801666]

# Consider the average of both engagement & experience scores as the satisfaction score & report the top 10 satisfied customer

In [15]:
#creating a dataframe to contain both engagement and experience score with index being 'MSISDN/Number' to identify the user
df = pd.DataFrame()
df['engagement_score'] = engagement_score
df['experience_score'] = experience_score
df.index = enga_metr.index
#result
df.head()

Unnamed: 0_level_0,engagement_score,experience_score
MSISDN/Number,Unnamed: 1_level_1,Unnamed: 2_level_1
33601000000.0,15.192081,15.916637
33601000000.0,9.73823,12.435916
33601000000.0,9.669098,12.623022
33601010000.0,9.414621,11.666216
33601010000.0,6.465718,11.189011


In [16]:
# Creating a new column for the dataframe representing satisfaction calculated as an average of engagement and experience score
df['satisfaction_score'] = (df['engagement_score'] + df['experience_score'])/2
df.head()

Unnamed: 0_level_0,engagement_score,experience_score,satisfaction_score
MSISDN/Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
33601000000.0,15.192081,15.916637,15.554359
33601000000.0,9.73823,12.435916,11.087073
33601000000.0,9.669098,12.623022,11.14606
33601010000.0,9.414621,11.666216,10.540418
33601010000.0,6.465718,11.189011,8.827364


Task 4.2 - Consider the average of both engagement & experience scores as the satisfaction score & report the top 10 satisfied customer

In [18]:
top10_satisfied_customers = df.sort_values(by='satisfaction_score', ascending=False).iloc[:10,:]
top10_satisfied_customers.to_csv('../data/top10_satisfied_customers.csv')
top10_satisfied_customers

Unnamed: 0_level_0,engagement_score,experience_score,satisfaction_score
MSISDN/Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
33601000000.0,15.192081,15.916637,15.554359
33626320000.0,21.29609,4.031117,12.663603
33614890000.0,20.047971,5.138084,12.593027
33625780000.0,20.110156,3.021788,11.565972
33601000000.0,9.669098,12.623022,11.14606
33601000000.0,9.73823,12.435916,11.087073
33601010000.0,9.414621,11.666216,10.540418
33659730000.0,18.700767,0.817626,9.759196
33603130000.0,14.092319,5.300125,9.696222
33604520000.0,14.127967,4.518197,9.323082


# Build a regression model of your choice to predict the satisfaction score of a customer.

In [19]:
# Form a new Model training Dataframe consisting all of the columns
train_df = enga_metr.copy(deep=True)
train_df['Average TCP'] = exper_metr['Average TCP'].to_list()
train_df['Average RTT'] = exper_metr['Average RTT'].to_list()
train_df['Average throughput'] = exper_metr['Average throughput'].to_list()
# Adding our average satisfaction score (this is going to be our predicted value)
train_df['satisfaction_score'] = df['satisfaction_score'].to_list()
train_df.head()

Unnamed: 0_level_0,Total UL and DL,Dur. (ms),sessions frequency,Average TCP,Average RTT,Average throughput,satisfaction_score
MSISDN/Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
33601000000.0,-1.158862,20.571127,-0.495197,-1.357519,0.510533,15.220434,15.554359
33601000000.0,0.921507,15.109232,-0.495197,0.711986,-0.596697,11.92893,11.087073
33601000000.0,-0.575403,15.069424,-0.495197,-1.480467,-0.714487,11.918292,11.14606
33601010000.0,2.095055,14.589959,-0.495197,-1.3966,-0.855836,10.960414,10.540418
33601010000.0,0.701781,11.820586,0.731244,0.692157,-0.596697,10.681835,8.827364


In [20]:
# Dividing the train data into actual train data (train + validation) and test data
# 80 (60 + 20) and 20
train_data = train_df.iloc[:int(len(train_df) * 0.8),:]
test_data = train_df.iloc[int(len(train_df) * 0.8):,:]

In [22]:
train_x = train_data.iloc[:,:-1]
train_y = train_data.iloc[:,-1:]
# 
test_x = test_data.iloc[:,:-1]
test_y = test_data.iloc[:,-1:]

In [23]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest = train_test_split(train_x,train_y,test_size=0.2,random_state=40)

In [24]:
# Training the Ridge Regression Model on the splited training data
# Implementing Ridge Regression Model
alpha_val = 0.8
rr = Ridge(alpha=alpha_val)
rr.fit(xtrain, ytrain)
# Validation value of the model
validation_score = rr.score(xtest, ytest)
print("The model score is: {:.5f}".format(validation_score))

The model score is: 0.88214


In [25]:
from joblib import dump
# Saving the final Model using joblibs dump
dump(rr, '../models/satisfaction_model.joblib')

FileNotFoundError: [Errno 2] No such file or directory: '../models/satisfaction_model.joblib'