In [1]:
# Load (or reload) IPython's autoreload extension and switch it to mode 2,
# which re-imports modules before each cell is executed so that edits to
# project source files are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import os,sys
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
# Move the working directory up one level (presumably to the project root so
# that `src.*` imports and relative paths resolve -- TODO confirm).
# The move is guarded because '..' is relative to the *current* directory:
# re-running this cell without the guard would climb one more level each time.
if '_PROJECT_ROOT' not in globals():
    os.chdir('..')
    _PROJECT_ROOT = os.getcwd()

In [3]:
# Fix the global NumPy seed so every synthetic draw below is reproducible.
np.random.seed(42)

# Size of the synthetic population: one row per user, one column per feature.
num_users, num_features = 1000, 5

# Uniform [0, 1) feature matrix of shape (num_users, num_features).
user_data = np.random.random_sample(size=(num_users, num_features))

In [4]:
# Number of centroids drawn for each of the two score types.
num_clusters = 3

# Synthetic centroids in feature space, drawn uniformly from [0, 1).
# Engagement is drawn first, then experience, so the random stream is
# consumed in a fixed order.
centroid_shape = (num_clusters, num_features)
engagement_centroids = np.random.random_sample(size=centroid_shape)
experience_centroids = np.random.random_sample(size=centroid_shape)


In [5]:
# Engagement score: Euclidean distance from each user to its NEAREST
# engagement centroid (pairwise distances, then the minimum over centroids).
engagement_distances = euclidean_distances(user_data, engagement_centroids)
engagement_scores = engagement_distances.min(axis=1)

# Experience score: same computation against the experience centroids.
# NOTE(review): this was previously described as the distance to the "worst
# experience cluster", but no specific cluster is selected -- the minimum
# over all centroids is taken. Confirm which definition is intended.
experience_distances = euclidean_distances(user_data, experience_centroids)
experience_scores = experience_distances.min(axis=1)

In [6]:
# Satisfaction is the plain average of the engagement and experience scores.
score_total = engagement_scores + experience_scores
satisfaction_scores = score_total / 2

# Synthetic identifiers: user_0, user_1, ..., one per generated user.
user_ids = [f'user_{i}' for i in range(num_users)]

# One row per user: identifier plus the three scores.
satisfaction_columns = {
    'MSISDN/Number': user_ids,
    'Engagement Score': engagement_scores,
    'Experience Score': experience_scores,
    'Satisfaction Score': satisfaction_scores,
}
satisfaction_df = pd.DataFrame(satisfaction_columns)

# The ten rows with the highest satisfaction score.
top_10_satisfied_customers = satisfaction_df.nlargest(n=10, columns='Satisfaction Score')

In [7]:

# Feature matrix and target for the regression.
# The features are the `user_data` rows the scores were computed from. The
# earlier version drew a brand-new random matrix here, which is statistically
# independent of the target, so the fitted model could not learn anything and
# its RMSE said nothing about predictability of the score.
X = user_data
y = satisfaction_scores  # Using satisfaction scores as the target variable

# Hold out 20% of the users for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit an ordinary least-squares model on the training portion.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict satisfaction for the held-out users.
y_pred = model.predict(X_test)

# Root Mean Squared Error on the held-out users (same units as the score).
rmse = np.sqrt(mean_squared_error(y_test, y_pred))


In [8]:
# Run k-means clustering (k=2) on the combined engagement and experience scores.
# Each row of the input is one user: [engagement score, experience score].
score_matrix = np.column_stack((engagement_scores, experience_scores))

# random_state pins the centroid initialisation so that re-running this cell
# on its own yields the same partition and the same label numbering.
# n_init is set explicitly because its default differs between scikit-learn
# versions (10 historically, 'auto' in newer releases).
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
cluster_labels = kmeans.fit_predict(score_matrix)


In [9]:
# Attach each user's k-means label as a new 'Cluster' column (in place, so
# later cells that use satisfaction_df see the column).
satisfaction_df['Cluster'] = cluster_labels

# Per-cluster mean of the satisfaction and experience scores.
mean_per_column = dict.fromkeys(['Satisfaction Score', 'Experience Score'], 'mean')
scores_by_cluster = satisfaction_df.groupby('Cluster')
cluster_agg = scores_by_cluster.agg(mean_per_column)


In [11]:
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from src.config import DBConfig

# Load the database settings (user, password, host, port, database name).
db_config = DBConfig.load()

# Build the PostgreSQL (psycopg2 driver) URL from its components instead of
# interpolating them into a string: URL.create escapes special characters, so
# a password containing '@', '/', ':' or '%' no longer corrupts the URL.
# Assumes the config values are strings -- TODO confirm against DBConfig.
_db_port = db_config['DATABASE_PORT']
connection_url = URL.create(
    drivername="postgresql+psycopg2",
    username=db_config['DATABASE_USER'],
    password=db_config['DATABASE_PASSWORD'],
    host=db_config['DATABASE_HOST'],
    port=int(_db_port) if _db_port else None,
    database=db_config['DATABASE_NAME'],
)
# String form kept under the original name for any code that still expects it.
# It contains the real password -- do not print or log it.
connection_string = connection_url.render_as_string(hide_password=False)
engine = create_engine(connection_url)

# Export satisfaction_df to the PostgreSQL database.
# if_exists='replace' drops and recreates the table on every run.
satisfaction_df.to_sql(name='user_satisfaction_scores', con=engine, if_exists='replace', index=False)

# Read back the first rows to confirm the export succeeded.
query = "SELECT * FROM user_satisfaction_scores LIMIT 10"
select_output = pd.read_sql(query, con=engine)
print(select_output)


  MSISDN/Number  Engagement Score  Experience Score  Satisfaction Score  \
0        user_0          0.380709          0.990321            0.685515   
1        user_1          0.568693          0.568865            0.568779   
2        user_2          0.323694          1.238949            0.781321   
3        user_3          0.461158          0.669634            0.565396   
4        user_4          0.804472          0.565776            0.685124   
5        user_5          0.815950          0.553378            0.684664   
6        user_6          1.068066          0.634343            0.851205   
7        user_7          0.910076          0.431707            0.670892   
8        user_8          0.613600          0.522600            0.568100   
9        user_9          0.641243          0.524904            0.583074   

   Cluster  
0        1  
1        1  
2        1  
3        1  
4        0  
5        0  
6        0  
7        0  
8        1  
9        0  
