# Task 4 - Satisfaction Analysis

This notebook analyzes customer satisfaction based on engagement and experience metrics.

In [1]:
import os
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

from scripts.data_processing_utils import *
from scripts.engagement_utils import *
from scripts.experience_utils import *
from scripts.satisfaction_utils import *

## Task 4.1 - Calculate Engagement and Experience Scores

In [2]:
# Load the dataset once; both metric tables below are derived from this frame.
df = load_and_preprocess_data()

# Engagement side: aggregated metrics plus the columns handed to the scorer.
user_metrics = aggregate_user_metrics(df)
engagement_features = ['total_sessions', 'total_duration', 'total_volume']

# Experience side: aggregated metrics plus the columns handed to the scorer.
experience_metrics = aggregate_experience_metrics(df)
experience_features = ['tcp_retransmission', 'avg_rtt', 'avg_throughput']

# NOTE(review): the recorded run of this cell stopped with
# "NameError: name 'calculate_engagement_scores' is not defined", so none of the
# star-imported modules exposed that helper at the time — confirm where it lives.
# NOTE(review): kmeans_engagement and kmeans_experience are not defined anywhere
# in this notebook; presumably they are fitted models exported by the helper
# modules — verify, otherwise this cell cannot run on a fresh kernel.
engagement_scores = calculate_engagement_scores(
    user_metrics,
    engagement_features,
    kmeans_engagement,
)
user_metrics['engagement_score'] = engagement_scores

experience_scores = calculate_experience_scores(
    experience_metrics,
    experience_features,
    kmeans_experience,
)
experience_metrics['experience_score'] = experience_scores

# Show the first rows of each freshly added score column.
print("Sample of engagement scores:", user_metrics[['engagement_score']].head(), sep="\n")
print("\nSample of experience scores:", experience_metrics[['experience_score']].head(), sep="\n")

NameError: name 'calculate_engagement_scores' is not defined

## Task 4.2 - Calculate Satisfaction Scores

In [3]:
# Put both scores side by side in one frame that reuses user_metrics' index.
# NOTE(review): this assumes engagement_scores and experience_scores line up with
# user_metrics.index (same length / same labels) — confirm against the helpers.
satisfaction_df = pd.DataFrame(
    {'engagement_score': engagement_scores, 'experience_score': experience_scores},
    index=user_metrics.index,
)

# Derive the satisfaction score from the two component columns.
engagement_col = satisfaction_df['engagement_score']
experience_col = satisfaction_df['experience_score']
satisfaction_df['satisfaction_score'] = calculate_satisfaction_scores(engagement_col, experience_col)

# Report the ten rows with the highest satisfaction score.
top_satisfied = satisfaction_df.nlargest(10, 'satisfaction_score')
print("Top 10 Satisfied Customers:", top_satisfied, sep="\n")

NameError: name 'engagement_scores' is not defined

## Task 4.3 - Build Regression Model

In [None]:
# Full list of predictor names (engagement columns first, then experience columns).
features = engagement_features + experience_features

# Design matrix: the two metric tables placed side by side, aligned on their index.
engagement_inputs = user_metrics[engagement_features]
experience_inputs = experience_metrics[experience_features]
X = pd.concat([engagement_inputs, experience_inputs], axis=1)

# Train against the satisfaction score; the helper returns a five-element tuple
# (presumably fitted model, train/test scores and the held-out split — see
# train_satisfaction_model for the exact contract).
target = satisfaction_df['satisfaction_score']
model, train_score, test_score, X_test, y_test = train_satisfaction_model(X, target)

# Predict on the returned test split and visualise predictions against the truth.
y_pred = model.predict(X_test)
plot_model_performance(y_test, y_pred)

print(f"Training R² score: {train_score:.4f}")
print(f"Testing R² score: {test_score:.4f}")

## Task 4.4 - Satisfaction Clustering

In [None]:
# Perform k-means clustering (k=2) on the engagement and experience scores.
X = satisfaction_df[['engagement_score', 'experience_score']]

# n_init is pinned explicitly: scikit-learn changed its default from 10 to 'auto'
# in 1.4 (and warns in 1.2-1.3 when it is left unset), so without it the number of
# restarts, and potentially the cluster labels, depends on the installed version.
# random_state keeps the initialisation itself reproducible.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)

# Store each row's cluster label next to its scores.
satisfaction_df['satisfaction_cluster'] = kmeans.fit_predict(X)

# Plot clusters
plot_satisfaction_clusters(satisfaction_df)

## Task 4.5 - Aggregate Scores by Cluster

In [None]:
# Mean of every score within each satisfaction cluster, rounded to two decimals.
score_columns = ['engagement_score', 'experience_score', 'satisfaction_score']
cluster_stats = (
    satisfaction_df
    .groupby('satisfaction_cluster')[score_columns]
    .mean()
    .round(2)
)

print("Average Scores by Cluster:", cluster_stats, sep="\n")

## Task 4.6 - Export to MySQL

In [None]:
# Prepare final table for export. Work on a copy so that naming the index
# 'user_id' does not touch satisfaction_df itself.
final_table = satisfaction_df.copy()
final_table.index.name = 'user_id'

# MySQL connection parameters.
# Credentials are read from the environment so real secrets never have to be
# typed into (and committed with) the notebook. Set MYSQL_HOST, MYSQL_USER,
# MYSQL_PASSWORD, MYSQL_DATABASE and MYSQL_PORT before running this cell; the
# fallbacks are the original placeholder values.
connection_params = {
    'host': os.environ.get('MYSQL_HOST', 'localhost'),
    'user': os.environ.get('MYSQL_USER', 'your_username'),
    'password': os.environ.get('MYSQL_PASSWORD', 'your_password'),
    'database': os.environ.get('MYSQL_DATABASE', 'your_database'),
    # Environment values are strings; the port is kept as an int as before.
    'port': int(os.environ.get('MYSQL_PORT', '3306')),
}

# Export to MySQL; the helper's return value is treated as a success flag.
success = export_to_mysql(final_table, 'user_satisfaction', connection_params)

if success:
    print("Data successfully exported to MySQL")
else:
    print("Failed to export data to MySQL")