# Task 3 - Experience Analytics

This notebook analyzes user experience in the telecom dataset, focusing on network parameters (TCP retransmission, RTT, and throughput) and device (handset) characteristics.

In [1]:
import sys
# Add the parent directory to the module search path so the `scripts`
# package imported below can be resolved.
# NOTE(review): this assumes the kernel's working directory sits one level
# below the directory that contains `scripts/` — confirm.
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# NOTE(review): wildcard imports hide which module supplies each helper used
# in the cells below (e.g. load_and_preprocess_data,
# analyze_network_parameter) and let a later module silently shadow a name
# from an earlier one. Consider importing the helpers explicitly by name.
from scripts.data_processing_utils import *
from scripts.experience_utils import *

## Task 3.1 - Customer Experience Metrics

In [2]:
# Load the dataset through the project helper, then aggregate it into the
# experience-metrics table used by the rest of the notebook.
df = load_and_preprocess_data()
experience_metrics = aggregate_experience_metrics(df)

# Overview: row count of the aggregate followed by a preview of its first rows.
# NOTE(review): in the recorded output the `user_id` index is rendered as
# floats in scientific notation (3.360100e+10), so different customers look
# identical in the preview — consider keeping the identifier as an integer or
# string upstream.
customer_count = len(experience_metrics)
print("Experience Metrics Overview:")
print(f"Number of customers: {customer_count}")
print("\nSample of experience metrics:", experience_metrics.head(), sep="\n")

# Descriptive statistics of the aggregated table.
print("\nSummary Statistics:", experience_metrics.describe(), sep="\n")

Experience Metrics Overview:
Number of customers: 106857

Sample of experience metrics:
              tcp_dl_retrans  tcp_ul_retrans      rtt_dl     rtt_ul  \
user_id                                                               
3.360100e+10    2.080991e+07   759658.664811   46.000000   0.000000   
3.360100e+10    2.080991e+07   759658.664811   30.000000   1.000000   
3.360100e+10    2.080991e+07   759658.664811  109.795706  17.662883   
3.360101e+10    1.066000e+03   759658.664811   69.000000  15.000000   
3.360101e+10    1.507977e+07   390430.332406   57.000000   2.500000   

              throughput_dl  throughput_ul                         handset  \
user_id                                                                      
3.360100e+10           37.0           39.0  Huawei P20 Lite Huawei Nova 3E   
3.360100e+10           48.0           51.0          Apple iPhone 7 (A1778)   
3.360100e+10           48.0           49.0                       undefined   
3.360101e+10          20

## Task 3.2 - Network Parameter Analysis

In [3]:
# For each network parameter, ask the project helper for an analysis and
# print the entries it returns under 'top', 'bottom' and 'frequent'.
#
# NOTE(review): the recorded run of this cell stopped with
# KeyError: 'tcp_retransmission', so the first helper call (or the lookup it
# performs) does not recognise that name. Presumably the helper expects a
# different parameter/column name than the one passed here — verify against
# analyze_network_parameter and the columns of `df` before relying on this
# cell.


def _print_parameter_report(heading, report):
    """Print one analysis result under the given heading line."""
    print(heading)
    print("Top 10 values:", report['top'])
    print("Bottom 10 values:", report['bottom'])
    print("Most frequent 10 values:", report['frequent'])


# TCP retransmission
tcp_analysis = analyze_network_parameter(df, 'tcp_retransmission')
_print_parameter_report("TCP Retransmission Analysis:", tcp_analysis)

# Round-trip time
rtt_analysis = analyze_network_parameter(df, 'rtt')
_print_parameter_report("\nRTT Analysis:", rtt_analysis)

# Throughput
throughput_analysis = analyze_network_parameter(df, 'throughput')
_print_parameter_report("\nThroughput Analysis:", throughput_analysis)

KeyError: 'tcp_retransmission'

## Task 3.3 - Handset Analysis

In [4]:
# NOTE(review): the recorded run of this cell failed with
# KeyError: 'Column not found: avg_throughput'. That wording matches the error
# pandas raises when a missing column is selected from a groupby object, so
# one of the helpers below presumably groups `df` and selects
# 'avg_throughput', which `df` does not contain — confirm which helper and
# create or rename the column upstream.

# Throughput per handset: compute, plot, then print summary statistics.
throughput_dist = analyze_throughput_by_handset(df)
plot_throughput_distribution(throughput_dist)
print(
    "\nThroughput Distribution by Handset Type:",
    throughput_dist.describe(),
    sep="\n",
)

# TCP retransmission per handset: same three steps.
tcp_by_handset = analyze_tcp_by_handset(df)
plot_tcp_by_handset(tcp_by_handset)
print(
    "\nTCP Retransmission by Handset Type:",
    tcp_by_handset.describe(),
    sep="\n",
)

KeyError: 'Column not found: avg_throughput'

## Task 3.4 - Experience Clustering

In [None]:
# Cluster the aggregated experience metrics with the project helper. The
# result is read below through its 'stats', 'data' and 'descriptions' keys.
# NOTE(review): this cell has no execution count or recorded output, so it has
# not been run in the saved notebook; it depends on `experience_metrics` from
# the Task 3.1 cell.
cluster_results = perform_experience_clustering(experience_metrics)

# Per-cluster statistics.
print("Cluster Characteristics:", cluster_results['stats'], sep="\n")

# Visualise the clustered data.
plot_experience_clusters(cluster_results['data'])

# One description per cluster, each preceded by a blank line and its label.
print("\nCluster Descriptions:")
for cluster_id, summary in cluster_results['descriptions'].items():
    print(f"\nCluster {cluster_id}:", summary, sep="\n")