In [None]:
# Import required libraries
import sys
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter

# Add src directory to path to import our modules.
# The entry is relative, so it is resolved against the kernel's current
# working directory when the import below runs.
# Guarded with a membership test so that re-running this cell does not keep
# appending duplicate '../src' entries to sys.path.
SRC_DIR = '../src'
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)
from data_processor import DataProcessor

# Notebook-wide matplotlib defaults: 12x8 inch figures rendered at 300 dpi
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'figure.dpi': 300,
})


In [None]:
# Read the spreadsheet through DataProcessor and keep a handle on its sessions
DATA_FILE = "../data/data.xlsx"

data_processor = DataProcessor(DATA_FILE)
data_processor.load_data()

# `sessions` is the collection every plotting cell below iterates over
sessions = data_processor.sessions

print(f"Data loaded: {len(sessions)} sessions")


In [None]:
# Plot 1: histogram of session switching points (saved as cluster_histogram.png)
# Sessions whose switch_point is None are left out of the histogram.
switch_points = [
    sess.switch_point
    for sess in sessions
    if sess.switch_point is not None
]

fig, ax = plt.subplots(figsize=(12, 8))
ax.hist(switch_points, bins=20, alpha=0.7, color='skyblue', edgecolor='navy')
ax.set_title('Distribution of Switching Points', fontsize=16, fontweight='bold')
ax.set_xlabel('Switching Point', fontsize=14)
ax.set_ylabel('Number of Sessions', fontsize=14)
ax.grid(True, alpha=0.3)
fig.tight_layout()

# Write the figure to disk before showing it
fig.savefig("cluster_histogram.png", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Plot 2: bar chart of how often each weight value occurs
# (saved as weight_count_histogram.png)
SWITCH_TOKEN = -1
TERMINATION_TOKEN = 300

# Flatten every session's weight_sequence, skipping entries equal to
# SWITCH_TOKEN or TERMINATION_TOKEN
all_weights = [
    value
    for sess in sessions
    for value in sess.weight_sequence
    if value != SWITCH_TOKEN and value != TERMINATION_TOKEN
]

# Tally each distinct weight, then order the bars by ascending weight value
weight_counter = Counter(all_weights)
weights = sorted(weight_counter)
counts = [weight_counter[value] for value in weights]

fig, ax = plt.subplots(figsize=(15, 8))
ax.bar(weights, counts, alpha=0.7, color='lightcoral', edgecolor='darkred')
ax.set_title('Distribution of Weight Values', fontsize=16, fontweight='bold')
ax.set_xlabel('Weight Value', fontsize=14)
ax.set_ylabel('Frequency', fontsize=14)
ax.grid(True, alpha=0.3, axis='y')  # horizontal grid lines only
fig.tight_layout()

# Write the figure to disk before showing it
fig.savefig("weight_count_histogram.png", dpi=300, bbox_inches='tight')
plt.show()
