In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Raw call records; the following cells read its 'date' and 'calls' columns.
calls_csv_path = '../data/calls.csv'
df = pd.read_csv(calls_csv_path)

In [None]:
# Split the textual date into its year (characters 0-3) and month
# (characters 5-6) parts.
# NOTE(review): assumes 'date' is an ISO-like 'YYYY-MM-...' string — confirm.
date_text = df['date']
df['year'] = date_text.str.slice(stop=4)
df['month'] = date_text.str.slice(5, 7)

# Show a handful of random rows as a sanity check of the new columns
df.sample(4)

In [None]:
# Total calls for every (year, month) pair, flattened back to plain columns
calls_per_year_month = df.groupby(['year', 'month'])['calls'].sum()
calls_by_month = calls_per_year_month.reset_index()
calls_by_month

In [None]:
# Calls summed per month label across all years; groupby returns the month
# keys in sorted order, which the bar chart relies on for its x axis.
rows_by_month = df.groupby('month')
monthly_calls = rows_by_month['calls'].sum()

monthly_calls

In [None]:
# Bar chart of the total number of calls per month (all years combined).

# Set style and figure size.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so prefer the new name and fall back to the
# legacy one only on older installations.
white_style = 'seaborn-v0_8-white'
if white_style not in plt.style.available:
    white_style = 'seaborn-white'
plt.style.use(white_style)  # Cleaner base style
plt.figure(figsize=(12, 6))


# Create bar plot
bars = plt.bar(monthly_calls.index, monthly_calls.values, color='#2E86C1')

# Customize plot
# NOTE(review): the title states a finding ("December has the most calls")
# that is hard-coded rather than derived from monthly_calls — confirm it
# still matches the data.
plt.title('Dicembre è il mese con più chiamate', fontsize=14, pad=15)
plt.xlabel('Mese', fontsize=12)
plt.ylabel('Numero di chiamate', fontsize=12)

# Remove grid and spines
plt.grid(False)
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

# Add value labels centred above each bar, using '.' as thousands separator
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width() / 2., height,
             f'{int(height):,}'.replace(',', '.'),
             ha='center', va='bottom')

# Adjust layout
plt.tight_layout()

plt.show()

In [None]:
def _share_of_year(yearly_calls):
    """Express each value as a percentage of the total of its group."""
    return (yearly_calls / yearly_calls.sum()) * 100


# Each month's share of its own year's calls, in percent
calls_within_year = calls_by_month.groupby('year')['calls']
calls_by_month['percentage'] = calls_within_year.transform(_share_of_year)
calls_by_month

In [None]:
# For every month, add up its yearly percentage shares across all years
percentage_by_month = calls_by_month.groupby('month')['percentage']
monthly_total_percentages = percentage_by_month.sum()
monthly_total_percentages

In [None]:
# 1. Average monthly percentages with confidence intervals
# 'count' is the number of yearly observations actually available for each
# month; it can be smaller than the number of distinct years when a year is
# only partially covered, so it is the right n for that month's standard error.
monthly_stats = calls_by_month.groupby('month')['percentage'].agg(['mean', 'std', 'count'])

# 95% interval half-width under a normal approximation (z = 1.96), computed
# from the unrounded standard deviation so rounding does not leak into it.
# Months with a single observation have an undefined std and get a NaN interval.
monthly_stats['ci'] = 1.96 * monthly_stats['std'] / np.sqrt(monthly_stats['count'])

# Round the summary columns only after the interval has been derived
monthly_stats[['mean', 'std']] = monthly_stats[['mean', 'std']].round(2)

# Plot with error bars
plt.figure(figsize=(12, 6))
plt.errorbar(monthly_stats.index, monthly_stats['mean'],
             yerr=monthly_stats['ci'], fmt='o-', color='#2E86C1')

plt.title('Media mensile delle chiamate (con intervallo di confidenza)', fontsize=14)
plt.xlabel('Mese', fontsize=12)
plt.ylabel('Percentuale media (%)', fontsize=12)
plt.grid(False)
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.savefig('../images/media_mensile.png', dpi=300, bbox_inches='tight')

# 2. Heatmap showing yearly patterns
plt.figure(figsize=(15, 8))

# Before pivoting, check whether any (year, month) pair occurs more than once
pair_counts = calls_by_month.groupby(['year', 'month']).size().reset_index(name='count')
duplicates = pair_counts.loc[pair_counts['count'] > 1]

print("Righe duplicate:")
print(duplicates)

# Collapse any repeated pairs by averaging so the pivot sees unique keys
df_agg = (
    calls_by_month
    .groupby(['year', 'month'])['percentage']
    .mean()
    .reset_index()
)

# Wide layout: one row per year, one column per month
pivot_df = df_agg.pivot(index='year', columns='month', values='percentage')

# Mean of the per-month column means, handed to the heatmap as its `center`
colour_center = pivot_df.mean().mean()
sns.heatmap(pivot_df, cmap='YlOrRd', annot=True, fmt='.1f',
            center=colour_center)

plt.title('Distribuzione mensile delle chiamate per anno (%)')
plt.savefig('../images/heatmap_mensile.png', dpi=300, bbox_inches='tight')
plt.show()