In [6]:
!pip install matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# --- 1. Sample Dataset ---
data = {
    'Date': pd.date_range(start='2025-01-01', periods=14, freq='D').tolist() * 2,
    'User_ID': list(range(1001, 1008)) * 4,
    'Country': ['USA'] * 14 + ['India'] * 14,
    'Page_Views': np.random.randint(5, 20, 28),
    'Time_Spent': np.random.randint(60, 600, 28),  # in seconds
    'Bounce_Rate': np.random.uniform(30, 80, 28)   # in %
}

df = pd.DataFrame(data)

# --- 2. MultiIndexing: Country + Date ---
df.set_index(['Country', 'Date'], inplace=True)
df.sort_index(inplace=True)

print("\nMultiIndexed DataFrame:")
print(df.head())

# --- 3. Reshape using melt and unstack ---
df_reset = df.reset_index()
melted = df_reset.melt(
    id_vars=['Country', 'Date'],
    value_vars=['Page_Views', 'Time_Spent', 'Bounce_Rate'],
    var_name='Metric',
    value_name='Value'
)
unstacked = melted.set_index(['Country', 'Date', 'Metric']).unstack('Metric')

print("\nUnstacked Engagement Metrics:")
print(unstacked.head())

# --- 4. Rolling and Expanding Calculations (Fixed) ---
grouped = df.groupby(level='Country')

# Rolling 7-day average for Time Spent
weekly_avg_time = (
    grouped['Time_Spent']
    .rolling(window=7, min_periods=1)
    .mean()
    .rename('Weekly_Avg_Time')
    .reset_index()
)

# Expanding cumulative Page Views
cumulative_views = (
    grouped['Page_Views']
    .expanding()
    .sum()
    .rename('Cumulative_Page_Views')
    .reset_index()
)

# Merge both
rolling_df = pd.merge(weekly_avg_time, cumulative_views, on=['Country', 'Date'])

print("\nRolling & Expanding Summary:")
print(rolling_df.head())

# --- 5. Loop-based vs Vectorized Bounce Rate Trends (Fixed) ---
# Recreate reset df for bounce trend
df_reset = df.reset_index()

# Loop-based approach
def loop_bounce_trend(group):
    diffs = [np.nan]
    for i in range(1, len(group)):
        diffs.append(group.iloc[i]['Bounce_Rate'] - group.iloc[i - 1]['Bounce_Rate'])
    return pd.Series(diffs, index=group.index)

loop_bounce = df_reset.groupby('Country').apply(loop_bounce_trend)
loop_bounce.index = df_reset.index
df_reset['Bounce_Trend_Loop'] = loop_bounce

# Vectorized approach
df_reset['Bounce_Trend_Vectorized'] = df_reset.groupby('Country')['Bounce_Rate'].diff()

print("\nBounce Rate Trends Comparison:")
print(df_reset[['Country', 'Date', 'Bounce_Rate', 'Bounce_Trend_Loop', 'Bounce_Trend_Vectorized']].head())

# --- 6. Optimized .apply() instead of .iterrows() ---
# Flagging High Engagement
df_reset['Engagement_Flag'] = df_reset[['Page_Views', 'Time_Spent']].apply(
    lambda x: 'High' if x.Page_Views > 10 and x.Time_Spent > 300 else 'Low', axis=1
)

print("\nEngagement Flagging:")
print(df_reset[['Page_Views', 'Time_Spent', 'Engagement_Flag']].head())

# --- 7. Matplotlib Line Plot ---
plt.figure(figsize=(10, 6))
for country in rolling_df['Country'].unique():
    subset = rolling_df[rolling_df['Country'] == country]
    plt.plot(subset['Date'], subset['Weekly_Avg_Time'], label=f'{country} Weekly Avg Time')

plt.title("Weekly Average Time Spent by Country")
plt.xlabel("Date")
plt.ylabel("Avg Time Spent (seconds)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()



MultiIndexed DataFrame:
                    User_ID  Page_Views  Time_Spent  Bounce_Rate
Country Date                                                    
India   2025-01-01     1001          19         278    58.976360
        2025-01-02     1002          14         393    41.905502
        2025-01-03     1003          16         200    63.147978
        2025-01-04     1004          10          95    74.148105
        2025-01-05     1005           9         263    62.875545

Unstacked Engagement Metrics:
                         Value                      
Metric             Bounce_Rate Page_Views Time_Spent
Country Date                                        
India   2025-01-01   58.976360       19.0      278.0
        2025-01-02   41.905502       14.0      393.0
        2025-01-03   63.147978       16.0      200.0
        2025-01-04   74.148105       10.0       95.0
        2025-01-05   62.875545        9.0      263.0


ValueError: cannot insert Country, already exists