# Analytics Verification & Data Analysis

This notebook verifies the analytics module of the Gym Membership System and performs exploratory data analysis (EDA) on the `data` folder.

## Objectives
1. Load data from the `data/` directory.
2. Replicate core analytics calculations (Churn, Retention, LTV).
3. Visualize the same key metrics as the application dashboard.
4. Verify the integrity of the generated dataset.

In [None]:
import os
import sys
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Make the project's `src` directory importable so the notebook exercises the
# real application modules rather than a re-implementation of their logic.
# The relative path is resolved against the current working directory.
# NOTE(review): this presumably expects the notebook to be launched from the
# directory that contains `src/` — confirm.
SRC_DIR = os.path.abspath('src')
if SRC_DIR not in sys.path:
    # Guarded so that re-running this cell does not stack duplicate entries.
    sys.path.append(SRC_DIR)

from data_manager import DataManager
from analytics import Analytics

# Plotting defaults applied to the figures created in the cells below.
plt.style.use('ggplot')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (10, 6)

## 1. Load Data
We use the `DataManager` class to load the JSON files from the `data` directory.

In [None]:
# Point a DataManager at the data directory and hand it to Analytics, which
# receives the manager as its only constructor argument.
DATA_DIR = "data"
dm = DataManager(data_dir=DATA_DIR)
analytics = Analytics(dm)

# Sanity check: size of each collection exposed by the manager.
member_total = len(dm.members_db)
membership_total = len(dm.membership_history)
payment_total = len(dm.payments_log)
attendance_total = len(dm.attendance_log)

print(f"Loaded {member_total} members")
print(f"Loaded {membership_total} membership records")
print(f"Loaded {payment_total} payment records")
print(f"Loaded {attendance_total} attendance logs")

## 2. Retention Metrics Verification
Comparing Churn and Retention rates with expected values.

In [None]:
# Headline figures exactly as the application's Analytics class computes them.
churn_rate = analytics.calculate_churn_rate()
retention_rate = analytics.calculate_retention_rate()

print(f"Churn Rate (Log): {churn_rate}%")
print(f"Retention Rate: {retention_rate}%")

# Retention trend requested with an argument of 6 (the title below labels it
# as the last six months).
# NOTE(review): the plot assumes the returned payload converts to a frame with
# 'months' and 'rates' columns — confirm against Analytics.get_retention_trend.
trend = analytics.get_retention_trend(6)
df_trend = pd.DataFrame(trend)

fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=df_trend, x='months', y='rates', marker='o', linewidth=2.5, ax=ax)
# Dashed reference line marking the 70% retention target.
ax.axhline(70, color='red', linestyle='--', label='Target (70%)')
ax.set_title('Retention Rate Trend (Last 6 Months)')
ax.set_xlabel('Month')
ax.set_ylabel('Retention Rate (%)')
ax.legend()
plt.show()

## 3. Revenue Analysis
Breakdown of revenue by month and prediction for future revenue.

In [None]:
# Revenue history as reported by the Analytics class.
# NOTE(review): the argument 6 is presumably a look-back in months, and the
# payload is assumed to convert to a frame with 'months' and 'revenue'
# columns — confirm against Analytics.get_historical_revenue_trend.
hist_rev = analytics.get_historical_revenue_trend(6)
df_rev = pd.DataFrame(hist_rev)

fig, ax = plt.subplots(figsize=(10, 5))
# Passing `palette` without `hue` is deprecated in seaborn 0.13 and scheduled
# for removal, so the x variable is also assigned to `hue` to colour each bar.
# `dodge=False` keeps the bars full width on seaborn versions that would
# otherwise reserve a slot per hue level.
sns.barplot(
    data=df_rev,
    x='months',
    y='revenue',
    hue='months',
    palette='viridis',
    dodge=False,
    ax=ax,
)
# The hue legend would only repeat the x tick labels; drop it when one was
# created (whether it is created depends on the seaborn version).
hue_legend = ax.get_legend()
if hue_legend is not None:
    hue_legend.remove()
ax.set_title('Historical Revenue')
ax.set_ylabel('Revenue (PKR)')
plt.show()

In [None]:
# Revenue forecast from the Analytics class, requested with an argument of 6
# (the title below labels it as the next six months).
forecast = analytics.predict_revenue(6)
df_forecast = pd.DataFrame(forecast)

fig, ax = plt.subplots(figsize=(10, 5))
# Both series are drawn at the same x positions, so the second set of bars is
# painted over the first; the shared alpha keeps both visible.
forecast_series = (
    ('predicted', 'Predicted Total'),
    ('guaranteed', 'Guaranteed (Recurring)'),
)
for column, series_label in forecast_series:
    ax.bar(df_forecast['months'], df_forecast[column], label=series_label, alpha=0.7)
ax.set_title('Revenue Forecast (Next 6 Months)')
ax.set_ylabel('Projected Revenue')
ax.legend()
plt.show()

## 4. Peak Hours Analysis
Analyzing attendance logs to find the busiest times.

In [None]:
from collections import Counter


def check_in_hours(attendance_log):
    """Extract the hour of day from each attendance record's check-in time.

    Args:
        attendance_log: iterable of dict-like records; each may carry a
            ``check_in_time`` value in a format accepted by
            ``datetime.fromisoformat``.

    Returns:
        A ``(hours, skipped)`` tuple. ``hours`` is the list of ``.hour``
        values of every check-in time that parsed. ``skipped`` is the number
        of records whose ``check_in_time`` was present but could not be
        parsed. Records with a missing or empty ``check_in_time`` are ignored
        and not counted as skipped.
    """
    hours = []
    skipped = 0
    for log in attendance_log:
        raw_time = log.get('check_in_time')
        if not raw_time:
            continue
        try:
            hours.append(datetime.fromisoformat(raw_time).hour)
        except (TypeError, ValueError):
            # ValueError: malformed string; TypeError: non-string value.
            # Count instead of silently dropping so bad data is visible.
            skipped += 1
    return hours, skipped


hours, skipped_check_ins = check_in_hours(dm.attendance_log)
if skipped_check_ins:
    print(f"Skipped {skipped_check_ins} attendance logs with an unparseable check_in_time")

hour_counts = Counter(hours)
# Plot window: hours 6 through 22, matching the "6 AM - 10 PM" title.
x_hours = list(range(6, 23))
y_counts = [hour_counts.get(h, 0) for h in x_hours]

# Check-ins outside the window are not plotted; surface them rather than
# letting them vanish from the analysis unnoticed.
outside_window = len(hours) - sum(y_counts)
if outside_window:
    print(f"{outside_window} check-ins fall outside the plotted 6 AM - 10 PM window")

fig, ax = plt.subplots(figsize=(12, 5))
sns.lineplot(x=x_hours, y=y_counts, marker='o', color='orange', ax=ax)
ax.fill_between(x_hours, y_counts, color='orange', alpha=0.3)
ax.set_title('Peak Gym Hours (6 AM - 10 PM)')
ax.set_xlabel('Hour of Day')
ax.set_ylabel('Number of Check-ins')
ax.set_xticks(x_hours)
ax.grid(True)
plt.show()

## 5. Member Status Distribution
Visualizing the breakdown of active vs. inactive vs. frozen members.

In [None]:
# Tally the `status` field across every entry in the membership history.
# NOTE(review): this counts history records, not distinct members — if a
# member can appear in several records, the shares below are per record.
# Confirm that this matches the intended "current status" breakdown.
status_counts = Counter(record['status'] for record in dm.membership_history)
status_labels = list(status_counts.keys())
status_sizes = list(status_counts.values())
wedge_colors = ['#66b3ff', '#99ff99', '#ffcc99', '#ff9999']

fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(
    status_sizes,
    labels=status_labels,
    autopct='%1.1f%%',
    startangle=140,
    colors=wedge_colors,
)
ax.set_title('Membership Status Distribution')
plt.show()