# Financial Analysis

This notebook performs comprehensive financial analysis including:
- Revenue and expense analysis
- Profitability metrics
- Category performance
- Financial ratios
- Store performance comparison

## Setup and Data Loading

In [None]:
import sys
from pathlib import Path

# Add project root to path
# The parent of the current working directory is put first on sys.path so the
# `src.*` imports below resolve. This assumes the notebook is run from a
# directory located directly under the project root.
project_root = Path().absolute().parent
sys.path.insert(0, str(project_root))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import warnings
# NOTE(review): this silences every warning category for the whole session,
# including deprecation and runtime (e.g. divide-by-zero) warnings raised by
# the analysis cells — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Configure plotting
plt.style.use('default')
sns.set_palette("husl")
# IPython magic (not plain Python): show matplotlib figures inline in the notebook.
%matplotlib inline

# Import project modules
from src.data_processing.database_manager import DatabaseManager
from src.analysis.financial_metrics import FinancialMetricsCalculator
from src.analysis.trend_analysis import TrendAnalyzer

print("Setup complete!")

In [None]:
# Instantiate the project helpers that the rest of the notebook relies on.
db_manager = DatabaseManager()
metrics_calculator = FinancialMetricsCalculator()
trend_analyzer = TrendAnalyzer()

# Fetch every transaction row, ordered by date.
query = """
SELECT date, amount, category, subcategory, description, 
       store_location, payment_method
FROM financial_data
ORDER BY date
"""

df = db_manager.query_data(query)
# Parse the date column into pandas timestamps; the .date() calls below rely on it.
df['date'] = pd.to_datetime(df['date'])

# Quick sanity report on what was loaded.
print(f"Loaded {len(df):,} records")

first_day = df['date'].min().date()
last_day = df['date'].max().date()
print(f"Date range: {first_day} to {last_day}")

smallest_amount = df['amount'].min()
largest_amount = df['amount'].max()
print(f"Amount range: ${smallest_amount:.2f} to ${largest_amount:.2f}")

## Basic Financial Metrics

In [None]:
# Calculate basic financial metrics
basic_metrics = metrics_calculator.calculate_basic_metrics(df)

print("BASIC FINANCIAL METRICS")
print("=" * 40)
print(f"Period: {basic_metrics.period_start.date()} to {basic_metrics.period_end.date()}")
print(f"Total Revenue: ${basic_metrics.total_revenue:,.2f}")
print(f"Total Expenses: ${basic_metrics.total_expenses:,.2f}")
print(f"Net Income: ${basic_metrics.net_income:,.2f}")
print(f"Gross Margin: {basic_metrics.gross_margin:.2f}%")
print(f"Average Transaction: ${basic_metrics.average_transaction:.2f}")
print(f"Transaction Count: {basic_metrics.transaction_count:,}")

# Create summary visualization: a 2x2 grid with one headline figure per panel
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Revenue vs Expenses
categories = ['Revenue', 'Expenses']
values = [basic_metrics.total_revenue, basic_metrics.total_expenses]
colors = ['green', 'red']

axes[0, 0].bar(categories, values, color=colors, alpha=0.7)
axes[0, 0].set_title('Revenue vs Expenses')
axes[0, 0].set_ylabel('Amount ($)')
# Lift every label by 1% of the larger value so it sits just above its bar
label_lift = max(values) * 0.01
for i, v in enumerate(values):
    axes[0, 0].text(i, v + label_lift, f'${v:,.0f}', ha='center', va='bottom')

# Net Income
net_income = basic_metrics.net_income
net_color = 'green' if net_income >= 0 else 'red'
axes[0, 1].bar(['Net Income'], [net_income], color=net_color, alpha=0.7)
axes[0, 1].set_title('Net Income')
axes[0, 1].set_ylabel('Amount ($)')
# Keep the label outside the bar tip. A loss bar hangs below zero, so its
# label goes underneath the tip (anchored by its top edge); an upward offset,
# as used for a profit, would draw the text inside the bar.
net_label_gap = abs(net_income) * 0.05
if net_income >= 0:
    net_label_y, net_label_va = net_income + net_label_gap, 'bottom'
else:
    net_label_y, net_label_va = net_income - net_label_gap, 'top'
axes[0, 1].text(0, net_label_y, f'${net_income:,.0f}', ha='center', va=net_label_va)

# Transaction metrics
axes[1, 0].bar(['Avg Transaction'], [basic_metrics.average_transaction], color='blue', alpha=0.7)
axes[1, 0].set_title('Average Transaction Amount')
axes[1, 0].set_ylabel('Amount ($)')
axes[1, 0].text(0, basic_metrics.average_transaction * 1.05,
                f'${basic_metrics.average_transaction:.2f}', ha='center', va='bottom')

# Transaction count
axes[1, 1].bar(['Total Transactions'], [basic_metrics.transaction_count], color='orange', alpha=0.7)
axes[1, 1].set_title('Total Transaction Count')
axes[1, 1].set_ylabel('Count')
axes[1, 1].text(0, basic_metrics.transaction_count * 1.05,
                f'{basic_metrics.transaction_count:,}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

## Monthly Performance Analysis

In [None]:
# Calculate monthly metrics
monthly_metrics = metrics_calculator.calculate_monthly_metrics(df)

print("MONTHLY PERFORMANCE")
print("=" * 40)
print(monthly_metrics.head(10))

# Monthly performance visualization
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Plot against string labels, as the growth-analysis charts in this notebook
# do with their own 'period' column. Strings give one evenly spaced tick per
# month; raw period/datetime values can yield hairline bars or fail in
# matplotlib's unit conversion.
# NOTE(review): the dtype of monthly_metrics['period'] is not visible from
# this cell — confirm against calculate_monthly_metrics.
period_labels = monthly_metrics['period'].astype(str)

# Monthly revenue and expenses
axes[0, 0].plot(period_labels, monthly_metrics['total_revenue'],
                marker='o', label='Revenue', linewidth=2, color='green')
axes[0, 0].plot(period_labels, monthly_metrics['total_expenses'],
                marker='s', label='Expenses', linewidth=2, color='red')
axes[0, 0].set_title('Monthly Revenue vs Expenses')
axes[0, 0].set_ylabel('Amount ($)')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)
axes[0, 0].tick_params(axis='x', rotation=45)

# Monthly net income (green for a profitable month, red for a loss)
net_income_colors = ['green' if x >= 0 else 'red' for x in monthly_metrics['net_income']]
axes[0, 1].bar(period_labels, monthly_metrics['net_income'],
               color=net_income_colors, alpha=0.7)
axes[0, 1].set_title('Monthly Net Income')
axes[0, 1].set_ylabel('Net Income ($)')
axes[0, 1].axhline(y=0, color='black', linestyle='-', alpha=0.5)
axes[0, 1].tick_params(axis='x', rotation=45)

# Monthly transaction count
axes[1, 0].plot(period_labels, monthly_metrics['transaction_count'],
                marker='o', linewidth=2, color='blue')
axes[1, 0].set_title('Monthly Transaction Count')
axes[1, 0].set_ylabel('Number of Transactions')
axes[1, 0].grid(True, alpha=0.3)
axes[1, 0].tick_params(axis='x', rotation=45)

# Monthly average transaction
axes[1, 1].plot(period_labels, monthly_metrics['average_transaction'],
                marker='o', linewidth=2, color='orange')
axes[1, 1].set_title('Monthly Average Transaction Amount')
axes[1, 1].set_ylabel('Average Amount ($)')
axes[1, 1].grid(True, alpha=0.3)
axes[1, 1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Monthly summary statistics
print("\nMONTHLY SUMMARY STATISTICS")
print("=" * 40)
print(f"Average Monthly Revenue: ${monthly_metrics['total_revenue'].mean():,.2f}")
print(f"Average Monthly Expenses: ${monthly_metrics['total_expenses'].mean():,.2f}")
print(f"Average Monthly Net Income: ${monthly_metrics['net_income'].mean():,.2f}")
print(f"Best Month (Revenue): {monthly_metrics.loc[monthly_metrics['total_revenue'].idxmax(), 'period']} (${monthly_metrics['total_revenue'].max():,.2f})")
print(f"Worst Month (Revenue): {monthly_metrics.loc[monthly_metrics['total_revenue'].idxmin(), 'period']} (${monthly_metrics['total_revenue'].min():,.2f})")

## Category Performance Analysis

In [None]:
# Category performance: rank categories by total amount, transaction count
# and average amount. Skipped when the column is missing or entirely null.
if 'category' not in df.columns or df['category'].isnull().all():
    print("No category data available for analysis")
else:
    category_metrics = metrics_calculator.calculate_category_metrics(df)

    print("CATEGORY PERFORMANCE")
    print("=" * 40)
    print(category_metrics.head(10))

    # Four panels: three horizontal rankings plus a pie of the leaders.
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Ranking by total amount (first ten rows in the order the calculator returned them)
    top_categories = category_metrics.head(10)
    ax = axes[0, 0]
    positions = range(len(top_categories))
    ax.barh(positions, top_categories['total_amount'])
    ax.set_yticks(positions)
    ax.set_yticklabels(top_categories['category'])
    ax.set_title('Top 10 Categories by Total Amount')
    ax.set_xlabel('Total Amount ($)')

    # Ranking by number of transactions
    count_sorted = category_metrics.sort_values('transaction_count', ascending=False).head(10)
    ax = axes[0, 1]
    positions = range(len(count_sorted))
    ax.barh(positions, count_sorted['transaction_count'], color='orange')
    ax.set_yticks(positions)
    ax.set_yticklabels(count_sorted['category'])
    ax.set_title('Top 10 Categories by Transaction Count')
    ax.set_xlabel('Number of Transactions')

    # Ranking by average transaction amount
    avg_sorted = category_metrics.sort_values('average_amount', ascending=False).head(10)
    ax = axes[1, 0]
    positions = range(len(avg_sorted))
    ax.barh(positions, avg_sorted['average_amount'], color='green')
    ax.set_yticks(positions)
    ax.set_yticklabels(avg_sorted['category'])
    ax.set_title('Top 10 Categories by Average Amount')
    ax.set_xlabel('Average Amount ($)')

    # Share of the first five categories, as a pie
    top_5 = category_metrics.head(5)
    ax = axes[1, 1]
    ax.pie(top_5['total_amount'], labels=top_5['category'], autopct='%1.1f%%')
    ax.set_title('Top 5 Categories Distribution')

    plt.tight_layout()
    plt.show()

    # Headline numbers, each read from the first row of the matching ranking
    print("\nCATEGORY INSIGHTS")
    print("=" * 40)
    print(f"Number of Categories: {len(category_metrics)}")
    leading_row = category_metrics.iloc[0]
    print(f"Top Category: {leading_row['category']} (${leading_row['total_amount']:,.2f})")
    busiest_row = count_sorted.iloc[0]
    print(f"Most Frequent Category: {busiest_row['category']} ({busiest_row['transaction_count']:,} transactions)")
    priciest_row = avg_sorted.iloc[0]
    print(f"Highest Average Category: {priciest_row['category']} (${priciest_row['average_amount']:,.2f})")

    # How much of the overall total the first five categories account for
    top_5_total = top_5['total_amount'].sum()
    all_categories_total = category_metrics['total_amount'].sum()
    top_5_percentage = top_5_total / all_categories_total * 100
    print(f"Top 5 Categories represent {top_5_percentage:.1f}% of total amount")

## Financial Ratios and Growth Analysis

In [None]:
# Calculate financial ratios
ratios = metrics_calculator.calculate_ratios(df)

print("FINANCIAL RATIOS")
print("=" * 40)
for ratio_name, ratio_value in ratios.items():
    ratio_label = ratio_name.replace('_', ' ').title()
    # Entries whose name ends in "_ratio" are printed as plain numbers;
    # everything else is printed as a dollar amount.
    if ratio_name.endswith('_ratio'):
        print(f"{ratio_label}: {ratio_value:.2f}")
    else:
        print(f"{ratio_label}: ${ratio_value:.2f}")

# Growth analysis (period='M' requests monthly periods from the calculator)
growth_metrics = metrics_calculator.calculate_growth_metrics(df, period='M')

if not growth_metrics.empty:
    print("\nGROWTH ANALYSIS")
    print("=" * 40)
    print(growth_metrics.tail(10))

    # Growth visualization
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # String labels for the x-axis, computed once and shared by all four panels
    growth_periods = growth_metrics['period'].astype(str)

    # Monthly growth rates
    axes[0, 0].plot(growth_periods, growth_metrics['amount_growth_rate'],
                    marker='o', linewidth=2)
    axes[0, 0].set_title('Monthly Amount Growth Rate')
    axes[0, 0].set_ylabel('Growth Rate (%)')
    axes[0, 0].axhline(y=0, color='red', linestyle='--', alpha=0.5)
    axes[0, 0].tick_params(axis='x', rotation=45)
    axes[0, 0].grid(True, alpha=0.3)

    # Transaction count growth
    axes[0, 1].plot(growth_periods, growth_metrics['count_growth_rate'],
                    marker='s', linewidth=2, color='orange')
    axes[0, 1].set_title('Monthly Transaction Count Growth Rate')
    axes[0, 1].set_ylabel('Growth Rate (%)')
    axes[0, 1].axhline(y=0, color='red', linestyle='--', alpha=0.5)
    axes[0, 1].tick_params(axis='x', rotation=45)
    axes[0, 1].grid(True, alpha=0.3)

    # Cumulative amount
    axes[1, 0].plot(growth_periods, growth_metrics['cumulative_amount'],
                    marker='o', linewidth=2, color='green')
    axes[1, 0].set_title('Cumulative Amount Over Time')
    axes[1, 0].set_ylabel('Cumulative Amount ($)')
    axes[1, 0].tick_params(axis='x', rotation=45)
    axes[1, 0].grid(True, alpha=0.3)

    # Cumulative transaction count
    axes[1, 1].plot(growth_periods, growth_metrics['cumulative_count'],
                    marker='s', linewidth=2, color='blue')
    axes[1, 1].set_title('Cumulative Transaction Count Over Time')
    axes[1, 1].set_ylabel('Cumulative Count')
    axes[1, 1].tick_params(axis='x', rotation=45)
    axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Growth summary
    print("\nGROWTH SUMMARY")
    print("=" * 40)
    avg_amount_growth = growth_metrics['amount_growth_rate'].mean()
    avg_count_growth = growth_metrics['count_growth_rate'].mean()
    print(f"Average Monthly Amount Growth: {avg_amount_growth:.2f}%")
    print(f"Average Monthly Count Growth: {avg_count_growth:.2f}%")

    # Best and worst months.
    # Rows without a growth rate are dropped first: idxmax/idxmin cannot pick
    # a row when every rate is NaN, and the .loc lookup then fails. That
    # presumably happens when only one period exists (nothing to compare it
    # against) — verify against calculate_growth_metrics.
    rated_months = growth_metrics.dropna(subset=['amount_growth_rate'])
    if not rated_months.empty:
        best_month = rated_months.loc[rated_months['amount_growth_rate'].idxmax()]
        worst_month = rated_months.loc[rated_months['amount_growth_rate'].idxmin()]
        print(f"Best Growth Month: {best_month['period']} ({best_month['amount_growth_rate']:.2f}%)")
        print(f"Worst Growth Month: {worst_month['period']} ({worst_month['amount_growth_rate']:.2f}%)")
    else:
        print("Best/Worst Growth Month: n/a (no growth rates available)")

## Store Performance Analysis

In [None]:
# Per-store performance. Skipped when the store_location column is missing
# or holds no values at all.
if 'store_location' not in df.columns or df['store_location'].isnull().all():
    print("No store location data available for analysis")
else:
    # Only rows that actually carry a store location take part
    store_data = df.dropna(subset=['store_location'])

    # One row per store; named aggregation yields the flat column names directly
    store_metrics = store_data.groupby('store_location').agg(
        Total_Amount=('amount', 'sum'),
        Transaction_Count=('amount', 'count'),
        Avg_Amount=('amount', 'mean'),
        Std_Amount=('amount', 'std'),
        First_Sale=('date', 'min'),
        Last_Sale=('date', 'max'),
    ).round(2)
    store_metrics = store_metrics.sort_values('Total_Amount', ascending=False)

    print("STORE PERFORMANCE")
    print("=" * 40)
    print(store_metrics.head(10))

    # Four panels: three horizontal rankings plus a pie of the leaders
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Ranking by total amount
    top_stores = store_metrics.head(10)
    ax = axes[0, 0]
    positions = range(len(top_stores))
    ax.barh(positions, top_stores['Total_Amount'])
    ax.set_yticks(positions)
    ax.set_yticklabels(top_stores.index)
    ax.set_title('Top 10 Stores by Revenue')
    ax.set_xlabel('Total Revenue ($)')

    # Ranking by number of transactions
    count_sorted = store_metrics.sort_values('Transaction_Count', ascending=False).head(10)
    ax = axes[0, 1]
    positions = range(len(count_sorted))
    ax.barh(positions, count_sorted['Transaction_Count'], color='orange')
    ax.set_yticks(positions)
    ax.set_yticklabels(count_sorted.index)
    ax.set_title('Top 10 Stores by Transaction Count')
    ax.set_xlabel('Number of Transactions')

    # Ranking by average transaction amount
    avg_sorted = store_metrics.sort_values('Avg_Amount', ascending=False).head(10)
    ax = axes[1, 0]
    positions = range(len(avg_sorted))
    ax.barh(positions, avg_sorted['Avg_Amount'], color='green')
    ax.set_yticks(positions)
    ax.set_yticklabels(avg_sorted.index)
    ax.set_title('Top 10 Stores by Average Transaction')
    ax.set_xlabel('Average Transaction ($)')

    # Share of the five largest stores, as a pie
    top_5_stores = store_metrics.head(5)
    ax = axes[1, 1]
    ax.pie(top_5_stores['Total_Amount'], labels=top_5_stores.index, autopct='%1.1f%%')
    ax.set_title('Top 5 Stores Revenue Distribution')

    plt.tight_layout()
    plt.show()

    # Headline numbers, each read from the first row of the matching ranking
    print("\nSTORE INSIGHTS")
    print("=" * 40)
    print(f"Number of Stores: {len(store_metrics)}")
    leading_store = store_metrics.iloc[0]
    print(f"Top Performing Store: {store_metrics.index[0]} (${leading_store['Total_Amount']:,.2f})")
    print(f"Average Revenue per Store: ${store_metrics['Total_Amount'].mean():,.2f}")
    busiest_store = count_sorted.iloc[0]
    print(f"Most Active Store: {count_sorted.index[0]} ({busiest_store['Transaction_Count']:,} transactions)")
    priciest_store = avg_sorted.iloc[0]
    print(f"Highest Average Transaction Store: {avg_sorted.index[0]} (${priciest_store['Avg_Amount']:,.2f})")

    # Coefficient of variation (std / mean) of the per-store totals
    store_totals = store_metrics['Total_Amount']
    revenue_cv = store_totals.std() / store_totals.mean()
    print(f"Store Revenue Variability (CV): {revenue_cv:.2f}")

## Seasonal Analysis

In [None]:
# Seasonal breakdown: totals and counts by month, quarter and weekday,
# selected from one table via its 'season_type' column.
seasonal_metrics = metrics_calculator.calculate_seasonal_metrics(df)

print("SEASONAL ANALYSIS")
print("=" * 40)
print(seasonal_metrics.head(20))

# Four panels; a panel stays blank when its slice has no rows
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

season_kind = seasonal_metrics['season_type']

# Totals per calendar month
monthly_data = seasonal_metrics[season_kind == 'monthly']
has_monthly = not monthly_data.empty
if has_monthly:
    ax = axes[0, 0]
    ax.bar(monthly_data['season_value'], monthly_data['sum'])
    ax.set_title('Revenue by Month')
    ax.set_xlabel('Month')
    ax.set_ylabel('Total Amount ($)')
    ax.set_xticks(range(1, 13))

# Totals per quarter
quarterly_data = seasonal_metrics[season_kind == 'quarterly']
has_quarterly = not quarterly_data.empty
if has_quarterly:
    ax = axes[0, 1]
    ax.bar(quarterly_data['season_value'], quarterly_data['sum'], color='orange')
    ax.set_title('Revenue by Quarter')
    ax.set_xlabel('Quarter')
    ax.set_ylabel('Total Amount ($)')

# Totals per weekday, plotted by row position and labelled with season_value
dow_data = seasonal_metrics[season_kind == 'day_of_week']
has_dow = not dow_data.empty
if has_dow:
    ax = axes[1, 0]
    slots = range(len(dow_data))
    ax.bar(slots, dow_data['sum'], color='green')
    ax.set_title('Revenue by Day of Week')
    ax.set_xlabel('Day of Week')
    ax.set_ylabel('Total Amount ($)')
    ax.set_xticks(slots)
    ax.set_xticklabels(dow_data['season_value'], rotation=45)

# Transaction counts per calendar month
if has_monthly:
    ax = axes[1, 1]
    ax.bar(monthly_data['season_value'], monthly_data['count'], color='red')
    ax.set_title('Transaction Count by Month')
    ax.set_xlabel('Month')
    ax.set_ylabel('Number of Transactions')
    ax.set_xticks(range(1, 13))

plt.tight_layout()
plt.show()

# Textual highlights for each slice that has data
if has_monthly:
    best_month = monthly_data.loc[monthly_data['sum'].idxmax()]
    worst_month = monthly_data.loc[monthly_data['sum'].idxmin()]

    print("\nSEASONAL INSIGHTS")
    print("=" * 40)
    print(f"Best Month: {best_month['season_value']} (${best_month['sum']:,.2f})")
    print(f"Worst Month: {worst_month['season_value']} (${worst_month['sum']:,.2f})")
    # Spread between the strongest and weakest month, relative to the weakest
    month_spread = (best_month['sum'] - worst_month['sum']) / worst_month['sum'] * 100
    print(f"Seasonal Variation: {month_spread:.1f}%")

if has_quarterly:
    best_quarter = quarterly_data.loc[quarterly_data['sum'].idxmax()]
    print(f"Best Quarter: Q{best_quarter['season_value']} (${best_quarter['sum']:,.2f})")

if has_dow:
    best_day = dow_data.loc[dow_data['sum'].idxmax()]
    worst_day = dow_data.loc[dow_data['sum'].idxmin()]
    print(f"Best Day of Week: {best_day['season_value']} (${best_day['sum']:,.2f})")
    print(f"Worst Day of Week: {worst_day['season_value']} (${worst_day['sum']:,.2f})")

## Executive Summary

In [None]:
# Executive summary: re-prints the headline numbers computed by the earlier
# cells (basic_metrics, growth_metrics, store_metrics, category_metrics,
# seasonal_metrics / monthly_data), so those cells must have been run first.
print("EXECUTIVE FINANCIAL SUMMARY")
print("=" * 50)

print("\n📊 OVERALL PERFORMANCE")
print(f"   Total Revenue: ${basic_metrics.total_revenue:,.2f}")
print(f"   Total Expenses: ${basic_metrics.total_expenses:,.2f}")
print(f"   Net Income: ${basic_metrics.net_income:,.2f}")
print(f"   Profit Margin: {basic_metrics.gross_margin:.2f}%")
print(f"   Total Transactions: {basic_metrics.transaction_count:,}")

print("\n📈 GROWTH & TRENDS")
if not growth_metrics.empty:
    amount_growth = growth_metrics['amount_growth_rate']
    print(f"   Average Monthly Growth: {amount_growth.mean():.2f}%")
    # idxmax cannot select a row when every growth rate is NaN, so the best
    # month is only reported when at least one rate exists.
    if amount_growth.notna().any():
        print(f"   Best Growth Month: {growth_metrics.loc[amount_growth.idxmax(), 'period']}")
    print(f"   Transaction Growth: {growth_metrics['count_growth_rate'].mean():.2f}%")

print("\n🏪 OPERATIONAL METRICS")
print(f"   Average Transaction: ${basic_metrics.average_transaction:.2f}")
# Count both endpoints of the reporting period (1st..31st is 31 days, not 30)
# and never divide by zero when all data falls on a single day.
# NOTE(review): treats period_start/period_end as inclusive bounds — confirm.
period_days = max((basic_metrics.period_end - basic_metrics.period_start).days + 1, 1)
print(f"   Daily Average Revenue: ${basic_metrics.total_revenue / period_days:.2f}")
if 'store_location' in df.columns and not df['store_location'].isnull().all():
    print(f"   Number of Stores: {df['store_location'].nunique()}")
    print(f"   Revenue per Store: ${store_metrics['Total_Amount'].mean():,.2f}")

print("\n📋 CATEGORY INSIGHTS")
if 'category' in df.columns and not df['category'].isnull().all():
    print(f"   Number of Categories: {df['category'].nunique()}")
    print(f"   Top Category: {category_metrics.iloc[0]['category']} (${category_metrics.iloc[0]['total_amount']:,.2f})")
    print(f"   Category Concentration: Top 5 represent {(category_metrics.head(5)['total_amount'].sum() / category_metrics['total_amount'].sum() * 100):.1f}% of revenue")

print("\n🌟 KEY RECOMMENDATIONS")
print(f"   1. {'Focus on' if basic_metrics.net_income > 0 else 'Improve'} profitability - Current margin: {basic_metrics.gross_margin:.1f}%")

if not growth_metrics.empty:
    average_growth = growth_metrics['amount_growth_rate'].mean()
    # A NaN average means no growth rate could be computed; without this
    # check it would fall into the "declining" branch because NaN > 0 is False.
    if pd.notna(average_growth):
        if average_growth > 0:
            print(f"   2. Maintain growth momentum - Average growth: {average_growth:.1f}%")
        else:
            print(f"   2. Address declining revenue trend - Current growth: {average_growth:.1f}%")

if 'category' in df.columns and not df['category'].isnull().all():
    print(f"   3. Leverage top category '{category_metrics.iloc[0]['category']}' for expansion")

if not seasonal_metrics.empty and not monthly_data.empty:
    best_month = monthly_data.loc[monthly_data['sum'].idxmax()]
    print(f"   4. Optimize for seasonal patterns - Peak month: {best_month['season_value']}")

print("   5. Monitor transaction frequency and average amount trends")

print("\n" + "=" * 50)