# Financial Forecasting Model - Overview

This notebook demonstrates a regression-based approach to forecasting sales (the worked example below) and expenses, enabling proactive financial planning.

## Project Goal

Develop a forecasting model that:
- Analyzes historical sales and expense patterns
- Generates accurate forecasts for future periods
- Enables scenario-based planning (baseline, optimistic, pessimistic)
- Improves forecast accuracy to support better budgeting decisions


## 1. Data Loading


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from datetime import datetime

# Plot appearance: seaborn's white-grid theme and a wide default canvas
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (14, 6)})

# Read the raw financial records from disk
df = pd.read_csv('../data/sample_financial_data.csv')

# Parse the date column into real timestamps so it can be sorted and plotted
df = df.assign(date=pd.to_datetime(df['date']))

# Quick sanity report: size, covered period and available categories
row_count, column_count = df.shape
earliest_date = df['date'].min()
latest_date = df['date'].max()
print(f"Data loaded: {row_count} rows, {column_count} columns")
print(f"Date range: {earliest_date} to {latest_date}")
print(f"\nCategories: {df['category'].unique()}")
df.head()


## 2. Data Overview & Exploration


In [None]:
# Focus on Sales data for forecasting, ordered chronologically
sales_df = df[df['category'] == 'Sales'].copy()
sales_df = sales_df.sort_values('date')

# Create time index: one step per distinct date rather than one per row.
# The regression cell later keeps only the scenario == 'Baseline' rows; if a
# date occurs on several rows (for example one row per scenario), a plain row
# counter would leave that subset with uneven steps, while the future index
# built in the forecast cell advances by exactly one per month.
# When every date is unique this still yields 1..N, as before.
sales_df['time_index'] = pd.factorize(sales_df['date'], sort=True)[0] + 1

# Calendar features used by the model as a simple seasonality signal
sales_df['month'] = sales_df['date'].dt.month
sales_df['quarter'] = sales_df['date'].dt.quarter

print("Sales Data Overview:")
print(sales_df[['date', 'actual_sales', 'forecast_sales', 'actual_expenses']].head(10))
print("\nSummary Statistics:")
print(sales_df[['actual_sales', 'forecast_sales', 'actual_expenses']].describe())


## 3. Historical Trend Visualization


In [None]:
# Overlay realised sales with the forecasts that were issued previously
plt.figure(figsize=(14, 6))
history_series = [
    ('actual_sales',
     dict(marker='o', label='Actual Sales', linewidth=2, markersize=6)),
    ('forecast_sales',
     dict(marker='s', label='Previous Forecast', linewidth=2, linestyle='--', markersize=4, alpha=0.7)),
]
for series_column, line_style in history_series:
    plt.plot(sales_df['date'], sales_df[series_column], **line_style)

# Axis labelling and layout
plt.xlabel('Date')
plt.ylabel('Sales ($)')
plt.title('Historical Sales: Actual vs. Previous Forecasts')
plt.legend()
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Error of the previous forecasts against actuals, expressed in percent
historical_mape = 100 * mean_absolute_percentage_error(
    sales_df['actual_sales'],
    sales_df['forecast_sales'],
)
print(f"\nHistorical Forecast MAPE: {historical_mape:.2f}%")


## 4. Regression-Based Forecasting Model


In [None]:
# Training set: only the rows recorded under the Baseline scenario
is_baseline = sales_df['scenario'] == 'Baseline'
train_df = sales_df.loc[is_baseline].copy()

# Predictors: linear trend (time_index) plus calendar fields (month, quarter)
feature_columns = ['time_index', 'month', 'quarter']
X_train = train_df[feature_columns].to_numpy()
y_train = train_df['actual_sales'].to_numpy()

# Fit an ordinary least-squares model (fit() returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)

# In-sample predictions, i.e. on the same rows the model was fitted to
train_predictions = model.predict(X_train)

# In-sample error metrics
train_mape = 100 * mean_absolute_percentage_error(y_train, train_predictions)
train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
train_r2 = model.score(X_train, y_train)

print("Model Training Complete")
print(f"Training MAPE: {train_mape:.2f}%")
print(f"Training RMSE: ${train_rmse:,.2f}")
print(f"Model R² Score: {train_r2:.4f}")


## 5. Model Visualization


In [None]:
# Show how closely the fitted model tracks the data it was trained on
plt.figure(figsize=(14, 6))
training_dates = train_df['date']
fit_series = [
    (train_df['actual_sales'],
     dict(marker='o', label='Actual Sales', linewidth=2)),
    (train_predictions,
     dict(marker='x', label='Model Predictions', linewidth=2, linestyle='--', alpha=0.8)),
]
for series_values, line_style in fit_series:
    plt.plot(training_dates, series_values, **line_style)

# Axis labelling and layout
plt.xlabel('Date')
plt.ylabel('Sales ($)')
plt.title('Regression Model: Actual vs. Predicted (Training Data)')
plt.legend()
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


## 6. Future Forecasts


In [None]:
# Generate forecasts for the months that follow the training window
FORECAST_HORIZON_MONTHS = 6   # number of future months to predict
OPTIMISTIC_FACTOR = 1.05      # baseline +5%
PESSIMISTIC_FACTOR = 0.95     # baseline -5%

last_date = train_df['date'].max()
last_time_index = train_df['time_index'].max()

# Create future dates, anchored on the first day of the month after last_date.
# date_range with freq='MS' rolls a start that is not a month start forward to
# the next one, so starting from "last_date + 1 month" would silently skip the
# first forecast month whenever the data is stamped mid-month or at month-end.
# For data stamped on the 1st of each month the result is unchanged.
first_forecast_month = last_date + pd.offsets.MonthBegin(1)
future_months = pd.date_range(
    start=first_forecast_month,
    periods=FORECAST_HORIZON_MONTHS,
    freq='MS',
)
future_df = pd.DataFrame({
    'date': future_months,
    # Continue the training trend index, one step per forecast month
    'time_index': range(last_time_index + 1, last_time_index + 1 + FORECAST_HORIZON_MONTHS),
    'month': future_months.month,
    'quarter': future_months.quarter
})

# Generate baseline forecasts (same feature order as used for training)
X_future = future_df[['time_index', 'month', 'quarter']].values
future_df['baseline_forecast'] = model.predict(X_future)

# Scenario bands are fixed multiples of the baseline forecast
future_df['optimistic_forecast'] = future_df['baseline_forecast'] * OPTIMISTIC_FACTOR
future_df['pessimistic_forecast'] = future_df['baseline_forecast'] * PESSIMISTIC_FACTOR

print(f"=== {FORECAST_HORIZON_MONTHS}-MONTH FORECAST ===")
print(future_df[['date', 'baseline_forecast', 'optimistic_forecast', 'pessimistic_forecast']])


## 7. Scenario Comparison Visualization


In [None]:
# One chart with the historical actuals followed by the three forecast scenarios
plt.figure(figsize=(16, 6))

# Historical actual
plt.plot(train_df['date'], train_df['actual_sales'], marker='o', label='Historical Actual', linewidth=2, color='blue')

# Future forecasts: (column, line styling) for each scenario, drawn in order
scenario_lines = [
    ('baseline_forecast',
     dict(marker='s', label='Baseline Forecast', linewidth=2, linestyle='-', color='green')),
    ('optimistic_forecast',
     dict(marker='^', label='Optimistic (+5%)', linewidth=2, linestyle='--', color='orange', alpha=0.8)),
    ('pessimistic_forecast',
     dict(marker='v', label='Pessimistic (-5%)', linewidth=2, linestyle='--', color='red', alpha=0.8)),
]
for scenario_column, line_style in scenario_lines:
    plt.plot(future_df['date'], future_df[scenario_column], **line_style)

# Vertical marker at the last historical date, separating history from forecast
plt.axvline(x=last_date, color='gray', linestyle=':', linewidth=2, alpha=0.5, label='Forecast Start')

# Axis labelling and layout
plt.xlabel('Date')
plt.ylabel('Sales ($)')
plt.title('Financial Forecasting: Historical vs. Future Scenarios')
plt.legend()
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


## 8. Forecast Accuracy Improvement


In [None]:
# Compare old forecast method vs. new model.
# NOTE(review): train_mape is measured on the same rows the model was fitted
# to (in-sample), so this comparison probably flatters the regression model;
# confirm on a held-out period before quoting the improvement.
print("=== FORECAST ACCURACY COMPARISON ===")
print("\n1. Previous Forecast Method (Historical):")
print(f"   MAPE: {historical_mape:.2f}%")

print("\n2. New Regression Model (Training):")
print(f"   MAPE: {train_mape:.2f}%")

accuracy_improvement = historical_mape - train_mape

# The relative figure is only meaningful for a finite, non-zero denominator;
# otherwise the division would print "inf%" or "nan%".
if np.isfinite(historical_mape) and historical_mape != 0:
    improvement_pct = (accuracy_improvement / historical_mape) * 100
    relative_improvement_text = f"{improvement_pct:.1f}%"
else:
    improvement_pct = float('nan')
    relative_improvement_text = "n/a (previous-method MAPE is zero or undefined)"

print("\n3. Improvement:")
print(f"   Absolute MAPE Reduction: {accuracy_improvement:.2f} percentage points")
print(f"   Relative Improvement: {relative_improvement_text}")

print("\n=== KEY INSIGHTS ===")
print("• Regression model captures trend and seasonality patterns")
print("• Scenario analysis enables risk-aware planning")
print("• Proactive visibility supports better budgeting decisions")
