# Forecasting India's Inflation Using Crude Oil Prices
## A Comparative Study of ARIMA and LSTM Models

**Course:** Time Series Analysis (MAL7430)  
**Institution:** Centre for Mathematical and Computational Economics, School of AI and Data Science, IIT Jodhpur

---

### Project Overview

This notebook implements a time-series analysis that compares ARIMA and LSTM models for forecasting India's inflation rate and examines the relationship between inflation and global (Brent) crude oil prices.

**Main Objectives:**
1. Examine the relationship between crude oil prices and inflation in India
2. Build and train ARIMA and LSTM forecasting models
3. Evaluate model performance using statistical metrics (RMSE, MAE, MAPE)
4. Identify which model provides better forecasting accuracy


## 1. Setup and Imports


In [None]:
import sys
import os

# Make the project packages (`src`, `config`) importable from the notebook.
# os.path.abspath('') resolves to the current working directory, which is
# already a directory, so its parent is a single dirname() away. The original
# code only appended the directory two levels up; that location is still
# appended so any layout that relied on it keeps working.
_working_dir = os.path.abspath('')
_parent_dir = os.path.dirname(_working_dir)
for _candidate in (_parent_dir, os.path.dirname(_parent_dir)):
    # Skip duplicates so re-running the cell does not keep growing sys.path
    if _candidate not in sys.path:
        sys.path.append(_candidate)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silences every warning for the rest of the session (keeps cell output clean,
# but also hides deprecation and convergence warnings).
warnings.filterwarnings('ignore')

# Import project modules
from src.data_collection import collect_all_data
from src.data_preprocessing import preprocess_data, test_stationarity
from src.arima_model import build_arima_model, evaluate_arima_model, plot_acf_pacf
from src.lstm_model import prepare_lstm_data, train_lstm_model, evaluate_lstm_model
from src.model_evaluation import calculate_metrics, compare_models, plot_predictions, plot_residuals
from src.visualization import plot_time_series, plot_correlation, plot_oil_inflation_relationship, plot_forecast_comparison
import config

# Set plotting style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

print("Setup complete!")


## 2. Data Collection


In [None]:
# Fetch the combined dataset (Brent crude oil prices and India CPI)
raw_data = collect_all_data()

# Quick inspection: dimensions, column names, leading rows, summary statistics
print("\nData shape:", raw_data.shape)
print("\nData columns:", raw_data.columns.tolist())
print("\nFirst few rows:", raw_data.head(), sep="\n")
print("\nData summary:", raw_data.describe(), sep="\n")


In [None]:
# Plot the two raw series before any preprocessing is applied
raw_plot_columns = ['Brent_Price', 'CPI']
raw_plot_title = 'Raw Data: Brent Crude Oil Prices and India CPI'
fig = plot_time_series(raw_data, columns=raw_plot_columns, title=raw_plot_title)
plt.show()


## 3. Data Preprocessing


In [None]:
# Run the preprocessing step; it returns the processed frame together with
# the stationarity results.
# NOTE(review): preprocess_data() is called without raw_data — presumably it
# loads its own input; confirm against src.data_preprocessing.
processed_data, stationarity_results = preprocess_data()

print("\nProcessed data shape:", processed_data.shape)
print("\nProcessed data columns:", processed_data.columns.tolist())
print("\nFirst few rows:", processed_data.head(), sep="\n")


In [None]:
# Plot the oil price next to the inflation-rate column of the processed data
processed_plot_columns = ['Brent_Price', 'Inflation_Rate']
processed_plot_title = 'Processed Data: Brent Crude Oil Prices and Inflation Rate'
fig = plot_time_series(
    processed_data,
    columns=processed_plot_columns,
    title=processed_plot_title,
)
plt.show()


In [None]:
# Oil price vs. inflation rate; the helper receives the output path via save_path
oil_series = processed_data['Brent_Price']
inflation_series = processed_data['Inflation_Rate']
relationship_path = os.path.join(config.PATHS['figures'], 'oil_inflation_relationship.png')
fig = plot_oil_inflation_relationship(oil_series, inflation_series, save_path=relationship_path)
plt.show()


In [None]:
# Correlation analysis over the same three columns, first as a figure and
# then as a printed table.
CORR_COLUMNS = ('Brent_Price', 'CPI', 'Inflation_Rate')
correlation_path = os.path.join(config.PATHS['figures'], 'correlation_matrix.png')

fig = plot_correlation(
    processed_data,
    columns=list(CORR_COLUMNS),
    title='Correlation Matrix: Oil Prices, CPI, and Inflation Rate',
    save_path=correlation_path,
)
plt.show()

# Numeric values behind the figure
corr_matrix = processed_data[list(CORR_COLUMNS)].corr()
print("\nCorrelation Matrix:", corr_matrix, sep="\n")


## 4. Data Splitting


In [None]:
# Ordered hold-out split of the target series: the leading observations
# train the models, the trailing ones are kept for testing.
target_variable = 'Inflation_Rate'
target_data = processed_data[target_variable].dropna()

# Number of training points = len(target_data) * train_split, truncated to int
train_size = int(len(target_data) * config.DATA_CONFIG['train_split'])
train_data, test_data = target_data[:train_size], target_data[train_size:]

n_total = len(target_data)
print(f"Total data points: {n_total}")
for split_label, split_series in (("Training data", train_data), ("Test data", test_data)):
    print(f"{split_label}: {len(split_series)} ({len(split_series)/n_total*100:.1f}%)")

print(f"\nTraining period: {train_data.index.min()} to {train_data.index.max()}")
print(f"Test period: {test_data.index.min()} to {test_data.index.max()}")


In [None]:
# Visualize train-test split: both segments on one axis with a vertical
# marker at the last training observation.
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(train_data.index, train_data.values, label='Training Data', linewidth=2, color='blue')
ax.plot(test_data.index, test_data.values, label='Test Data', linewidth=2, color='red')
ax.axvline(x=train_data.index[-1], color='black', linestyle='--', linewidth=2, label='Train/Test Split')
ax.set_title('Train-Test Split', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Inflation Rate (%)')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()

# savefig does not create missing directories, so make sure the output folder
# exists before writing (an empty path means "current directory": nothing to do).
figures_dir = config.PATHS['figures']
if figures_dir:
    os.makedirs(figures_dir, exist_ok=True)
# Save through the explicit figure handle rather than pyplot's "current figure"
fig.savefig(os.path.join(figures_dir, 'train_test_split.png'), dpi=300, bbox_inches='tight')
plt.show()


## 5. ARIMA Model


In [None]:
# Plot ACF and PACF of the training series for ARIMA parameter selection
# NOTE(review): lags=40 presumes the training series is long enough to
# support 40 lags — confirm against the actual sample size.
fig = plot_acf_pacf(train_data, lags=40)

# savefig does not create missing directories, so make sure the output folder
# exists before writing (an empty path means "current directory": nothing to do).
figures_dir = config.PATHS['figures']
if figures_dir:
    os.makedirs(figures_dir, exist_ok=True)
# plt.savefig writes pyplot's current figure — presumably the one
# plot_acf_pacf just created; verify if the helper ever changes.
plt.savefig(os.path.join(figures_dir, 'acf_pacf.png'), dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Fit ARIMA on the training series; the helper returns the fitted model and
# the order it used (auto_select=True presumably lets it pick the order itself).
arima_model, arima_order = build_arima_model(train_data, auto_select=True)

print()
print(f"Selected ARIMA order: {arima_order}")


In [None]:
# Score the fitted ARIMA model; the result is a mapping with a 'metrics' entry
arima_results = evaluate_arima_model(arima_model, test_data, train_data)

print("\nARIMA Model Metrics:")
# One indented "name: value" line per metric, four decimal places
for arima_metric_line in (
    f"  {name}: {score:.4f}" for name, score in arima_results['metrics'].items()
):
    print(arima_metric_line)


## 6. LSTM Model


In [None]:
# Build the LSTM training inputs from the training series. fit_scaler=True
# requests a scaler fitted here; it is returned for reuse on other data.
train_sequence_length = config.LSTM_CONFIG['sequence_length']
scaled_train, scaler, X_train, y_train = prepare_lstm_data(
    train_data,
    sequence_length=train_sequence_length,
    fit_scaler=True,
)

print("Training sequences shape:", X_train.shape)
print("Training targets shape:", y_train.shape)


In [None]:
# Build the LSTM test inputs, reusing the existing scaler (fit_scaler=False)
# instead of fitting a new one on the test series.
# NOTE(review): the windows are built from test_data alone, so presumably the
# first `sequence_length` test points get no prediction and X_test has fewer
# rows than test_data — confirm this matches how the ARIMA test set is scored.
test_sequence_length = config.LSTM_CONFIG['sequence_length']
_, _, X_test, y_test = prepare_lstm_data(
    test_data,
    sequence_length=test_sequence_length,
    scaler=scaler,
    fit_scaler=False,
)

print("Test sequences shape:", X_test.shape)
print("Test targets shape:", y_test.shape)


In [None]:
# Build and train LSTM model
#
# Validation data is carved out of the *training* sequences. The previous
# version passed X_test/y_test as validation data; if train_lstm_model uses
# validation loss for early stopping, checkpointing or learning-rate
# scheduling, that lets the test set shape the model later scored on it.
VAL_FRACTION = 0.1  # share of training sequences reserved for validation
n_val = max(1, int(len(X_train) * VAL_FRACTION))
if n_val >= len(X_train):
    raise ValueError(
        f"Need at least 2 training sequences to hold out validation data, got {len(X_train)}"
    )

# Take the validation slice from the end so it follows the fitting data
# (assumes prepare_lstm_data returns sequences in time order — TODO confirm).
X_fit, y_fit = X_train[:-n_val], y_train[:-n_val]
X_val, y_val = X_train[-n_val:], y_train[-n_val:]

lstm_model, lstm_history = train_lstm_model(
    X_fit, y_fit,
    X_val=X_val,
    y_val=y_val,
    epochs=config.LSTM_CONFIG['epochs'],
    batch_size=config.LSTM_CONFIG['batch_size']
)


In [None]:
# Score the trained LSTM on the test sequences; the scaler is passed along
# together with the date index of the test series.
# NOTE(review): test_data.index covers every test point, while y_test may be
# shorter if the first window consumes observations — confirm the helper
# aligns the two.
lstm_results = evaluate_lstm_model(lstm_model, X_test, y_test, scaler, test_data_index=test_data.index)

print("\nLSTM Model Metrics:")
# One indented "name: value" line per metric, four decimal places
for lstm_metric_line in (
    f"  {name}: {score:.4f}" for name, score in lstm_results['metrics'].items()
):
    print(lstm_metric_line)


## 7. Model Comparison


In [None]:
# Build the side-by-side comparison of both result sets (save_results=True is
# forwarded to the helper) and print it under a banner.
comparison_table = compare_models(arima_results, lstm_results, save_results=True)

comparison_rule = "=" * 60
print("\n" + comparison_rule, "FINAL MODEL COMPARISON", comparison_rule, comparison_table, sep="\n")


In [None]:
# Overlay both models' forecasts on the held-out test series
arima_forecasts = arima_results['forecasts']
lstm_forecasts = lstm_results['forecasts']
fig = plot_predictions(test_data, arima_forecasts, lstm_forecasts, title='Model Predictions Comparison')
plt.show()


In [None]:
# Forecast comparison figure; the ARIMA interval is passed only if the
# results contain a 'conf_int' entry (None otherwise).
arima_interval = arima_results.get('conf_int')
forecast_path = os.path.join(config.PATHS['figures'], 'forecast_comparison.png')

fig = plot_forecast_comparison(
    test_data,
    arima_results['forecasts'],
    lstm_results['forecasts'],
    arima_conf_int=arima_interval,
    title='Forecast Comparison: ARIMA vs LSTM',
    save_path=forecast_path,
)
plt.show()


## 8. Summary and Conclusions


In [None]:
# Final report. The findings below are derived from the values computed in
# this session instead of being hard-coded, so the text cannot contradict the
# metrics printed above it.


def _metric_by_name(metrics, name):
    """Return the value whose key equals ``name`` ignoring case, or None if absent."""
    wanted = name.lower()
    for key, value in metrics.items():
        if str(key).lower() == wanted:
            return value
    return None


def _print_metrics(metrics):
    """Print one indented ``- name: value`` line per metric, four decimal places."""
    for name, value in metrics.items():
        print(f"   - {name}: {value:.4f}")


summary_rule = "=" * 60
print(summary_rule)
print("PROJECT SUMMARY")
print(summary_rule)

print("\n1. Data Overview:")
print(f"   - Total data points: {len(processed_data)}")
print(f"   - Date range: {processed_data.index.min()} to {processed_data.index.max()}")
# Percentages are relative to the modelled series (target_data, NaNs dropped)
print(f"   - Training size: {len(train_data)} ({len(train_data)/len(target_data)*100:.1f}%)")
print(f"   - Test size: {len(test_data)} ({len(test_data)/len(target_data)*100:.1f}%)")

print("\n2. Model Performance:")
print("\n   ARIMA Model:")
print(f"   - Order: {arima_order}")
_print_metrics(arima_results['metrics'])

print("\n   LSTM Model:")
print(f"   - Sequence length: {config.LSTM_CONFIG['sequence_length']}")
print(f"   - Architecture: {config.LSTM_CONFIG['units']}")
_print_metrics(lstm_results['metrics'])

# Decide the better model from RMSE (lower is better) when both report it.
# NOTE(review): assumes both models were scored on comparable test windows.
arima_rmse = _metric_by_name(arima_results['metrics'], 'rmse')
lstm_rmse = _metric_by_name(lstm_results['metrics'], 'rmse')
best_model = None
if arima_rmse is None or lstm_rmse is None:
    verdict = "RMSE is not available for both models; compare the metrics listed above"
elif lstm_rmse < arima_rmse:
    best_model = "LSTM"
    verdict = f"LSTM achieved the lower test RMSE ({lstm_rmse:.4f} vs {arima_rmse:.4f} for ARIMA)"
elif arima_rmse < lstm_rmse:
    best_model = "ARIMA"
    verdict = f"ARIMA achieved the lower test RMSE ({arima_rmse:.4f} vs {lstm_rmse:.4f} for LSTM)"
else:
    verdict = "ARIMA and LSTM test RMSE values are equal or not comparable"

# Both models are fit on the inflation series alone, so the oil/inflation link
# is reported as the measured correlation rather than asserted as an impact.
oil_inflation_corr = processed_data['Brent_Price'].corr(processed_data['Inflation_Rate'])

print("\n3. Key Findings:")
print(f"   - {verdict}")
print(f"   - Correlation between Brent crude price and inflation rate: {oil_inflation_corr:.3f}")

print("\n4. Policy Implications:")
print("   - RBI and policymakers can use these models for proactive inflation management")
if best_model is not None:
    print(f"   - On this test set, {best_model}-based forecasts were the more accurate of the two")

print("\n" + summary_rule)
