<a href="https://colab.research.google.com/github/shabeer-ctp/ARIMAvsLSTM/blob/main/ARIMAvsLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install numpy==1.26.4




In [2]:
!pip install pandas==2.2.2 numpy==1.26.4 tensorflow==2.18.0 plotly openpyxl statsmodels scikit-learn matplotlib




In [3]:
from datetime import timedelta

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA
from tensorflow.keras import Input
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential

# --- Read the source workbook into a DataFrame ---
df = pd.read_excel(io='check2.xlsx')

# --- Fix date format (YYYY-DD-MM) ---
def fix_yyyy_dd_mm(date_str):
    """Convert a ``YYYY-DD-MM`` string into a pandas ``Timestamp``.

    The text is split on ``-`` into year, day and month (in that order) and
    re-assembled as ``YYYY-MM-DD`` before being parsed.

    Args:
        date_str: Date text written in ``YYYY-DD-MM`` order.

    Returns:
        The parsed ``pd.Timestamp``, or ``pd.NaT`` when the value is not a
        string, does not split into exactly three ``-``-separated parts, or
        does not describe a valid date.
    """
    try:
        y, d, m = date_str.split('-')
        return pd.to_datetime(f"{y}-{m}-{d}")
    # Only swallow the failures bad input can cause: non-string values
    # (AttributeError/TypeError), a wrong number of parts or an unparseable
    # date (ValueError), and out-of-range components (OverflowError).
    # Anything else -- including KeyboardInterrupt -- propagates.
    except (AttributeError, TypeError, ValueError, OverflowError):
        return pd.NaT

# --- Repair the dates, then keep only the (date, rate) columns ---
df['date_fixed'] = df['Monthly Production Date'].astype(str).apply(fix_yyyy_dd_mm)
df_clean = (
    df[['date_fixed', 'Monthly Oil']]
    .rename(columns={'date_fixed': 'date', 'Monthly Oil': 'rate'})
    .dropna()                      # drops rows whose date failed to parse (NaT) or with no rate
    .sort_values('date')
    .reset_index(drop=True)
)

# --- Monthly series: month-start bins, mean per bin, gaps interpolated ---
df_clean = df_clean.set_index('date')
monthly_data = df_clean['rate'].resample('MS').mean().interpolate()

# ========== ARIMA ==========
# Fit an ARIMA(1, 1, 1) on the monthly series and forecast 12 steps ahead.
model_arima = ARIMA(monthly_data, order=(1, 1, 1))
model_arima_fit = model_arima.fit()
forecast_arima = model_arima_fit.forecast(steps=12)

# ========== LSTM ==========
# Scale the series; the scaler expects a 2-D array, hence the single-column reshape.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(monthly_data.to_numpy().reshape(-1, 1))

# Create sequences
def create_sequences(data, window):
    """Build sliding-window samples and their next-step targets.

    For every start position ``s`` in ``range(len(data) - window)`` the
    sample is ``data[s:s + window]`` and the target is ``data[s + window]``.

    Args:
        data: Indexable, sliceable sequence (e.g. a NumPy array).
        window: Number of consecutive elements in each sample.

    Returns:
        A ``(samples, targets)`` tuple of NumPy arrays. Both are empty when
        ``data`` has no more than ``window`` elements.
    """
    n_samples = len(data) - window
    samples = [data[start:start + window] for start in range(n_samples)]
    targets = [data[start + window] for start in range(n_samples)]
    return np.array(samples), np.array(targets)

window_size = 12
X, y = create_sequences(scaled_data, window_size)

# Fail early with a clear message: with too few observations X comes back as
# an empty 1-D array and the reshape below would raise an opaque IndexError.
if len(X) == 0:
    raise ValueError(
        f"Need more than {window_size} monthly observations to train the "
        f"LSTM; got {len(scaled_data)}."
    )

# Reshape for LSTM: (samples, window_size, 1)
X = X.reshape((X.shape[0], X.shape[1], 1))

# LSTM model
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer of a Sequential model triggers a
# deprecation warning in Keras 3.
model_lstm = Sequential([
    Input(shape=(window_size, 1)),
    LSTM(50, activation='relu'),
    Dense(1)
])
model_lstm.compile(optimizer='adam', loss='mse')
model_lstm.fit(X, y, epochs=100, verbose=0)

# Forecast using LSTM
# Use the same horizon as the ARIMA forecast so both series line up with
# forecast_index in the plot below.
forecast_horizon = len(forecast_arima)
last_window = scaled_data[-window_size:].flatten()
forecast_lstm = []

for _ in range(forecast_horizon):
    input_seq = last_window.reshape((1, window_size, 1))
    pred = model_lstm.predict(input_seq, verbose=0)[0, 0]
    forecast_lstm.append(pred)
    # Slide the window: drop the oldest value and feed the prediction back in.
    last_window = np.append(last_window[1:], pred)

# Map the scaled predictions back to the original units.
forecast_lstm = scaler.inverse_transform(np.array(forecast_lstm).reshape(-1, 1)).flatten()

# ========== Forecast Dates ==========
# Month-start dates beginning one month after the last observed month.
forecast_index = pd.date_range(
    start=monthly_data.index[-1] + pd.DateOffset(months=1),
    periods=forecast_horizon,
    freq='MS',
)

# ========== Plot ==========
fig = go.Figure()

# Actual
fig.add_trace(go.Scatter(x=monthly_data.index, y=monthly_data.values,
                         mode='lines+markers', name='Actual', line=dict(color='blue')))

# ARIMA Forecast
fig.add_trace(go.Scatter(x=forecast_index, y=forecast_arima.values,
                         mode='lines+markers', name='ARIMA Forecast', line=dict(color='green', dash='dash')))

# LSTM Forecast
fig.add_trace(go.Scatter(x=forecast_index, y=forecast_lstm,
                         mode='lines+markers', name='LSTM Forecast', line=dict(color='orange', dash='dot')))

fig.update_layout(
    title="Oil Production Forecast Comparison (ARIMA vs LSTM)",
    xaxis_title="Date",
    yaxis_title="Production Rate",
    legend=dict(x=1, y=1, xanchor='right', yanchor='top')
)

fig.show()


  super().__init__(**kwargs)
