<a href="https://colab.research.google.com/github/ravinnd3/TimeSeries_TemperatureForecasting/blob/main/Forecast_TimeSeries_Temperature_Melbourne.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install statsmodels tensorflow

In [None]:
# !pip install prophet neuralprophet --quiet

In [None]:
import os
import kagglehub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler


from keras import backend as k #to make the keras job faster
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout,GRU, SimpleRNN, Bidirectional,LeakyReLU,ReLU,Conv1D,MaxPooling1D,Flatten
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam

# from prophet import Prophet
# from neuralprophet import NeuralProphet


from joblib import Parallel, delayed #for faster and parallel computation


In [None]:
import importlib

# Top-level packages whose installed versions are reported below.
libraries = [
    "os", "kagglehub", "pandas", "numpy", "matplotlib", "seaborn", "warnings",
    "statsmodels", "sklearn", "keras", "prophet", "neuralprophet", "joblib"
]

for lib in libraries:
    try:
        loaded = importlib.import_module(lib)
    except ImportError:
        # Package is missing from the environment; report it and carry on.
        print(f"{lib}: Not installed")
    else:
        # Standard-library modules usually expose no __version__ attribute.
        print(f"{lib}: {getattr(loaded, '__version__', 'Version info not available')}")


In [None]:


# Download latest version (will be downloaded to /kaggle/input/...)
# dataset_download returns the local directory that holds the dataset files.
dataset_path = kagglehub.dataset_download("paulbrabban/daily-minimum-temperatures-in-melbourne")

# Define the desired destination path
# NOTE: the value already ends with "/", so the shell commands below expand it
# to "/content//", which the shell treats the same as "/content/".
destination_path = "/content/"

# Create the destination directory if it doesn't exist
os.makedirs(destination_path, exist_ok=True)

# Copy the contents of the downloaded dataset to the destination path
# We use shell command for simplicity, you could also use shutil.copytree
# ("{dataset_path}/." copies the directory's contents rather than the directory itself.)
!cp -r "{dataset_path}/." "{destination_path}/"

print(f"Dataset copied to: {destination_path}")

# List files in the destination directory to confirm
!ls "{destination_path}"

In [None]:
## Inspect the bad record: pandas raises a parsing error for row number 3653,
## so print that raw line (1-based) to see what is wrong with it.

with open("/content/daily-minimum-temperatures-in-me.csv") as csv_file:
    for line_no, raw_line in enumerate(csv_file, start=1):
        if line_no != 3653:
            continue
        print(raw_line)
        break




In [None]:
# Load the series with the date column as a DatetimeIndex.
# - skipfooter=1 drops the trailing non-data line of the file.
# - on_bad_lines="skip" drops rows with the wrong number of fields.
# skipfooter is not supported by the default C parser, so the Python engine is
# requested explicitly instead of relying on pandas' silent fallback (the
# accompanying ParserWarning is hidden by the global warnings filter).
data = pd.read_csv(
    "/content/daily-minimum-temperatures-in-me.csv",
    parse_dates=['Date'],
    index_col='Date',
    delimiter=',',
    skipfooter=1,
    on_bad_lines="skip",
    engine="python",
)
data.head(5)

In [None]:
#checking for null values
data.isnull().sum()

In [None]:
data.info()

In [None]:
# Change the datatype from object to number
# errors='coerce' turns every value that cannot be parsed as a number into NaN
# instead of raising, so malformed entries surface as missing values.
data['Daily minimum temperatures in Melbourne, Australia, 1981-1990'] = pd.to_numeric(data['Daily minimum temperatures in Melbourne, Australia, 1981-1990'], errors='coerce')

# Re-check the dtypes and non-null count after the conversion.
data.info()

In [None]:
data.index

In [None]:
data.describe()

Up to this point, the file has been read with pandas and the bad records have been skipped.

The index is the date, and the single column holds the daily minimum temperature for Melbourne, Australia, 1981-1990.


In [None]:
# Shorten the verbose source column name to "Temperature" for the rest of the notebook.
temperature_column_map = {
    "Daily minimum temperatures in Melbourne, Australia, 1981-1990": "Temperature"
}
data = data.rename(columns=temperature_column_map)
data.head(5)

In [None]:
print(data.index.min(), data.index.max())

In [None]:
print(pd.infer_freq(data.index))

# This will return 'D', 'M', 'MS', etc. if the data is regular. If it returns None, the index is irregular.

**Identify Missing Dates**

In [None]:
# Build the complete daily calendar between the first and last observation and
# list the days that have no row in the data.
first_day, last_day = data.index.min(), data.index.max()
date_range = pd.date_range(start=first_day, end=last_day, freq='D')
missing = date_range.difference(data.index)
print("Missing Dates:", missing)

Missing dates are '1984-12-31', '1988-12-31'

In [None]:
# Filling the missing records, here first creating the index for missing dates
data = data.reindex(date_range)

In [None]:
# Reconfirming the missing dates are filled or not

date_range = pd.date_range(start=data.index.min(), end=data.index.max(), freq='D')
missing = date_range.difference(data.index)
print("Missing Dates:", missing)

In [None]:
print(data.loc['1984-12-30':'1985-01-02'])
print(data.loc['1988-12-30':'1989-01-02'])


In [None]:
# Filling the missing value using interpolation
# method='time' weights the interpolation by the actual time gap between index values.
# The result is assigned back to the column: calling interpolate(inplace=True)
# on data['Temperature'] is chained assignment, which operates on a temporary
# object and leaves `data` unchanged when pandas Copy-on-Write is active.
data['Temperature'] = data['Temperature'].interpolate(method='time')

In [None]:
print(data.loc['1984-12-30':'1985-01-02'])
print(data.loc['1988-12-30':'1989-01-02'])


In [None]:
data

In [None]:
data['Temperature'].plot(kind='line', figsize=(8, 4), title='Temperature')
plt.show()

In [None]:
data['Temperature'].plot(xlim=['1983-01-01','1983-12-31'],figsize=(12,4))

In [None]:
# Overlay the first four years on a shared day-of-year axis so the seasonal
# shape of each year can be compared directly.
# (Plotting the full series four times with a different xlim each time draws on
# the same axes, and only the last xlim, 1984, would remain visible.)
fig, ax = plt.subplots(figsize=(12, 4))
for year, color in [('1981', 'black'), ('1982', 'blue'), ('1983', 'green'), ('1984', 'red')]:
    # Partial-string indexing on the DatetimeIndex selects one calendar year.
    yearly = data['Temperature'].loc[year]
    ax.plot(yearly.index.dayofyear, yearly.to_numpy(), color=color, label=year)
ax.set_xlabel('Day of year')
ax.set_ylabel('Temperature')
ax.set_title('Temperature by day of year, 1981-1984')
ax.legend()
plt.show()



All the years show the same pattern: the temperature trends downward from May to July and then returns to the same pattern over the remaining months.

In [None]:

# Additive decomposition of the daily series into trend, seasonal and residual
# components, using a seasonal period of 365 observations (one year of daily data).
result = seasonal_decompose(data['Temperature'], model='additive', period=365)
# Draws the observed series and its three components as stacked subplots.
result.plot()
plt.show()


The chart has **four subplots**:

---

### **1️⃣ Observed (Top plot)**

* This is the **raw data**: daily temperatures.
* Shows all components together: trend + seasonal + residual.
* we can see clear **yearly seasonality** (summer vs winter) and **short-term fluctuations** (daily variation).

---

### **2️⃣ Trend (Second plot)**

* The **long-term direction** of the series.
* Smoothed line over time (usually with moving average or LOESS).
* Shows **gradual increase or decrease** over years.
* In the chart:

  * From 1981–1987, slight downward trend.
  * Around 1987–1990, slight upward trend.
* Captures **slow changes in average temperature**.

---

### **3️⃣ Seasonal (Third plot)**

* The **repeating pattern within each year**.
* Seasonal component is **periodic**, in this case yearly (`period=365`).
* Shows **winter lows and summer highs** recurring each year.
* The amplitude is consistent (additive model assumes constant amplitude).

---

### **4️⃣ Residual / Remainder (Bottom plot)**

* The **noise** or **unexplained variation** after removing trend and seasonality.
* Ideally looks **random around 0**.
* In chart:

  * Most points hover around 0.
  * Occasional spikes → unusual temperature events or measurement noise.

---

### ✅ How to interpret this decomposition:

1. **Trend** → underlying long-term increase/decrease.
2. **Seasonal** → repeated patterns (summer/winter cycles).
3. **Residual** → random fluctuations, errors, or outliers.
4. **Observed** → combination of all three.

---

💡 **Why use decomposition?**

* Helps **understand the structure** of your series.
* **Preprocessing**: remove trend/seasonality before modeling residuals.
* **Forecasting**: you can forecast trend and seasonality separately, then combine.


In [None]:
result.seasonal.plot(figsize=(8, 4), title='Seasonal Component')
plt.show()

In [None]:
data['Temperature'].max()

In [None]:
data.resample(rule='A').min()

In [None]:
data.resample(rule='A').max()

**Test for Stationarity**

Use Augmented Dickey-Fuller test (ADF test):



*   p-value < 0.05 → reject null hypothesis → data is stationary
*   p-value > 0.05 → not stationary → apply transformations



In [None]:

# Augmented Dickey-Fuller test on the temperature series.
# The outcome is stored under its own name so that `result` keeps holding the
# seasonal decomposition (its .seasonal component is plotted in an earlier
# cell, which would fail if re-run after `result` had been replaced by a tuple).
adf_result = adfuller(data['Temperature'].dropna())
print('ADF Statistic:', adf_result[0])  # element 0: test statistic
print('p-value:', adf_result[1])        # element 1: p-value


The result above means the data is stationary (i.e. the mean and variance do not change over time).

For ARIMA or SARIMA, the value of I (i.e. the order of differencing, d) can therefore be 0.

In [None]:
# 365-observation rolling mean and standard deviation of the temperature series.
yearly_window = data['Temperature'].rolling(window=365)
rolmean = yearly_window.mean()
rolstd = yearly_window.std()

# Original series and both rolling statistics on one chart.
fig, ax = plt.subplots(figsize=(12, 4))
for series, line_color, line_label in (
    (data['Temperature'], 'blue', 'Original'),
    (rolmean, 'red', 'Rolling Mean'),
    (rolstd, 'black', 'Rolling Std'),
):
    ax.plot(series, color=line_color, label=line_label)

ax.legend()
ax.set_title('Rolling Mean & Standard Deviation')
plt.show()


**The graph above shows both the red and the black line running flat (no upward or downward trend), which means the data is stationary.**

The result above means the data is stationary (i.e. the mean and variance do not change over time).

For ARIMA or SARIMA, the value of I (i.e. the order of differencing, d) can therefore be 0.

**Checking the ACF and PACF at 7, 14, 30 and 60 lags: because the data is daily, these correspond to weekly, two-weekly, monthly and two-monthly horizons.**

In [None]:
plot_acf(data['Temperature'].dropna())
plot_pacf(data['Temperature'].dropna())
plt.show()


In the ACF we can see positive correlation, which means the previous day's temperature has an impact on the next day's temperature.

Lag 2 shows a spike, so we may try an AR(2) model.

In [None]:
plot_acf(data['Temperature'].dropna(), lags=14)
plot_pacf(data['Temperature'].dropna(), lags=14)
plt.show()


In [None]:
plot_acf(data['Temperature'].dropna(), lags=30)
plot_pacf(data['Temperature'].dropna(), lags=30)
plt.show()


In [None]:
plot_acf(data['Temperature'].dropna(), lags=60)
plot_pacf(data['Temperature'].dropna(), lags=60)
plt.show()


In [None]:
# Weekly horizon (7 lags). The heading of this section lists lags 7, 14, 30 and
# 60, but the 14-lag plots were drawn twice and the 7-lag plots never; this
# cell now covers the missing weekly view instead of repeating the 14-lag one.
plot_acf(data['Temperature'].dropna(), lags=7)
plot_pacf(data['Temperature'].dropna(), lags=7)
plt.show()


In [None]:
# Split data into training and testing sets
# Chronological 80/20 split: the first 80% of rows train the models and the
# remaining 20% are held out, with no shuffling.
train_size = int(len(data) * 0.8)

# Explicit copies: forecast columns are assigned to test_data later on, and
# writing into a plain slice of `data` is ambiguous (SettingWithCopyWarning).
train_data = data.iloc[:train_size].copy()
test_data = data.iloc[train_size:].copy()

print('Training dataset shape:', train_data.shape)
print('Testing dataset shape:', test_data.shape)

In [None]:
test_data

In [None]:

results = []

# Fit and score against the target column only, so the search keeps working
# when train_data / test_data carry extra columns (e.g. a forecast column added
# by a later cell before this cell is re-run).
train_series = train_data['Temperature']
test_series = test_data['Temperature']

# Loop through p, d, q
for p in range(0, 5):
    for d in range(0, 3):
        for q in range(0, 5):
            try:
                # Fit model on training data
                model = ARIMA(train_series, order=(p, d, q))
                model_fit = model.fit()

                # Training evaluation (in-sample predictions)
                train_pred = model_fit.predict(start=0, end=len(train_series) - 1)
                train_rmse = np.sqrt(mean_squared_error(train_series, train_pred))
                train_r2 = r2_score(train_series, train_pred)

                # Testing evaluation (forecast over the whole test horizon)
                test_pred = model_fit.forecast(steps=len(test_series))
                test_rmse = np.sqrt(mean_squared_error(test_series, test_pred))
                test_r2 = r2_score(test_series, test_pred)
            except Exception as exc:
                # Skip invalid combos, but report them. A bare `except:` would
                # also swallow KeyboardInterrupt and hide genuine bugs.
                print(f"Skipping ARIMA order ({p},{d},{q}): {exc}")
                continue

            results.append({
                'p': p, 'd': d, 'q': q,
                'AIC': model_fit.aic,
                'BIC': model_fit.bic,
                'Train_RMSE': train_rmse,
                'Train_R2': train_r2,
                'Test_RMSE': test_rmse,
                'Test_R2': test_r2
            })

if not results:
    # Fail with a clear message instead of a KeyError from sorting an empty frame.
    raise RuntimeError("No ARIMA model could be fitted; see the messages above.")

# Convert to DataFrame
results_df = pd.DataFrame(results)

# Sort by Test_RMSE
# NOTE(review): ranking on test-set error uses the hold-out data for model selection.
results_df = results_df.sort_values(by="Test_RMSE").reset_index(drop=True)

# Show top 10 models
print(results_df.head(10))


In [None]:
# Pick one winning row per selection criterion: lowest training RMSE, highest
# training R², lowest AIC and lowest BIC.
train_rmse_col, train_r2_col = results_df['Train_RMSE'], results_df['Train_R2']
aic_col, bic_col = results_df['AIC'], results_df['BIC']

best_by_rmse = results_df.loc[train_rmse_col.idxmin()]
best_by_r2 = results_df.loc[train_r2_col.idxmax()]
best_by_aic = results_df.loc[aic_col.idxmin()]
best_by_bic = results_df.loc[bic_col.idxmin()]

for heading, winner in (
    ("\nBest by RMSE:\n", best_by_rmse),
    ("\nBest by R²:\n", best_by_r2),
    ("\nBest by AIC:\n", best_by_aic),
    ("\nBest by BIC:\n", best_by_bic),
):
    print(heading, winner)

In [None]:
# Fit ARIMA(1, 0, 2) on the training temperatures.
arima_order = (1, 0, 2)
best_model = ARIMA(train_data['Temperature'], order=arima_order)
best_fit = best_model.fit()

# In-sample predictions over the whole training window.
train_pred = best_fit.predict(start=0, end=len(train_data) - 1)

# Forecast on test set, one value per test row.
test_data['ARMA'] = best_fit.forecast(steps=len(test_data))

# Plot results: actual vs fitted on train, actual vs forecast on test.
fig, ax = plt.subplots(figsize=(12, 6))
curves = [
    (train_data.index, train_data['Temperature'], "Training Actual", "blue"),
    (train_data.index, train_pred, "Training Predicted", "red"),
    (test_data.index, test_data['Temperature'], "Test Actual", "green"),
    (test_data.index, test_data['ARMA'], "Test Forecast", "orange"),
]
for x_values, y_values, curve_label, curve_color in curves:
    ax.plot(x_values, y_values, label=curve_label, color=curve_color)
ax.set_xlabel("Date")
ax.set_ylabel("Temperature")
ax.set_title("ARIMA 1, 0, 2")
ax.legend()
plt.show()


In [None]:
# Fit ARIMA(4, 0, 2) on the training temperatures.
arima_order = (4, 0, 2)
best_model = ARIMA(train_data['Temperature'], order=arima_order)
best_fit = best_model.fit()

# In-sample predictions over the whole training window.
train_pred = best_fit.predict(start=0, end=len(train_data) - 1)

# Forecast on test set, one value per test row.
test_data['ARMA'] = best_fit.forecast(steps=len(test_data))

# Plot results: actual vs fitted on train, actual vs forecast on test.
fig, ax = plt.subplots(figsize=(12, 6))
curves = [
    (train_data.index, train_data['Temperature'], "Training Actual", "blue"),
    (train_data.index, train_pred, "Training Predicted", "red"),
    (test_data.index, test_data['Temperature'], "Test Actual", "green"),
    (test_data.index, test_data['ARMA'], "Test Forecast", "orange"),
]
for x_values, y_values, curve_label, curve_color in curves:
    ax.plot(x_values, y_values, label=curve_label, color=curve_color)
ax.set_xlabel("Date")
ax.set_ylabel("Temperature")
ax.set_title("ARIMA 4, 0, 2")
ax.legend()
plt.show()


In [None]:
# Fit ARIMA(2, 1, 1) on the training temperatures.
# The order is kept in one variable and reused for the chart title, so the
# title always names the model that was actually fitted (it previously read
# "ARIMA 4, 0, 3" while the fitted order was (2, 1, 1)).
arima_order = (2, 1, 1)
best_model = ARIMA(train_data['Temperature'], order=arima_order)
best_fit = best_model.fit()

# In-sample predictions over the whole training window.
train_pred = best_fit.predict(start=0, end=len(train_data)-1)

# Forecast on test set
test_data['ARMA'] = best_fit.forecast(steps=len(test_data))

# Plot results
plt.figure(figsize=(12,6))
plt.plot(train_data.index, train_data['Temperature'], label="Training Actual", color="blue")
plt.plot(train_data.index, train_pred, label="Training Predicted", color="red")
plt.plot(test_data.index, test_data['Temperature'], label="Test Actual", color="green")
plt.plot(test_data.index, test_data['ARMA'], label="Test Forecast", color="orange")
plt.xlabel("Date")
plt.ylabel("Temperature")
plt.title("ARIMA {}, {}, {} - Train & Test Prediction".format(*arima_order))
plt.legend()
plt.show()


In [None]:
# Refit ARIMA(1, 0, 2), the same order as the first ARIMA plot cell above.
# After this cell, best_model / best_fit / train_pred and test_data['ARMA']
# hold the ARIMA(1, 0, 2) results again.
best_model = ARIMA(train_data['Temperature'], order=(1,0,2))
best_fit = best_model.fit()

# In-sample predictions over the whole training window.
train_pred = best_fit.predict(start=0, end=len(train_data)-1)

# Forecast on test set
test_data['ARMA'] = best_fit.forecast(steps=len(test_data))

# Plot results
plt.figure(figsize=(12,6))
plt.plot(train_data.index, train_data['Temperature'], label="Training Actual", color="blue")
plt.plot(train_data.index, train_pred, label="Training Predicted", color="red")
plt.plot(test_data.index, test_data['Temperature'], label="Test Actual", color="green")
plt.plot(test_data.index, test_data['ARMA'], label="Test Forecast", color="orange")
plt.xlabel("Date")
plt.ylabel("Temperature")
plt.title(f"ARIMA 1, 0, 2 ")
plt.legend()
plt.show()


SARIMAX (ARIMA with Seasonal)

In [None]:
train_data

In [None]:
results = []

# Define seasonal period (90 for quarterly / ~3 months)
seasonal_period = 90

# Fit and score against the target column only. By the time this cell runs,
# test_data also holds the 'ARMA' forecast column; scoring the whole DataFrame
# against a 1-D forecast makes mean_squared_error raise for every combination,
# which leaves the grid without a single recorded model.
train_series = train_data['Temperature']
test_series = test_data['Temperature']

# Loop through p, d, q and seasonal P, D, Q
for p in range(0, 5):      # non-seasonal AR
    for d in range(0, 1):  # non-seasonal differencing
        for q in range(0, 3):  # non-seasonal MA
            for P in range(0, 2):   # seasonal AR
                for D in range(0, 2):  # seasonal differencing
                    for Q in range(0, 2):  # seasonal MA
                        try:
                            model = SARIMAX(
                                train_series,
                                order=(p, d, q),
                                seasonal_order=(P, D, Q, seasonal_period),
                                enforce_stationarity=False,
                                enforce_invertibility=False
                            )
                            model_fit = model.fit(disp=False)

                            # --- Predictions ---
                            train_pred = model_fit.predict(start=0, end=len(train_series)-1)
                            test_pred = model_fit.forecast(steps=len(test_series))

                            # --- Skip invalid lengths ---
                            if len(train_pred) != len(train_series) or len(test_pred) != len(test_series):
                                continue

                            # --- Metrics ---
                            train_rmse = np.sqrt(mean_squared_error(train_series, train_pred))
                            train_r2 = r2_score(train_series, train_pred)
                            test_rmse = np.sqrt(mean_squared_error(test_series, test_pred))
                            test_r2 = r2_score(test_series, test_pred)

                            # --- Append results ---
                            results.append({
                                'p': p, 'd': d, 'q': q,
                                'P': P, 'D': D, 'Q': Q,
                                'AIC': model_fit.aic,
                                'BIC': model_fit.bic,
                                'Train_RMSE': train_rmse,
                                'Train_R2': train_r2,
                                'Test_RMSE': test_rmse,
                                'Test_R2': test_r2
                            })

                        except Exception as e:
                            # Report the failing combination and move on to the next one.
                            print(f"Error with parameters (p,d,q)=({p},{d},{q}) and seasonal (P,D,Q)=({P},{D},{Q}): {e}")
                            continue

# --- Results summary ---
if results:
    results_df = pd.DataFrame(results)
    results_df = results_df.sort_values(by="Test_RMSE").reset_index(drop=True)
    print("\nTop 10 SARIMAX Models:")
    print(results_df.head(10))
else:
    print("No valid SARIMAX models were fitted.")


In [None]:
results_df = results_df.sort_values(by="AIC").reset_index(drop=True)
results_df.head(10)

In [None]:
# Fit the chosen SARIMAX specification on the training temperatures.
# Both orders live in variables that also feed the chart title, so the title
# cannot drift from the fitted model (it previously claimed a seasonal period
# of 365 while the model uses 90).
sarimax_order = (4, 0, 1)
sarimax_seasonal_order = (1, 1, 1, 90)
best_model = SARIMAX(
    train_data['Temperature'],
    order=sarimax_order,
    seasonal_order=sarimax_seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
)
best_fit = best_model.fit()

# In-sample predictions over the whole training window.
train_pred = best_fit.predict(start=0, end=len(train_data)-1)

# Forecast on test set
test_data['SARIMAX'] = best_fit.forecast(steps=len(test_data))

# Plot results
plt.figure(figsize=(12,6))
plt.plot(train_data.index, train_data['Temperature'], label="Training Actual", color="blue")
plt.plot(train_data.index, train_pred, label="Training Predicted", color="red")
plt.plot(test_data.index, test_data['Temperature'], label="Test Actual", color="green")
plt.plot(test_data.index, test_data['SARIMAX'], label="Test Forecast", color="orange")
plt.xlabel("Date")
plt.ylabel("Temperature")
plt.title(f"SARIMAX order={sarimax_order}, seasonal_order={sarimax_seasonal_order}")
plt.legend()
plt.show()

SimpleRNN

In [None]:
# Min-max scaler that maps the fitted value range onto [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))

# Fit on training data only
# (the test split is transformed with the training min/max, so no test-set
# statistics leak into the scaling).
train_scaled = scaler.fit_transform(train_data[['Temperature']])
test_scaled = scaler.transform(test_data[['Temperature']])
# NOTE(review): the sequence-building cell below creates the model inputs from
# the raw 'Temperature' values rather than from these scaled arrays - confirm
# whether the networks were meant to be trained on scaled data.


In [None]:
window_size = 30

In [None]:
def create_sequences(data, window_size=30):
    """Build sliding-window samples for next-step prediction.

    Each input sample is ``window_size`` consecutive entries of ``data`` and
    its target is the entry immediately following that window.

    Args:
        data: Indexable, sliceable sequence (e.g. a NumPy array) of observations.
        window_size: Number of consecutive entries in each input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays holding ``len(data) - window_size``
        windows and their targets; both are empty when ``data`` has no more
        than ``window_size`` entries.
    """
    n_samples = len(data) - window_size
    windows = [data[start:start + window_size] for start in range(n_samples)]
    targets = [data[start + window_size] for start in range(n_samples)]
    return np.array(windows), np.array(targets)





In [None]:
# The sequences are built from the raw (unscaled) temperatures, so every
# downstream RMSE is expressed in the original temperature units. The values
# get their own names instead of reusing train_scaled / test_scaled, which
# would overwrite the MinMaxScaler output with data that is not scaled.
train_values = train_data['Temperature'].values
test_values = test_data['Temperature'].values

# Create training sequences
X_train, y_train = create_sequences(train_values, window_size)

# Prepend the last `window_size` training values so the very first test
# observation also has a full input window; this yields one sample per test row.
X_test, y_test = create_sequences(np.concatenate([train_values[-window_size:], test_values]), window_size)

# Reshape for RNN: (samples, timesteps, features)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


In [None]:
# RNN model: a single 64-unit SimpleRNN layer followed by a one-unit output layer.
rnn_model = Sequential()
rnn_model.add(SimpleRNN(64, activation='tanh', input_shape=(window_size, 1)))
rnn_model.add(Dense(1))

# Mean squared error loss, Adam optimizer with default settings.
rnn_model.compile(optimizer='adam', loss='mse')

# Train model; validation_split=0.1 holds out the last 10% of the training samples.
rnn_fit_options = dict(epochs=100, batch_size=32, verbose=1, validation_split=0.1)
history = rnn_model.fit(X_train, y_train, **rnn_fit_options)


In [None]:
rnn_model.summary()

In [None]:
# Predict the target of every training and test window.
train_pred_rnn, test_pred_rnn = rnn_model.predict(X_train), rnn_model.predict(X_test)

# RMSE and R² per split.
train_rmse, train_r2 = (
    np.sqrt(mean_squared_error(y_train, train_pred_rnn)),
    r2_score(y_train, train_pred_rnn),
)
test_rmse, test_r2 = (
    np.sqrt(mean_squared_error(y_test, test_pred_rnn)),
    r2_score(y_test, test_pred_rnn),
)

print("RNN Results:")
print(f"Train RMSE: {train_rmse:.3f}, Train R²: {train_r2:.3f}")
print(f"Test RMSE: {test_rmse:.3f}, Test R²: {test_r2:.3f}")


In [None]:
# One summary row for the RNN, shaped like the rows of results_df; the ARIMA
# order columns and information criteria do not apply to a neural network.
rnn_results_df = pd.DataFrame([{
    'p': 'RNN', 'd': '-', 'q': '-',
    'AIC': np.nan, 'BIC': np.nan,
    'Train_RMSE': train_rmse,
    'Train_R2': train_r2,
    'Test_RMSE': test_rmse,
    'Test_R2': test_r2
}])

# Drop any RNN row left by an earlier run of this cell, so that re-running it
# replaces the row instead of appending a duplicate.
is_rnn_row = results_df['p'].astype(str) == 'RNN'
results_df = pd.concat([results_df[~is_rnn_row], rnn_results_df], ignore_index=True)

# Sort by Test_RMSE again
results_df = results_df.sort_values(by="Test_RMSE").reset_index(drop=True)

# Show the RNN row together with its rank (index) in the sorted table. After
# sorting, the row can land anywhere, so tail() is not guaranteed to contain it.
print(results_df[results_df['p'].astype(str) == 'RNN'])

In [None]:

# Actual vs predicted values, one figure per split (training first, then testing).
rnn_figures = [
    (y_train, train_pred_rnn, "Training Data (Actual)", "RNN Prediction (Train)", "red",
     "RNN Training Data vs Predictions"),
    (y_test, test_pred_rnn, "Testing Data (Actual)", "RNN Prediction (Test)", "green",
     "RNN Testing Data vs Predictions"),
]
for actual, predicted, actual_label, predicted_label, predicted_color, figure_title in rnn_figures:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(actual, label=actual_label, color="blue")
    ax.plot(predicted, label=predicted_label, color=predicted_color)
    ax.set_title(figure_title)
    ax.set_xlabel("Time Step")
    ax.set_ylabel("Temperature")
    ax.legend()
    plt.show()


LSTM (Long Short-Term Memory)

In [None]:
# LSTM model: a single 64-unit LSTM layer followed by a one-unit output layer.
lstm_model = Sequential()
lstm_model.add(LSTM(64, activation="tanh", input_shape=(window_size, 1)))
lstm_model.add(Dense(1))

# Mean squared error loss, Adam optimizer with default settings.
lstm_model.compile(optimizer="adam", loss="mse")

# Train model; validation_split=0.1 holds out the last 10% of the training samples.
lstm_fit_options = dict(epochs=100, batch_size=32, verbose=1, validation_split=0.1)
history = lstm_model.fit(X_train, y_train, **lstm_fit_options)


In [None]:
lstm_model.summary()

In [None]:
# Predict the target of every training and test window.
train_pred_lstm, test_pred_lstm = lstm_model.predict(X_train), lstm_model.predict(X_test)

# RMSE and R² per split.
train_rmse, train_r2 = (
    np.sqrt(mean_squared_error(y_train, train_pred_lstm)),
    r2_score(y_train, train_pred_lstm),
)
test_rmse, test_r2 = (
    np.sqrt(mean_squared_error(y_test, test_pred_lstm)),
    r2_score(y_test, test_pred_lstm),
)

print("LSTM Results:")
print(f"Train RMSE: {train_rmse:.3f}, Train R²: {train_r2:.3f}")
print(f"Test RMSE: {test_rmse:.3f}, Test R²: {test_r2:.3f}")


In [None]:

# These figures show the LSTM predictions (train_pred_lstm / test_pred_lstm),
# so the legend entries and titles say "LSTM"; they previously read "RNN",
# carried over from the SimpleRNN plotting cell.

# Plot Training Data
plt.figure(figsize=(12,6))
plt.plot(y_train, label="Training Data (Actual)", color="blue")
plt.plot(train_pred_lstm, label="LSTM Prediction (Train)", color="red")
plt.title("LSTM Training Data vs Predictions")
plt.xlabel("Time Step")
plt.ylabel("Temperature")
plt.legend()
plt.show()

# Plot Testing Data
plt.figure(figsize=(12,6))
plt.plot(y_test, label="Testing Data (Actual)", color="blue")
plt.plot(test_pred_lstm, label="LSTM Prediction (Test)", color="green")
plt.title("LSTM Testing Data vs Predictions")
plt.xlabel("Time Step")
plt.ylabel("Temperature")
plt.legend()
plt.show()


In [None]:
# Two stacked LSTM layers (64 then 32 units), each with 20% input dropout,
# followed by a one-unit output layer. The first layer returns the full
# sequence so the second LSTM receives one vector per time step.
lstm_model = Sequential()
lstm_model.add(LSTM(64, return_sequences=True, input_shape=(window_size, 1), dropout=0.2))
lstm_model.add(LSTM(32, dropout=0.2))
lstm_model.add(Dense(1))

lstm_model.compile(optimizer="adam", loss="mse")

# Stop once validation loss has not improved for 10 epochs and roll back to the best weights.
early_stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

lstm_fit_options = dict(epochs=200, batch_size=32, validation_split=0.1, callbacks=[early_stop])
history = lstm_model.fit(X_train, y_train, **lstm_fit_options)


In [None]:
lstm_model.summary()

In [None]:
# Predict the target of every training and test window.
train_pred_lstm, test_pred_lstm = lstm_model.predict(X_train), lstm_model.predict(X_test)

# RMSE and R² per split.
train_rmse, train_r2 = (
    np.sqrt(mean_squared_error(y_train, train_pred_lstm)),
    r2_score(y_train, train_pred_lstm),
)
test_rmse, test_r2 = (
    np.sqrt(mean_squared_error(y_test, test_pred_lstm)),
    r2_score(y_test, test_pred_lstm),
)

print("LSTM Results")
print(f"Train → RMSE: {train_rmse:.3f}, R²: {train_r2:.3f}")
print(f"Test  → RMSE: {test_rmse:.3f}, R²: {test_r2:.3f}")


In [None]:
# Actual vs predicted values, one figure per split (training first, then testing).
lstm_figures = [
    (y_train, train_pred_lstm, "Training Data (Actual)", "LSTM Prediction (Train)", "red",
     "LSTM Training Data vs Predictions"),
    (y_test, test_pred_lstm, "Testing Data (Actual)", "LSTM Prediction (Test)", "green",
     "LSTM Testing Data vs Predictions"),
]
for actual, predicted, actual_label, predicted_label, predicted_color, figure_title in lstm_figures:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(actual, label=actual_label, color="blue")
    ax.plot(predicted, label=predicted_label, color=predicted_color)
    ax.set_title(figure_title)
    ax.set_xlabel("Time Step")
    ax.set_ylabel("Temperature")
    ax.legend()
    plt.show()


GRU (Gated Recurrent Unit)

In [None]:

# GRU model: a single 132-unit GRU layer followed by a one-unit output layer.
gru_model = Sequential()
gru_model.add(GRU(132, activation="tanh", input_shape=(window_size,1)))
gru_model.add(Dense(1))

# Mean squared error loss, Adam optimizer with default settings.
gru_model.compile(optimizer="adam", loss="mse")

# Train GRU; validation_split=0.1 holds out the last 10% of the training samples.
gru_fit_options = dict(epochs=300, batch_size=32, verbose=1, validation_split=0.1)
history = gru_model.fit(X_train, y_train, **gru_fit_options)


In [None]:
# Predict the target of every training and test window.
train_pred_gru, test_pred_gru = gru_model.predict(X_train), gru_model.predict(X_test)

# RMSE and R² per split.
train_rmse, train_r2 = (
    np.sqrt(mean_squared_error(y_train, train_pred_gru)),
    r2_score(y_train, train_pred_gru),
)
test_rmse, test_r2 = (
    np.sqrt(mean_squared_error(y_test, test_pred_gru)),
    r2_score(y_test, test_pred_gru),
)

print("GRU Results")
print(f"Train → RMSE: {train_rmse:.3f}, R²: {train_r2:.3f}")
print(f"Test  → RMSE: {test_rmse:.3f}, R²: {test_r2:.3f}")


In [None]:
gru_model.summary()

In [None]:
# Actual vs GRU-predicted values, one figure per split (training first, then testing).
gru_figures = [
    (y_train, train_pred_gru, "Training Data (Actual)", "GRU Prediction (Train)", "red",
     "GRU Training Data vs Predictions"),
    (y_test, test_pred_gru, "Testing Data (Actual)", "GRU Prediction (Test)", "green",
     "GRU Testing Data vs Predictions"),
]
for actual, predicted, actual_label, predicted_label, predicted_color, figure_title in gru_figures:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(actual, label=actual_label, color="blue")
    ax.plot(predicted, label=predicted_label, color=predicted_color)
    ax.set_title(figure_title)
    ax.set_xlabel("Time Step")
    ax.set_ylabel("Temperature")
    ax.legend()
    plt.show()


In [None]:
# Stop once validation loss has not improved for 50 epochs and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)

# Build GRU model with 3 layers (132 -> 64 -> 32 units). The first two GRU
# layers return full sequences so the next recurrent layer gets one vector per
# time step, and each is followed by 10% dropout.
gru_model = Sequential()
gru_model.add(GRU(132, activation='tanh', return_sequences=True, input_shape=(window_size, 1)))
gru_model.add(Dropout(0.1))
gru_model.add(GRU(64, activation='tanh', return_sequences=True))
gru_model.add(Dropout(0.1))
gru_model.add(GRU(32, activation='tanh'))
gru_model.add(Dense(1))  # output layer

# Compile model
gru_model.compile(optimizer='adam', loss='mse')

# Train model
gru_fit_options = dict(
    epochs=500,
    batch_size=32,
    verbose=1,
    validation_split=0.1,
    callbacks=[early_stop],
)
history = gru_model.fit(X_train, y_train, **gru_fit_options)


In [None]:
gru_model.summary()

In [None]:
# Predict the target of every training and test window.
train_pred_gru, test_pred_gru = gru_model.predict(X_train), gru_model.predict(X_test)

# RMSE and R² per split.
train_rmse, train_r2 = (
    np.sqrt(mean_squared_error(y_train, train_pred_gru)),
    r2_score(y_train, train_pred_gru),
)
test_rmse, test_r2 = (
    np.sqrt(mean_squared_error(y_test, test_pred_gru)),
    r2_score(y_test, test_pred_gru),
)

print("GRU Results")
print(f"Train → RMSE: {train_rmse:.3f}, R²: {train_r2:.3f}")
print(f"Test  → RMSE: {test_rmse:.3f}, R²: {test_r2:.3f}")


In [None]:
# Actual vs GRU-predicted values, one figure per split (training first, then testing).
gru_figures = [
    (y_train, train_pred_gru, "Training Data (Actual)", "GRU Prediction (Train)", "red",
     "GRU Training Data vs Predictions"),
    (y_test, test_pred_gru, "Testing Data (Actual)", "GRU Prediction (Test)", "green",
     "GRU Testing Data vs Predictions"),
]
for actual, predicted, actual_label, predicted_label, predicted_color, figure_title in gru_figures:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(actual, label=actual_label, color="blue")
    ax.plot(predicted, label=predicted_label, color=predicted_color)
    ax.set_title(figure_title)
    ax.set_xlabel("Time Step")
    ax.set_ylabel("Temperature")
    ax.legend()
    plt.show()


Bidirectional RNNs



In [None]:
# Bidirectional RNN model: a 64-unit SimpleRNN wrapped in Bidirectional,
# followed by a one-unit output layer.
bidir_model = Sequential()
bidir_model.add(Bidirectional(SimpleRNN(64, activation="tanh"), input_shape=(window_size, 1)))
bidir_model.add(Dense(1))

# Mean squared error loss, Adam optimizer with default settings.
bidir_model.compile(optimizer="adam", loss="mse")

# Train model; validation_split=0.1 holds out the last 10% of the training samples.
bidir_fit_options = dict(epochs=500, batch_size=32, verbose=1, validation_split=0.1)
history = bidir_model.fit(X_train, y_train, **bidir_fit_options)

In [None]:
bidir_model.summary()

In [None]:
# Predict the target of every training and test window.
train_pred_bidir, test_pred_bidir = bidir_model.predict(X_train), bidir_model.predict(X_test)

# RMSE and R² per split.
train_rmse, train_r2 = (
    np.sqrt(mean_squared_error(y_train, train_pred_bidir)),
    r2_score(y_train, train_pred_bidir),
)
test_rmse, test_r2 = (
    np.sqrt(mean_squared_error(y_test, test_pred_bidir)),
    r2_score(y_test, test_pred_bidir),
)

print("Bidirectional RNN Results")
print(f"Train → RMSE: {train_rmse:.3f}, R²: {train_r2:.3f}")
print(f"Test  → RMSE: {test_rmse:.3f}, R²: {test_r2:.3f}")

In [None]:
# Actual vs BiRNN-predicted values, one figure per split (training first, then testing).
bidir_figures = [
    (y_train, train_pred_bidir, "Training Data (Actual)", "Bidirectional RNN Prediction (Train)", "red",
     "Bidirectional RNN Training Data vs Predictions"),
    (y_test, test_pred_bidir, "Testing Data (Actual)", "Bidirectional RNN Prediction (Test)", "green",
     "Bidirectional RNN Testing Data vs Predictions"),
]
for actual, predicted, actual_label, predicted_label, predicted_color, figure_title in bidir_figures:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(actual, label=actual_label, color="blue")
    ax.plot(predicted, label=predicted_label, color=predicted_color)
    ax.set_title(figure_title)
    ax.set_xlabel("Time Step")
    ax.set_ylabel("Temperature")
    ax.legend()
    plt.show()


In [None]:
# Stop once validation loss has not improved for 50 epochs and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)

# Deep bidirectional network: four Bidirectional SimpleRNN layers (132, 64, 16
# and 32 units). The first three return full sequences so the following
# recurrent layer gets one vector per time step; LeakyReLU activations and
# dropout sit between them, and a small dense head produces the single output.
bidir_model = Sequential()
bidir_model.add(Bidirectional(SimpleRNN(132, return_sequences=True), input_shape=(window_size, 1)))
bidir_model.add(Dropout(0.2))

bidir_model.add(Bidirectional(SimpleRNN(64, return_sequences=True)))
bidir_model.add(LeakyReLU(alpha=0.1))
bidir_model.add(Dropout(0.1))

bidir_model.add(Bidirectional(SimpleRNN(16, return_sequences=True)))
bidir_model.add(LeakyReLU(alpha=0.1))
bidir_model.add(Dropout(0.1))

bidir_model.add(Bidirectional(SimpleRNN(32)))
bidir_model.add(Dense(16, activation="relu"))
bidir_model.add(Dense(1))

# Optimizer with gradient clipping for stability
optimizer = Adam(learning_rate=0.001, clipnorm=1.0)
bidir_model.compile(optimizer=optimizer, loss="mse")

bidir_fit_options = dict(
    epochs=500,
    batch_size=32,
    verbose=1,
    validation_split=0.1,
    callbacks=[early_stop],
)
history = bidir_model.fit(X_train, y_train, **bidir_fit_options)


In [None]:
bidir_model.summary()

In [None]:
# Predict the target of every training and test window.
train_pred_bidir, test_pred_bidir = bidir_model.predict(X_train), bidir_model.predict(X_test)

# RMSE and R² per split.
train_rmse, train_r2 = (
    np.sqrt(mean_squared_error(y_train, train_pred_bidir)),
    r2_score(y_train, train_pred_bidir),
)
test_rmse, test_r2 = (
    np.sqrt(mean_squared_error(y_test, test_pred_bidir)),
    r2_score(y_test, test_pred_bidir),
)

print("Bidirectional RNN Results")
print(f"Train → RMSE: {train_rmse:.3f}, R²: {train_r2:.3f}")
print(f"Test  → RMSE: {test_rmse:.3f}, R²: {test_r2:.3f}")

In [None]:
# Actual vs BiRNN-predicted values, one figure per split (training first, then testing).
bidir_figures = [
    (y_train, train_pred_bidir, "Training Data (Actual)", "Bidirectional RNN Prediction (Train)", "red",
     "Bidirectional RNN Training Data vs Predictions"),
    (y_test, test_pred_bidir, "Testing Data (Actual)", "Bidirectional RNN Prediction (Test)", "green",
     "Bidirectional RNN Testing Data vs Predictions"),
]
for actual, predicted, actual_label, predicted_label, predicted_color, figure_title in bidir_figures:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(actual, label=actual_label, color="blue")
    ax.plot(predicted, label=predicted_label, color=predicted_color)
    ax.set_title(figure_title)
    ax.set_xlabel("Time Step")
    ax.set_ylabel("Temperature")
    ax.legend()
    plt.show()


### **Stacked (Deep) RNNs**

In [None]:
# Stacked (deep) RNN: two SimpleRNN layers feeding a single-unit regression head.
stacked_rnn_model = Sequential([
    # return_sequences=True so the second recurrent layer receives the whole sequence.
    SimpleRNN(64, activation="tanh", return_sequences=True, input_shape=(window_size, 1)),
    SimpleRNN(64, activation="tanh"),  # Second RNN layer; emits only its final state
    Dense(1)
])

stacked_rnn_model.compile(optimizer="adam", loss="mse")

# 10% of the training windows are held out for validation. Use that signal to
# stop training once val_loss stalls and to restore the best weights, instead
# of always running all 500 epochs and keeping whatever the last epoch produced
# (same policy as the bidirectional model).
history = stacked_rnn_model.fit(
    X_train, y_train,
    epochs=500,
    batch_size=32,
    verbose=1,
    validation_split=0.1,
    callbacks=[EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)]
)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the stacked RNN.
stacked_rnn_model.summary()

In [None]:
# Run the trained stacked RNN over both splits.
train_pred_stacked, test_pred_stacked = (
    stacked_rnn_model.predict(X_train),
    stacked_rnn_model.predict(X_test),
)

# RMSE is the square root of sklearn's MSE; R² comes from r2_score.
train_rmse, train_r2 = (
    np.sqrt(mean_squared_error(y_train, train_pred_stacked)),
    r2_score(y_train, train_pred_stacked),
)
test_rmse, test_r2 = (
    np.sqrt(mean_squared_error(y_test, test_pred_stacked)),
    r2_score(y_test, test_pred_stacked),
)

print(
    "Stacked RNN Results",
    f"Train → RMSE: {train_rmse:.3f}, R²: {train_r2:.3f}",
    f"Test  → RMSE: {test_rmse:.3f}, R²: {test_r2:.3f}",
    sep="\n",
)


In [None]:
# Training split: actual targets against the stacked-RNN predictions.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_train, label="Training Data (Actual)", color="blue")
ax.plot(train_pred_stacked, label="Stacked RNN Prediction (Train)", color="red")
ax.set(
    title="Stacked RNN Training Data vs Predictions",
    xlabel="Time Step",
    ylabel="Temperature",
)
ax.legend()
plt.show()

# Test split: actual targets against the stacked-RNN predictions.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test, label="Testing Data (Actual)", color="blue")
ax.plot(test_pred_stacked, label="Stacked RNN Prediction (Test)", color="green")
ax.set(
    title="Stacked RNN Testing Data vs Predictions",
    xlabel="Time Step",
    ylabel="Temperature",
)
ax.legend()
plt.show()


### **Hybrid CNN + RNN**

In [None]:

# Hybrid CNN + RNN: a 1-D convolution and max-pooling stage runs over each
# input window first, then a SimpleRNN consumes the pooled sequence and a
# single dense unit produces the prediction.
hybrid_model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation="relu", input_shape=(window_size, 1)),
    MaxPooling1D(pool_size=2),
    SimpleRNN(64, activation="tanh"),
    Dense(1)
])

hybrid_model.compile(optimizer="adam", loss="mse")

# Train model.
# 10% of the training windows are held out for validation. Use that signal to
# stop training once val_loss stalls and to restore the best weights, instead
# of always running all 500 epochs and keeping whatever the last epoch produced
# (same policy as the bidirectional model).
history = hybrid_model.fit(
    X_train, y_train,
    epochs=500,
    batch_size=32,
    verbose=1,
    validation_split=0.1,
    callbacks=[EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)]
)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the hybrid CNN + RNN model.
hybrid_model.summary()

In [None]:
# Run the trained hybrid model over both splits.
train_pred_hybrid, test_pred_hybrid = (
    hybrid_model.predict(X_train),
    hybrid_model.predict(X_test),
)

# RMSE is the square root of sklearn's MSE; R² comes from r2_score.
train_rmse, train_r2 = (
    np.sqrt(mean_squared_error(y_train, train_pred_hybrid)),
    r2_score(y_train, train_pred_hybrid),
)
test_rmse, test_r2 = (
    np.sqrt(mean_squared_error(y_test, test_pred_hybrid)),
    r2_score(y_test, test_pred_hybrid),
)

print(
    "Hybrid CNN + RNN Results",
    f"Train → RMSE: {train_rmse:.3f}, R²: {train_r2:.3f}",
    f"Test  → RMSE: {test_rmse:.3f}, R²: {test_r2:.3f}",
    sep="\n",
)


In [None]:
# Training split: actual targets against the hybrid CNN+RNN predictions.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_train, label="Training Data (Actual)", color="blue")
ax.plot(train_pred_hybrid, label="Hybrid CNN+RNN Prediction (Train)", color="red")
ax.set(
    title="Hybrid CNN+RNN Training Data vs Predictions",
    xlabel="Time Step",
    ylabel="Temperature",
)
ax.legend()
plt.show()

# Test split: actual targets against the hybrid CNN+RNN predictions.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test, label="Testing Data (Actual)", color="blue")
ax.plot(test_pred_hybrid, label="Hybrid CNN+RNN Prediction (Test)", color="green")
ax.set(
    title="Hybrid CNN+RNN Testing Data vs Predictions",
    xlabel="Time Step",
    ylabel="Temperature",
)
ax.legend()
plt.show()


### **Prophet**

In [None]:
# Preview the first five rows of `data` before it is reshaped for Prophet.
data.head(5)

In [None]:
# Move the index into a regular 'Date' column (an unnamed index comes out of
# reset_index() as 'index', hence the rename).
# Guarded so that re-running this cell, or running it after the later rename
# to 'ds', does not turn the already-reset RangeIndex into a second, bogus
# 'Date' column.
if 'Date' not in data.columns and 'ds' not in data.columns:
    data = data.reset_index().rename(columns={'index': 'Date'})
data.head()

In [None]:
# Reshape into the two-column layout used by the Prophet cells:
# 'ds' (datetime) and 'y' (value), ordered chronologically with rows
# containing missing values removed.
data = (
    data
    .rename(columns={'Date': 'ds', 'Temperature': 'y'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
    .sort_values('ds')
    .dropna()
)

data.head()


In [None]:
# Show column dtypes and non-null counts to confirm 'ds' is datetime and nothing is missing.
data.info()

In [None]:
# Chronological 80/20 split: the first 80% of rows train the model, the rest is held out.
train_size = int(len(data) * 0.8)
train_data_prophet, test_data_prophet = data.iloc[:train_size], data.iloc[train_size:]

# Initialize Prophet model.
# NOTE: with yearly_seasonality=True Prophet installs its built-in yearly
# component under the name 'yearly', which replaces any custom seasonality
# registered under that same name. The former
#     add_seasonality(name='yearly', period=365, fourier_order=5)
# call therefore had no effect on the fitted model and has been removed.
# To tune the yearly term, set yearly_seasonality=False and add the custom
# seasonality explicitly (verify against the installed Prophet version).
prophet_model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False,
    seasonality_mode='additive'  # or 'multiplicative'
)

prophet_model.fit(train_data_prophet)

# Predict on the dates that actually occur in the data rather than on a
# synthetic daily range of len(test) periods. If any calendar day is missing
# from the series, a generated range drifts away from the real test dates and
# the positional comparison below would score yhat against the wrong day.
future = data[['ds']].reset_index(drop=True)
forecast = prophet_model.predict(future)

# The positional split below relies on forecast rows matching data rows one-to-one.
assert (forecast['ds'].to_numpy() == data['ds'].to_numpy()).all(), \
    "forecast dates are not aligned with data['ds']; is `data` sorted by 'ds'?"

# -------------------
# Training set evaluation
# -------------------
forecast_train = forecast.iloc[:train_size]
rmse_train = np.sqrt(mean_squared_error(train_data_prophet['y'], forecast_train['yhat']))
r2_train = r2_score(train_data_prophet['y'], forecast_train['yhat'])
print(f"Train → RMSE: {rmse_train:.3f}, R²: {r2_train:.3f}")

# -------------------
# Test set evaluation
# -------------------
forecast_test = forecast.iloc[train_size:]
rmse_test = np.sqrt(mean_squared_error(test_data_prophet['y'], forecast_test['yhat']))
r2_test = r2_score(test_data_prophet['y'], forecast_test['yhat'])
print(f"Test → RMSE: {rmse_test:.3f}, R²: {r2_test:.3f}")


# Plot forecast (includes training and test periods)
fig1 = prophet_model.plot(forecast)
plt.title("Prophet Forecast - Daily Temperature")
plt.show()

# Plot components (trend + seasonality)
fig2 = prophet_model.plot_components(forecast)
plt.show()

In [None]:
# Index observations and predictions by date, then attach yhat to the actuals.
forecast_all = forecast[['ds', 'yhat']].set_index('ds')
data_all = data.set_index('ds')
combined = data_all.join(forecast_all, how='left')

# Full history: observations against Prophet's fit/forecast, with the
# train/test boundary marked by a dashed vertical line.
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(data_all.index, data_all['y'], label='Actual Data', color='blue')
ax.plot(forecast_all.index, forecast_all['yhat'], label='Predicted (Prophet)', color='orange')
ax.axvline(x=test_data_prophet['ds'].iloc[0], color='red', linestyle='--', label='Train/Test Split')
ax.set(
    title="Prophet Forecast: Actual vs Predicted (Full Data)",
    xlabel="Date",
    ylabel="Temperature",
)
ax.legend()
ax.grid(True)
plt.show()

# Zoomed-in view of the held-out test period only.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(test_data_prophet['ds'], test_data_prophet['y'], label='Actual Test Data', color='blue')
ax.plot(forecast_test['ds'], forecast_test['yhat'], label='Predicted (Test Forecast)', color='orange')
ax.set(
    title="Prophet Forecast: Test Data vs Predicted",
    xlabel="Date",
    ylabel="Temperature",
)
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Display the actuals left-joined with Prophet's yhat on the date index.
combined

### **NeuralProphet**

In [None]:
import numpy as np

# Compatibility shim: make sure the `np.NaN` alias exists, defining it as
# np.nan when this NumPy build does not provide it.
# NOTE(review): presumably needed because NeuralProphet still references np.NaN — confirm.
try:
    np.NaN
except AttributeError:
    np.NaN = np.nan


In [None]:
# NeuralProphet needs 'ds' as a datetime column.
data['ds'] = pd.to_datetime(data['ds'])

# Chronological 80/20 split.
train_size = int(len(data) * 0.8)
train_data, test_data = data.iloc[:train_size], data.iloc[train_size:]

# NeuralProphet model: yearly seasonality only, additive mode.
# ('multiplicative' gave worse results:
#  Train → RMSE: 2.884, R²: 0.494 & Test → RMSE: 3.095, R²: 0.431)
np_model = NeuralProphet(
    seasonality_mode='additive',
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False,
    learning_rate=0.01,
    batch_size=64,
    epochs=100,
)

metrics = np_model.fit(train_data, freq='D')

# Forecast one period per held-out test row beyond the training data and
# include in-sample predictions for the training history as well.
future = np_model.make_future_dataframe(
    train_data,
    periods=len(test_data),
    n_historic_predictions=True,
)
forecast = np_model.predict(future)


In [None]:
# Evaluate performance.
# Pair predictions with observations by date ('ds') instead of by row
# position. Slicing `forecast` positionally only lines up when the series has
# no missing calendar days and the forecast has exactly len(train)+len(test)
# rows; a date-based join is correct in both the gap-free and the gappy case.
# The left join keeps one row per observation, so train_forecast/test_forecast
# stay the same length as train_data/test_data for the plots that follow.
predicted = forecast.drop(columns=['y'], errors='ignore')
train_forecast = train_data[['ds', 'y']].merge(predicted, on='ds', how='left')
test_forecast = test_data[['ds', 'y']].merge(predicted, on='ds', how='left')

# Score only the dates that received a prediction; report any that did not.
train_scored = train_forecast.dropna(subset=['y', 'yhat1'])
test_scored = test_forecast.dropna(subset=['y', 'yhat1'])
unmatched = (len(train_forecast) - len(train_scored)) + (len(test_forecast) - len(test_scored))
if unmatched:
    print(f"Note: {unmatched} observation(s) had no matching forecast date and were excluded from the metrics.")

train_rmse = np.sqrt(mean_squared_error(train_scored['y'], train_scored['yhat1']))
train_r2 = r2_score(train_scored['y'], train_scored['yhat1'])
test_rmse = np.sqrt(mean_squared_error(test_scored['y'], test_scored['yhat1']))
test_r2 = r2_score(test_scored['y'], test_scored['yhat1'])

print(f"Train → RMSE: {train_rmse:.3f}, R²: {train_r2:.3f}")
print(f"Test  → RMSE: {test_rmse:.3f}, R²: {test_r2:.3f}")



In [None]:
# Actual vs. predicted temperature for both splits on one set of axes.
fig, ax = plt.subplots(figsize=(14, 6))

# Training period.
ax.plot(train_data['ds'], train_data['y'], label='Train Actual', color='blue')
ax.plot(train_data['ds'], train_forecast['yhat1'], label='Train Predicted', color='cyan')

# Held-out test period.
ax.plot(test_data['ds'], test_data['y'], label='Test Actual', color='orange')
ax.plot(test_data['ds'], test_forecast['yhat1'], label='Test Predicted', color='red')

ax.set(
    title="NeuralProphet: Actual vs Predicted Temperature",
    xlabel="Date",
    ylabel="Temperature",
)
ax.legend()
ax.grid(True)
plt.show()

