# 📌 Step 1: Import libraries

In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

import warnings
# NOTE(review): 'ignore' with no category or module silences every warning for
# the rest of the kernel session, so any pandas / scikit-learn warnings raised
# by later cells will not be shown. Consider narrowing this to specific
# categories once the notebook runs cleanly.
warnings.filterwarnings('ignore')


# 📌 Step 2: Load the dataset

In [2]:

# Read the raw country-level temperature table; the path is relative to the
# notebook's working directory.
DATA_PATH = 'GlobalLandTemperaturesByCountry.csv'
df = pd.read_csv(DATA_PATH)

# Preview the first rows as the cell's output.
df.head()


Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


# 📌 Step 3: Filter for India and preprocess

In [None]:

# Number of monthly readings a year must have to be averaged, and how many
# previous years are used as lag features.
MONTHS_PER_YEAR = 12
N_LAGS = 3

# Keep India's rows that have no missing fields and derive the calendar year.
# The steps are chained so every column is added to a fresh frame rather than
# assigned onto a filtered slice (the pattern behind SettingWithCopyWarning).
# `df` is still rebound, as before, and re-running this cell is harmless.
df = (
    df.dropna()
    .loc[lambda frame: frame['Country'] == 'India']
    .assign(dt=lambda frame: pd.to_datetime(frame['dt']))
    .assign(Year=lambda frame: frame['dt'].dt.year)
)

# Average yearly temperature, restricted to years with all twelve monthly
# readings: the mean of a partially observed year is biased towards whichever
# season happens to be present.
monthly_stats = df.groupby('Year')['AverageTemperature'].agg(['mean', 'count'])
yearly = monthly_stats.loc[monthly_stats['count'] == MONTHS_PER_YEAR, 'mean']
if yearly.empty:
    raise ValueError("No complete years of temperature data found for India")

# NOTE: the column holds the mean absolute temperature (°C), not an anomaly
# against a baseline; the name 'TempAnomaly' is kept because later cells
# reference it.
yearly = yearly.rename('TempAnomaly')

# Lay the series on a gap-free year index before shifting. `shift` moves values
# by row position, so without this a missing year would make "lag 1" point at
# an older year than the previous calendar year.
all_years = pd.RangeIndex(int(yearly.index.min()), int(yearly.index.max()) + 1, name='Year')
data = yearly.reindex(all_years).to_frame()

# Create lag features
for lag in range(1, N_LAGS + 1):
    data[f'Temp_Lag_{lag}'] = data['TempAnomaly'].shift(lag)

# Drop years whose own value or any of whose lags is unavailable, and restore
# 'Year' as a regular column (Year, TempAnomaly, Temp_Lag_1..N).
data = data.dropna().reset_index()
data.head()


# 📌 Step 4: Data visualization

In [None]:

# Line chart of the yearly series, drawn through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(data['Year'], data['TempAnomaly'], marker='o')
ax.set_title("Average Temperature Anomaly Over Years (India)")
ax.set_xlabel("Year")
ax.set_ylabel("Temperature Anomaly (°C)")
ax.grid(True)
plt.show()


# 📌 Step 5: Prepare training and testing sets

In [None]:

# Features are the lagged temperatures; the target is the same-year value.
X = data.drop(['TempAnomaly', 'Year'], axis=1)
y = data['TempAnomaly']

# Chronological hold-out: the rows are ordered by year, so with shuffle=False
# the last 20% (the most recent years) become the test set. A shuffled split
# would train on years that come after test years, and because neighbouring
# rows share lag values the test scores would be optimistically biased.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fit the scaler on the training rows only so no test-set statistics leak into
# training, then apply the same transform to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# 📌 Step 6: Train models

In [None]:

# Seed for the stochastic estimators so that Restart & Run All reproduces the
# same fitted models and metrics.
RANDOM_STATE = 42

# Candidate regressors, keyed by the display name used in the evaluation cell.
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=RANDOM_STATE),
    'Gradient Boosting': GradientBoostingRegressor(random_state=RANDOM_STATE)
}

# Per-model test-set predictions and error metrics.
results = {}

for name, model in models.items():
    # Every model is trained and evaluated on the same scaled feature matrices.
    model.fit(X_train_scaled, y_train)
    preds = model.predict(X_test_scaled)
    results[name] = {
        'predictions': preds,
        'mae': mean_absolute_error(y_test, preds),
        'mse': mean_squared_error(y_test, preds),
        'r2': r2_score(y_test, preds)
    }


# 📌 Step 7: Evaluation

In [None]:

# For each fitted model: print its test-set metrics, then show a scatter of
# actual against predicted values.
for model_name, scores in results.items():
    print(
        f"Model: {model_name}",
        f"MAE: {scores['mae']:.4f}",
        f"MSE: {scores['mse']:.4f}",
        f"R2 Score: {scores['r2']:.4f}\n",
        sep="\n",
    )

    # Actual vs Predicted
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.scatter(y_test, scores['predictions'], alpha=0.7)
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    ax.set_title(f"{model_name} - Actual vs Predicted")
    ax.grid(True)
    plt.show()


# 📌 Step 8: Future prediction

In [None]:

# Build the feature row for the year AFTER the last observed one.
# The last row of X holds the lags of the final observed year, so predicting
# from it would only re-estimate that already-known year. Rolling the window
# forward by one step gives next year's lags: the newest observation becomes
# lag 1 and each existing lag moves one slot older (the oldest is dropped).
# Assumes X's columns are ordered Temp_Lag_1, Temp_Lag_2, ... as created earlier.
latest_lags = X.iloc[-1].to_numpy()
next_lags = np.concatenate(([y.iloc[-1]], latest_lags[:-1]))

# Keep the training column names so the fitted scaler receives the same
# feature layout it was fitted on.
next_features = pd.DataFrame([next_lags], columns=X.columns)

future_prediction = models['Gradient Boosting'].predict(scaler.transform(next_features))
print(f"🌡️ Projected Temperature Anomaly for Next Year: {future_prediction[0]:.4f}°C")

# Scenario: 0.5°C higher input
# NOTE(review): only the lagged temperatures are shifted; the model has no CO₂
# feature, so the printed label overstates what the scenario varies. Tree-based
# models also cannot extrapolate beyond the target range seen in training.
scenario = next_features + 0.5
scenario_prediction = models['Gradient Boosting'].predict(scaler.transform(scenario))
print(f"🌀 Scenario Anomaly if CO₂/Temp increases: {scenario_prediction[0]:.4f}°C")
