# ClimateWins — Gradient Descent on Temperature (First 3 Stations; 1960, 1990, 2020)

In [4]:
import pandas as pd
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import os
from mpl_toolkits import mplot3d

from plotly.offline import download_plotlyjs, init_notebook_mode
from plotly.offline import plot, iplot
import plotly.graph_objects as go

In [5]:
# Load data
#Create a path to where your data is stored.
path = r'C:\Users\Ratul\anaconda_projects\Machine Learning'
#Read in the European weather data.
climate = pd.read_csv(os.path.join(path, '02 Data', 'Prepared Data', 'climate_scaled.csv'), index_col = False)
climate


Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,2.1,0.85,1.0180,0.32,0.09,0,0.7,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,19600102,1,6,2.1,0.84,1.0180,0.36,1.05,0,1.1,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,19600103,1,8,2.1,0.90,1.0180,0.18,0.30,0,0.0,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,19600104,1,3,2.1,0.92,1.0180,0.58,0.00,0,4.1,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,19600105,1,6,2.1,0.95,1.0180,0.65,0.14,0,5.4,...,3,0.80,1.0328,0.46,0.00,0,5.7,5.7,3.0,8.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,20221027,10,1,2.1,0.79,1.0248,1.34,0.22,0,7.7,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5
22946,20221028,10,6,2.1,0.77,1.0244,1.34,0.22,0,5.4,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5
22947,20221029,10,4,2.1,0.76,1.0227,1.34,0.22,0,6.1,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5
22948,20221030,10,5,2.1,0.80,1.0212,1.34,0.22,0,5.8,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5


In [6]:
# Extract station list
station_cols = [c for c in df.columns if c.endswith("_temp_mean")]
stations = sorted({c.split("_")[0] for c in station_cols})
stations_first3 = stations[:3]
years = [1960, 1990, 2020]

NameError: name 'df' is not defined

In [None]:
def get_xy_for_station_year(df, station, year):
    sub = df[df["DATE"].dt.year == year].copy()
    col = f"{station}_temp_mean"
    if col not in sub.columns:
        return None, None, pd.DataFrame()
    sub = sub[["DATE", col]].dropna()
    if sub.empty:
        return None, None, pd.DataFrame()
    sub["doy"] = sub["DATE"].dt.dayofyear.astype(float)
    X = (sub["doy"] - sub["doy"].min()) / (sub["doy"].max() - sub["doy"].min())
    y = sub[col].astype(float)
    return X.values.reshape(-1, 1), y.values, sub

In [None]:

def gradient_descent(X, y, alpha=0.5, n_iters=5000, theta0_init=0.0, theta1_init=0.0, tol=1e-12):
    m = len(y); x = X.flatten()
    theta0, theta1 = theta0_init, theta1_init
    hist = {"theta0": [], "theta1": [], "loss": []}
    for i in range(n_iters):
        y_pred = theta0 + theta1 * x
        error = y_pred - y
        loss = (error @ error) / (2 * m)
        grad0 = error.mean()
        grad1 = (error * x).mean()
        theta0 -= alpha * grad0
        theta1 -= alpha * grad1
        hist["theta0"].append(theta0)
        hist["theta1"].append(theta1)
        hist["loss"].append(loss)
        if i > 5 and abs(hist["loss"][-2] - loss) < tol:
            break
    import pandas as pd
    return theta0, theta1, pd.DataFrame(hist)

def compute_loss_surface(x, y, theta0_range, theta1_range, steps0=80, steps1=80):
    t0_vals = np.linspace(*theta0_range, steps0)
    t1_vals = np.linspace(*theta1_range, steps1)
    L = np.zeros((steps0, steps1)); m = len(y)
    for i, t0 in enumerate(t0_vals):
        for j, t1 in enumerate(t1_vals):
            pred = t0 + t1 * x
            err = pred - y
            L[i, j] = (err @ err) / (2 * m)
    return t0_vals, t1_vals, L

summary = []

for station in stations_first3:
    for year in years:
        X, y, sub = get_xy_for_station_year(df, station, year)
        if X is None or len(y) < 5:
            summary.append({"station": station, "year": year, "status": "No data"})
            continue
        
        theta0_init = float(np.nanmean(y))
        theta1_init = 0.0
        alpha = 0.5
        
        theta0, theta1, hist = gradient_descent(X, y, alpha=alpha, n_iters=5000,
                                                theta0_init=theta0_init, theta1_init=theta1_init)
        summary.append({
            "station": station,
            "year": year,
            "status": "OK",
            "n_points": len(y),
            "theta0_start": theta0_init,
            "theta1_start": 0.0,
            "theta0_end": float(hist["theta0"].iloc[-1]),
            "theta1_end": float(hist["theta1"].iloc[-1]),
            "iterations": len(hist),
            "alpha": alpha,
            "final_loss": float(hist["loss"].iloc[-1])
        })
        
        # Loss profile
        plt.figure(figsize=(8,5))
        plt.plot(hist["loss"].values)
        plt.xlabel("Iteration"); plt.ylabel("Loss (MSE/2)")
        plt.title(f"Loss Profile — {station} {year}")
        plt.tight_layout()
        plt.show()
        
        # Loss surface
        x = X.flatten()
        t0_range = (theta0 - 10, theta0 + 10)
        t1_range = (theta1 - 10, theta1 + 10)
        t0_vals, t1_vals, L = compute_loss_surface(x, y, t0_range, t1_range, 80, 80)
        T0, T1 = np.meshgrid(t1_vals, t0_vals)
        fig = plt.figure(figsize=(8,6))
        ax = fig.add_subplot(111, projection="3d")
        ax.plot_surface(T0, T1, L, linewidth=0, antialiased=True)
        ax.set_xlabel("theta1"); ax.set_ylabel("theta0"); ax.set_zlabel("Loss")
        ax.set_title(f"Loss Surface — {station} {year}")
        plt.tight_layout()
        plt.show()
        
        # Fit visualization
        plt.figure(figsize=(8,5))
        plt.scatter(sub["DATE"].values, y, s=10)
        y_fit = theta0 + theta1 * x
        plt.plot(sub["DATE"].values, y_fit)
        plt.xlabel("Date"); plt.ylabel("Mean Temp (scaled units)")
        plt.title(f"Daily Temps & Fit — {station} {year}")
        plt.tight_layout()
        plt.show()

pd.DataFrame(summary)