In [44]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

In [14]:
# Simulate temperature readings throughout one day (0 - 24 hours).
# A locally seeded generator makes the noise, and every metric computed from
# this data, identical on each Restart & Run All.
rng = np.random.default_rng(42)
hours = np.linspace(0, 24, 100).reshape(-1, 1)  # column vector, shape (100, 1)

# Daily cycle: mean 15°C with amplitude 10°C. The 6-hour phase shift puts the
# minimum at hours 0/24 (night) and the maximum at hour 12 (midday).
# The noise takes its shape from `hours`, so the sample count lives in one place.
noise = rng.standard_normal(hours.shape) * 2  # Gaussian noise, std 2°C
temperature = (15 + 10 * np.sin((hours - 6) * np.pi / 12) + noise).ravel()


In [28]:
# Sanity check of the simulated dataset: number of samples, shape of the
# feature matrix, and the span of generated temperatures.
print(
    f"Samples: {len(hours)}",
    f"Features: {hours.shape}",
    f"Temperature range: {temperature.min():.2f}°C to {temperature.max():.2f}°C",
    sep="\n",
)

Samples: 100
Features: (100, 1)
Temperature range: 2.53°C to 28.05°C


In [30]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    hours, temperature, test_size=0.2, random_state=42
)

In [36]:
# Report how many rows landed in each partition and their share of all samples.
print("\n".join((
    f"Training sample: {len(X_train)} --> ({len(X_train)/len(hours) * 100 :.0f}%)",
    f"Test sample: {len(X_test)} --> ({len(X_test)/len(hours) * 100:.0f}%)",
)))

Training sample: 80 --> (80%)
Test sample: 20 --> (20%)


In [104]:
# Polynomial degrees to compare (1 through 10).
degrees_to_test = list(range(1, 11))

# A train-minus-test R² gap above this value is flagged as likely overfitting.
OVERFIT_R2_GAP = 0.1

# One record per degree: metrics plus the fitted model and its transformer.
results = []

print("Degree   | Train R²  | Test R²   | Train RMSE |  Test RMSE   |  MAE  | Status \n")

for degree in degrees_to_test:
    # Expand the single feature into powers 1..degree. The transformer is fit
    # on the training split and only applied to the test split.
    # include_bias=False omits the constant column.
    poly = PolynomialFeatures(degree=degree, include_bias=False)
    X_train_poly = poly.fit_transform(X_train)
    X_test_poly = poly.transform(X_test)

    # Ordinary least squares on the expanded features
    model = LinearRegression()
    model.fit(X_train_poly, y_train)

    # Predictions on both splits so train and test scores can be compared
    y_train_pred = model.predict(X_train_poly)
    y_test_pred = model.predict(X_test_poly)

    # Metrics: R² and RMSE on both splits, MAE on the test split only
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)
    train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
    test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
    test_mae = mean_absolute_error(y_test, y_test_pred)

    # Flag the degree when training R² exceeds test R² by more than the threshold
    overfitting = (train_r2 - test_r2) > OVERFIT_R2_GAP
    status = "Overfit" if overfitting else "Ok"

    # Store metrics together with the fitted objects for later inspection
    results.append({
        "degree": degree,
        "Train_R2": train_r2,
        "Test_R2": test_r2,
        "Train RMSE": train_rmse,
        # Legacy misspelled key, kept so any existing lookup of "Trin RMSE"
        # keeps working; use "Train RMSE" in new code.
        "Trin RMSE": train_rmse,
        "Test RMSE": test_rmse,
        "Test MAE": test_mae,
        "Overfitting": overfitting,
        "Model": model,
        "Poly": poly,
    })
    print(f"{degree:2d}       | {train_r2:7.4f}   | {test_r2:7.4f}   | {train_rmse:7.4f}     |  {test_rmse:7.4f}    |{test_mae:5.2f}  | {status}")



Degree   | Train R²  | Test R²   | Train RMSE |  Test RMSE   |  MAE  | Status 

 1       |  0.0156   | -0.0688   |  7.2552     |   6.4267    | 5.37  | Ok
 2       |  0.8745   |  0.8648   |  2.5902     |   2.2858    | 1.93  | Ok
 3       |  0.8745   |  0.8649   |  2.5900     |   2.2851    | 1.93  | Ok
 4       |  0.9401   |  0.9224   |  1.7892     |   1.7312    | 1.28  | Ok
 5       |  0.9410   |  0.9311   |  1.7759     |   1.6313    | 1.26  | Ok
 6       |  0.9461   |  0.9523   |  1.6971     |   1.3575    | 1.09  | Ok
 7       |  0.9488   |  0.9465   |  1.6549     |   1.4385    | 1.16  | Ok
 8       |  0.9488   |  0.9473   |  1.6544     |   1.4267    | 1.14  | Ok
 9       |  0.9515   |  0.9355   |  1.6102     |   1.5793    | 1.31  | Ok
10       |  0.9515   |  0.9359   |  1.6100     |   1.5738    | 1.31  | Ok
