## Importing Libraries

In [10]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

## Dataset Loading and Structure Check

In [2]:
# Load the weather observations from a CSV file in the working directory.
Odey = pd.read_csv("weather_data.csv")

# Check structure: first rows, then column dtypes and non-null counts.
print(Odey.head())
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() would add a stray "None" line after the summary.
Odey.info()

       Location            Date_Time  Temperature_C  Humidity_pct  \
0     San Diego  2024-01-14 21:12:46      10.683001     41.195754   
1     San Diego  2024-05-17 15:22:10       8.734140     58.319107   
2     San Diego  2024-05-11 09:30:59      11.632436     38.820175   
3  Philadelphia  2024-02-26 17:32:39      -8.628976     54.074474   
4   San Antonio  2024-04-29 13:23:51      39.808213     72.899908   

   Precipitation_mm  Wind_Speed_kmh  
0          4.020119        8.233540  
1          9.111623       27.715161  
2          4.607511       28.732951  
3          3.183720       26.367303  
4          9.598282       29.898622  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 6 columns):
 #   Column            Non-Null Count    Dtype  
---  ------            --------------    -----  
 0   Location          1000000 non-null  object 
 1   Date_Time         1000000 non-null  object 
 2   Temperature_C     1000000 non-null  float64
 

## Data Manipulation and Feature Engineering

In [3]:
# Parse the timestamp column, then derive calendar features from it.
# assign() evaluates the keyword arguments in order, so the later lambdas
# see the already-parsed Date_Time column.
Odey = Odey.assign(
    Date_Time=lambda frame: pd.to_datetime(frame['Date_Time']),
    hour=lambda frame: frame['Date_Time'].dt.hour,
    month=lambda frame: frame['Date_Time'].dt.month,
    weekday=lambda frame: frame['Date_Time'].dt.weekday,  # Monday=0 ... Sunday=6
)

In [5]:
# Typical values per feature: the mean (rounded to 2 decimals) for the
# continuous measurements, and the first mode returned for the calendar fields.
# pandas hands back NumPy scalars; casting to built-in float/int keeps the
# printed dict free of np.float64(...)/np.int64(...) wrappers on NumPy >= 2
# and makes the values directly JSON-serializable.
defaults = {
    "humidity": float(round(Odey["Humidity_pct"].mean(), 2)),
    "precipitation": float(round(Odey["Precipitation_mm"].mean(), 2)),
    "wind_speed": float(round(Odey["Wind_Speed_kmh"].mean(), 2)),
    "hour": int(Odey["hour"].mode().iloc[0]),
    "month": int(Odey["month"].mode().iloc[0]),
    "weekday": int(Odey["weekday"].mode().iloc[0]),
}

print("Suggested defaults:", defaults)

Suggested defaults: {'humidity': 60.02, 'precipitation': 5.11, 'wind_speed': 15.0, 'hour': 1, 'month': 1, 'weekday': 1}


## Model Training and Evaluation

In [7]:
# Prediction target and the predictor columns: three weather measurements
# plus the calendar fields derived from Date_Time.
# Note: 'Location' is not included among the predictors.
target = 'Temperature_C'
features = [
    'Humidity_pct',
    'Precipitation_mm',
    'Wind_Speed_kmh',
    'hour',
    'month',
    'weekday',
]

y = Odey[target]
X = Odey[features]

In [8]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Train model
# n_jobs=-1 lets scikit-learn build the 100 trees on all available CPU cores;
# the default is a single core, which is slow on a training set this large.
# The seed and hyperparameters are unchanged.
model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
model.fit(X_train_scaled, y_train)

RandomForestRegressor(random_state=42)

In [11]:
# Score the fitted model on the held-out test rows.
y_pred = model.predict(X_test_scaled)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)  # back in the target's units (°C)
# An R² at or below zero means the predictions are no better than always
# predicting the mean of y_test.
r2 = r2_score(y_test, y_pred)

print(
    f"MAE: {mae:.2f} °C",
    f"RMSE: {rmse:.2f} °C",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

MAE: 12.67 °C
RMSE: 14.73 °C
R² Score: -0.04
