In [1]:
import weather as we
import json

import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler

In [2]:
with open('data/data.json', 'r') as json_file:
    data = json.load(json_file)

In [3]:
windows = we.create_data_windows(data, hours = 24)

Number of windows created: 16718


In [4]:
feats = [
    'dt','temp_min','temp','temp_max','dew_point','pressure',
    'humidity','wind_speed','wind_deg','wind_gust','clouds_all',
    'rain_1h','rain_3h','snow_1h','snow_3h'
]
data_array = np.array([
    [
        entry[feats[0]],  # Timestamp
        entry[feats[1]],  # Min Temperature
        entry[feats[2]],  # Temperature
        entry[feats[3]],  # Max Temperature
        entry[feats[4]],  # Dew point
        entry[feats[5]],  # Pressure
        entry[feats[6]],  # Humidity
        entry[feats[7]],  # Wind speed
        entry[feats[8]],  # Wind degree
        entry[feats[9]],  # Wind gust
        entry[feats[10]],  # Clouds
        entry[feats[11]],  # rain 1h
        entry[feats[12]],  # rain 3h
        entry[feats[13]],  # snow 1h
        entry[feats[14]],  # snow 3h
        
    ] for entry in windows.values()
], dtype=object)

In [5]:
data_array.shape

(16718, 15, 24)

In [6]:
num_samples = data_array.shape[0]  
num_timesteps = data_array.shape[2]
num_features = data_array.shape[1] - 4

X = data_array[:, :num_features, :]  
Y = data_array[:, num_features, :]     
Y = np.nan_to_num(Y, nan = 0.0)

X_reshaped = X.reshape(num_samples, num_timesteps, num_features)

# Print the new shape to verify
print("New shape of X:", X_reshaped.shape)  # Should be (557, 720, 8)
print("Shape of Y:", Y.shape)  # Should be (557, 720)

New shape of X: (16718, 24, 11)
Shape of Y: (16718, 24)


In [7]:
X_tr, X_te, Y_tr, Y_te = we.split_data(X_reshaped, Y, frac = 0.2)

In [8]:
# from sklearn.preprocessing import MinMaxScaler

# X_train shape: (n_samples, sequence_length, n_features)
n_samples_r, seq_length_r, n_features_r = X_tr.shape
n_samples_e, seq_length_e, n_features_e = X_te.shape

# # Reshape X_train to 2D: (n_samples * sequence_length, n_features)
X_train_reshaped = X_tr.reshape(-1, n_features_r)
X_test_reshaped = X_te.reshape(-1, n_features_e)


scaler = RobustScaler()  
X_train_scaled = scaler.fit_transform(X_train_reshaped).reshape(n_samples_r, seq_length_r, n_features_r)
X_test_scaled = scaler.transform(X_test_reshaped).reshape(n_samples_e, seq_length_e, n_features_e)

Y_train_scaled = scaler.fit_transform(Y_tr)
Y_test_scaled = scaler.transform(Y_te)

In [9]:
X_train_scaled[np.isnan(X_train_scaled)] = 0.0
X_test_scaled[np.isnan(X_test_scaled)] = 0.0

Y_train_scaled[np.isnan(Y_train_scaled)] = 0.0
Y_test_scaled[np.isnan(Y_test_scaled)] = 0.0


n_samples, n_time_steps, n_features = X_train_scaled.shape
# X_train_scaled_reshaped = X_train_scaled.reshape(-1, n_features)
# X_test_scaled_reshaped = X_test_scaled.reshape(-1, n_features)

In [10]:
n_samples, time_steps, n_features = X_train_scaled.shape
X_train_scaled_reshaped = X_train_scaled.reshape(n_samples, time_steps * n_features)


n_samples, time_steps, n_features = X_test_scaled.shape
X_test_scaled_reshaped = X_test_scaled.reshape(n_samples, time_steps * n_features)


In [11]:
model = RandomForestRegressor(n_estimators=100, random_state=42)

In [12]:
model.fit(X_train_scaled_reshaped, Y_train_scaled)

RandomForestRegressor(random_state=42)

In [13]:
Y_pred = model.predict(X_test_scaled_reshaped)

In [14]:
mse = mean_squared_error(Y_test_scaled, Y_pred)
mae = mean_absolute_error(Y_test_scaled, Y_pred)
r2 = r2_score(Y_test, Y_pred)

NameError: name 'Y_test' is not defined

In [None]:
print(mse)
print(mae)
print(r2)

In [None]:
plt.scatter(Y_test_scaled, Y_pred)