In [105]:
from keras.models import Sequential
from keras.layers import Dense, GRU
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


In [106]:
# Load the data sets and combine them into a single usable dataframe called data series
data_series = pd.read_csv('./data/data_avg_temp.csv')

# rename columns to be more descriptive
data_series.columns = ['date', 'avg_temp', 'avg_temp_anomaly']
data_series.drop(columns=['avg_temp_anomaly'], inplace=True)

# append to the dataframe
data_cool_degree = pd.read_csv('./data/data_cool_degree_days.csv')
data_heat_degree = pd.read_csv('./data/data_heat_degree_days.csv')
data_max_temp = pd.read_csv('./data/data_max_temp.csv')
data_min_temp = pd.read_csv('./data/data_min_temp.csv')
data_palmer_z = pd.read_csv('./data/data_palmer_z.csv')
data_pdsi = pd.read_csv('./data/data_pdsi.csv')
data_phdi = pd.read_csv('./data/data_phdi.csv')
data_pmdi = pd.read_csv('./data/data_pmdi.csv')
data_precipitation = pd.read_csv('./data/data_precipitation.csv')



# Append columns to the data series
data_series['cool_degree_days'] = data_cool_degree['Value']
data_series['cool_degree_days_anomaly'] = data_cool_degree['Anomaly']

data_series['heat_degree_days'] = data_heat_degree['Value']
data_series['heat_degree_days_anomaly'] = data_heat_degree['Anomaly']

data_series['max_temp'] = data_max_temp['Value']
data_series['max_temp_anomaly'] = data_max_temp['Anomaly']

data_series['min_temp'] = data_min_temp['Value']
data_series['min_temp_anomaly'] = data_min_temp['Anomaly']

data_series['palmer_z'] = data_palmer_z['Value']
data_series['palmer_z_anomaly'] = data_palmer_z['Anomaly']

data_series['pdsi'] = data_pdsi['Value']
data_series['pdsi_anomaly'] = data_pdsi['Anomaly']

data_series['phdi'] = data_phdi['Value']
data_series['phdi_anomaly'] = data_phdi['Anomaly']

data_series['pmdi'] = data_pmdi['Value']
data_series['pmdi_anomaly'] = data_pmdi['Anomaly']

data_series['precipitation'] = data_precipitation['Value']
data_series['precipitation_anomaly'] = data_precipitation['Anomaly']




In [107]:
# Interpolate missing nan values based on neighboring values
data_series.interpolate(inplace=True)

In [108]:
# Convert dataframe to numpy array
avg_temp = data_series['avg_temp'].to_numpy()
data_series.drop(columns=['avg_temp'], inplace=True)
x = data_series.to_numpy()

# Expand first dim
x = np.expand_dims(x, axis=0)
avg_temp = np.expand_dims(avg_temp, axis=0)

# Split the data into training and testing sets
# print(x[0])
# print(avg_temp.shape)

In [109]:
# Train the GRU model
model = Sequential(
    [
        GRU(20, activation='relu', input_shape=(x.shape[1], x.shape[2])),
        Dense(1)
    ]
)

model.compile(optimizer='adam', loss='mse')
model.fit(x, avg_temp, epochs=100, batch_size=1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - loss: 383637344.0000
Epoch 48/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - loss: 372356896.0000
Epoch 49/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - loss: 361432384.0000
Epoch 50/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - loss: 350655616.0000
Epoch 51/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - loss: 340219936.0000
Epoch 52/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - loss: 330041888.0000
Epoch 53/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - loss: 320116736.0000
Epoch 54/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - loss: 310439680.0000
Epoch 55/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - loss: 301006656.0000
Epoch 56/100
[1m1/1[0m [32m━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7de3df520320>

In [None]:
# Predict the temperature by regressing the other features and then predicting average temperature
parameters = ['cool_degree_days', 'cool_degree_days_anomaly', 'heat_degree_days', 'heat_degree_days_anomaly', 'max_temp', 
              'max_temp_anomaly', 'min_temp', 'min_temp_anomaly', 'palmer_z', 'palmer_z_anomaly', 'pdsi', 'pdsi_anomaly', 
              'phdi', 'phdi_anomaly', 'pmdi', 'pmdi_anomaly', 'precipitation', 'precipitation_anomaly']
parameters = ['cool_degree_days',]
models = {}
scalers = {}

date = data_series['date'].to_numpy()
date_splice = np.zeros((len(date), 2))

param_models = []

for i, element in enumerate(date):
    # print(element)
    # Splice this string to get year and month
    date_splice[i,0] = int(str(element)[0:4])
    date_splice[i,1] = int(str(element)[5:7])

for param in parameters:
    y = data_series[param].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(date_splice, y, test_size=0.2, shuffle=False)

    print(X_train.shape)    
    model = Sequential([
        Dense(5, activation='relu', input_shape=(X_train.shape[1],)),  # input_shape=(2,)
        Dense(1)  # Output layer
    ])

    model.compile(optimizer='adam', loss='mse')
    model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

    # Save model
    param_models.append(model)

    # Evaluate
    # y_pred = model.predict(X_test)
    # rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    # print(y_pred)
    # print(rmse)







# for param in parameters:
#     # Define features and target for each parameter
#     # Adjust features as needed
#     param_features = ['GDP', 'Population', 'Year', f'Previous_{param}']
#     df[f'Previous_{param}'] = df[param].shift(1)
#     df_param = df.dropna()
    
#     X = df_param[param_features]
#     y = df_param[param]
    
#     # Split data
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    
#     # Scale features
#     scaler = StandardScaler()
#     X_train_scaled = scaler.fit_transform(X_train)
#     X_test_scaled = scaler.transform(X_test)
    
#     # Train model
#     regressor = RandomForestRegressor(n_estimators=100, random_state=42)
#     regressor.fit(X_train_scaled, y_train)
    
#     # Evaluate
#     y_pred = regressor.predict(X_test_scaled)
#     rmse = np.sqrt(mean_squared_error(y_test, y_pred))
#     print(f'RMSE for {param} prediction: {rmse}')
    
#     # Save models and scalers
#     models[param] = regressor
#     scalers[param] = scaler

# # Optionally, save all models and scalers
# joblib.dump(models, 'feature_regressors.pkl')
# joblib.dump(scalers, 'feature_scalers.pkl')


(1200, 2)
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 500789.7188 - val_loss: 158867.9844
Epoch 2/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 115014.7500 - val_loss: 15973.2109
Epoch 3/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 22896.4727 - val_loss: 678.4603
Epoch 4/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15905.3389 - val_loss: 40.4836
Epoch 5/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15803.4199 - val_loss: 35.1669
Epoch 6/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15890.9111 - val_loss: 29.1906
Epoch 7/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 14379.9023 - val_loss: 33.9428
Epoch 8/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 14638.4619 - val_loss: 47.0315
Epoch 9/10