In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

In [4]:
# Notebook-wide configuration.
# City codes; they are spliced into the "./Data/..._<city>" file names below.
cities = ["ny", "il", "tx", "fl"]
# Coordinates, presumably index-aligned with `cities` — TODO confirm.
latitude = [40.79736, 41.78701, 30.1444, 25.7738]
longitude = [-73.97785, -87.77166, -97.66876, -80.1936]
# ISO dates, presumably the span of the downloaded history — TODO confirm.
start_date, end_date = "2016-01-01", "2024-03-17"
# Number of consecutive rows fed to the LSTM to predict the following row's 'tmax'.
time_steps = 10

In [10]:
def trainModel(city, name_prefix=""):
    """Train, evaluate and save an LSTM that predicts the next row's 'tmax' for one city.

    Reads "./Data/data_cleaned_<city>.pkl", standardizes the five feature
    columns, builds sliding windows of the module-level `time_steps` rows,
    trains on the first 80% of the windows, evaluates on the remaining 20%
    and writes the model to "./Data/<name_prefix>model_<city>.keras".

    Args:
        city: City code used in the input and output file names.
        name_prefix: Optional prefix for the saved model's file name.

    Returns:
        The mean squared error on the held-out windows (in standardized
        units). It is also printed.

    Raises:
        ValueError: If the series has no more than `time_steps` rows, so not
            even one (window, target) pair can be built.
    """
    features = ['day_of_year', 'tmax', 'tmin', 'prec', 'humi']
    target = 'tmax'
    # Look the target column up by name instead of hard-coding its position.
    target_idx = features.index(target)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    raw = pd.read_pickle(f"./Data/data_cleaned_{city}.pkl")
    raw['date'] = pd.to_datetime(raw['date'])
    raw = raw.set_index('date')

    # One figure per call; the title tells the cities apart when run in a loop.
    raw['tmax_avg'].plot(figsize=(10, 6), title=f"tmax_avg ({city})")

    df = raw.rename(columns={"day": "day_of_year", "tmax_avg": "tmax", "tmin_avg": "tmin",
                             'prec_om': "prec", 'humi_vc': "humi"})[features]
    # Impute gaps with the column mean (assignment rather than inplace mutation).
    df = df.fillna(df.mean())

    if len(df) <= time_steps:
        raise ValueError(
            f"Need more than {time_steps} rows to build a training window for "
            f"'{city}', got {len(df)}"
        )

    # NOTE(review): the scaler is fitted on the whole series (training and test
    # rows alike), so the reported test MSE is mildly optimistic. Inference code
    # has to standardize its inputs with these same statistics.
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df)

    # Window i covers rows [i, i + time_steps); its label is the target value
    # of the row that immediately follows the window.
    n_windows = len(df_scaled) - time_steps
    X = np.array([df_scaled[i:i + time_steps] for i in range(n_windows)])
    y = df_scaled[time_steps:, target_idx]

    # Chronological split: the last 20% of the windows are held out.
    split = int(len(X) * 0.80)
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    # LSTM model; an explicit Input layer replaces the deprecated
    # `input_shape=` argument on the first layer.
    model = Sequential([
        tf.keras.Input(shape=(time_steps, len(features))),
        LSTM(70, activation='relu'),
        Dense(1),
    ])
    model.compile(optimizer=Adam(0.001), loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=80, batch_size=32, validation_split=0.2, verbose=1)

    # Evaluate the model
    mse = model.evaluate(X_test, y_test, verbose=0)
    print(f'Test MSE: {mse}')
    model.save(f"./Data/{name_prefix}model_{city}.keras")

    # The original also ran model.predict(X_test) and discarded the result;
    # that wasted pass is dropped and the score is returned instead.
    return mse

In [13]:
def _load_feature_frame(path, features):
    """Read a pickled daily frame, index it by date and keep only the model's feature columns."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    frame = pd.read_pickle(path)
    frame['date'] = pd.to_datetime(frame['date'])
    frame = frame.set_index('date')
    frame = frame.rename(columns={"day": "day_of_year", "tmax_avg": "tmax", "tmin_avg": "tmin",
                                  'prec_om': "prec", 'humi_vc': "humi"})
    return frame[features]


def getPrediction(city, name_prefix="", offset=0):
    """Predict the next 'tmax' for one city from the latest `time_steps` rows.

    Loads "./Data/<name_prefix>model_<city>.keras" and
    "./Data/prediction_data_cleaned_<city>.pkl", standardizes the chosen
    window, runs the model and converts the output back to original units.

    The scaler is refitted on "./Data/data_cleaned_<city>.pkl", the file
    trainModel reads, using the same mean imputation. The inputs and the
    inverse transform therefore use the statistics the network was trained
    with. Fitting a fresh scaler on the prediction file, as before, gives
    different means and variances and skews the forecast.

    Args:
        city: City code used in the file names.
        name_prefix: Prefix of the saved model's file name.
        offset: Number of most recent rows to leave out, so the window ends
            `offset` rows before the end of the prediction data.

    Returns:
        The predicted 'tmax' in original units, as a float. It is also printed.

    Raises:
        ValueError: If `offset` is negative, or the prediction data has fewer
            than `time_steps + offset` rows.
    """
    import warnings

    if offset < 0:
        raise ValueError(f"offset must be >= 0, got {offset}")

    features = ['day_of_year', 'tmax', 'tmin', 'prec', 'humi']
    target = 'tmax'
    target_idx = features.index(target)

    model = tf.keras.models.load_model(f"./Data/{name_prefix}model_{city}.keras")
    df = _load_feature_frame(f"./Data/prediction_data_cleaned_{city}.pkl", features)

    # Normalize with the statistics of the training data, not of the
    # prediction data.
    scaler = StandardScaler()
    try:
        train_df = _load_feature_frame(f"./Data/data_cleaned_{city}.pkl", features)
    except FileNotFoundError:
        # Best effort: keep the previous behaviour when the training file is absent.
        warnings.warn(
            f"Training data for '{city}' not found; fitting the scaler on the "
            "prediction data instead, which may not match the model's training scale."
        )
        scaler.fit(df)
    else:
        scaler.fit(train_df.fillna(train_df.mean()))

    # Select the window: the last `time_steps` rows, shifted back by `offset`.
    end = len(df) - offset
    start = end - time_steps
    if start < 0:
        raise ValueError(
            f"Need at least {time_steps + offset} rows of prediction data for "
            f"'{city}', got {len(df)}"
        )
    # Assign the filled copy instead of mutating a slice of `df` in place.
    window = df.iloc[start:end]
    window = window.fillna(window.mean())

    # Passing the DataFrame keeps the column names the scaler was fitted with.
    window_scaled = scaler.transform(window)
    batch = np.expand_dims(window_scaled, axis=0)  # add the batch axis
    predicted_tmax_scaled = model.predict(batch)
    # summary() prints by itself; wrapping it in print() only added a stray "None".
    model.summary()

    # inverse_transform expects all feature columns, so place the prediction
    # in the target column of an otherwise-zero row.
    dummy_array = np.zeros((1, len(features)))
    dummy_array[0, target_idx] = predicted_tmax_scaled[0, 0]
    predicted_tmax = scaler.inverse_transform(dummy_array)[0, target_idx]

    print(f"Predicted 'tmax' for {city} for next day: {predicted_tmax}")
    return float(predicted_tmax)

In [None]:
# Fit and save one model per configured city.
for city in cities:
    trainModel(city=city)

In [None]:
# Print the next 'tmax' forecast for every configured city.
for city in cities:
    getPrediction(city=city)