In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *

In [5]:
df = pd.read_csv(r"../Datasets/NYC_2000s.csv")

In [6]:
df['DATE'] = pd.to_datetime(df['DATE'], errors='coerce')
df['date_ordinal'] = df['DATE'].apply(lambda x: x.toordinal() if pd.notnull(x) else None)

In [7]:
df['LATITUDE'] = pd.to_numeric(df['LATITUDE'], errors='coerce')
df['LONGITUDE'] = pd.to_numeric(df['LONGITUDE'], errors='coerce')

In [8]:
df = df.dropna(subset=['date_ordinal', 'LATITUDE', 'LONGITUDE', 'SNOW', 'TAVG', 'PRCP'])

In [9]:
df['SNOW'] = pd.to_numeric(df['SNOW'], errors='coerce')
df['TAVG'] = pd.to_numeric(df['TAVG'], errors='coerce')
df['PRCP'] = pd.to_numeric(df['PRCP'], errors='coerce')

In [10]:
model_data_snow = df[['date_ordinal', 'LATITUDE', 'LONGITUDE', 'SNOW']].dropna()
model_data_temp = df[['date_ordinal', 'LATITUDE', 'LONGITUDE', 'TAVG']].dropna()
model_data_prcp = df[['date_ordinal', 'LATITUDE', 'LONGITUDE', 'PRCP']].dropna()

In [12]:
req_df = df[['date_ordinal', 'LATITUDE', 'LONGITUDE']]

In [13]:
req_df.head()

Unnamed: 0,date_ordinal,LATITUDE,LONGITUDE
0,730120,40.77898,-73.96925
1,730121,40.77898,-73.96925
2,730122,40.77898,-73.96925
3,730123,40.77898,-73.96925
4,730124,40.77898,-73.96925


In [14]:
req_df.skew()

date_ordinal    0.0018
LATITUDE        0.0000
LONGITUDE       0.0000
dtype: float64

In [None]:
def build_and_evaluate_nn(data, target_col, epochs=100, batch_size=32):
    X = data[['date_ordinal', 'LATITUDE', 'LONGITUDE']].values
    y = data[target_col].values
        
    X = X.reshape((X.shape[0], 1, X.shape[1]))
    
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
        
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(1))  
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    print(f"Training model to predict {target_col}...")
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)
        
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"{target_col} model Mean Squared Error: {mse:.3f}")

    r2 = r2_score(y_test, y_pred)
    print(f"{target_col} model R^2 Score: {r2:.3f}")
    
    model_filename = f"../models/nn_model_{target_col}.h5"
    model.save(model_filename)
    print(f"Model saved as {model_filename}\n")
    
    return model

In [38]:
model_snow = build_and_evaluate_nn(df, 'SNOW')
model_temp = build_and_evaluate_nn(df, 'TAVG')
model_prcp = build_and_evaluate_nn(df, 'PRCP')

  super().__init__(**kwargs)


Training model to predict SNOW...
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step




SNOW model Mean Squared Error: 310.141
SNOW model R^2 Score: -0.000
Model saved as nn_model_SNOW.h5

Training model to predict TAVG...


  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step




TAVG model Mean Squared Error: 99.956
TAVG model R^2 Score: -0.000
Model saved as nn_model_TAVG.h5

Training model to predict PRCP...


  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step




PRCP model Mean Squared Error: 81.526
PRCP model R^2 Score: -0.000
Model saved as nn_model_PRCP.h5

