In [1]:
from psutil import virtual_memory
# Total memory reported by psutil, converted from bytes to decimal gigabytes.
# NOTE: this is `.total`, even though the message below says "available".
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the cut-off this notebook uses to call a runtime "high-RAM".
print('You are using a high-RAM runtime!' if ram_gb >= 20 else 'Not using a high-RAM runtime')

Your runtime has 17.2 gigabytes of available RAM

Not using a high-RAM runtime


In [2]:
# Read combined_data.csv into pandas data frame

import pandas as pd
import numpy as np
from datetime import datetime
import pandas as pd
import torch
import torch.nn as nn

import os

# Show every column when a DataFrame is displayed, and print NumPy floats
# with 15 digits of precision.
pd.set_option('display.max_columns', None)
np.set_printoptions(precision=15)

# Root of the radar data tree. Set the RADAR_DATA_DIR environment variable to
# run the notebook on another machine; when it is unset (or empty) the original
# local path is used. os.path.join(..., '') guarantees a trailing separator,
# because later cells build paths with plain string concatenation
# (root_dir + 'model/...').
root_dir = os.path.join(
  os.environ.get('RADAR_DATA_DIR') or "/Users/trevorwiebe/Ktor/radar_backend/radar_data/",
  '',
)

# Raw radar observations; must contain at least 'dateTime' and 'reflectivity',
# which the preparation step reads.
data = pd.read_csv(root_dir + 'data/combined_data.csv')

In [3]:
# Pick the first CUDA GPU when PyTorch can see one, otherwise fall back to CPU.
if torch.cuda.is_available():
  device = 'cuda:0'
else:
  device = 'cpu'

In [4]:
# Prepare the data frame as: reflectivity | minute/hour/month sin-cos features | remaining input columns (latitude, longitude) | reflectivity_1 ... reflectivity_15

from copy import deepcopy as dc

def prepare_dataframe_for_lstm(df, n_steps, group_cols=None):
  """Build the supervised-learning frame (target + lagged inputs) for the LSTM.

  The input frame is not modified. Columns of the returned frame, in order:

  1. ``reflectivity`` (the prediction target),
  2. ``minute_sin``, ``minute_cos``, ``hour_sin``, ``hour_cos``,
     ``month_sin``, ``month_cos`` - cyclical encodings of ``dateTime``,
  3. every other input column in its original order (``dateTime`` itself,
     and a ``datetime`` column if one exists, are dropped),
  4. ``reflectivity_1`` .. ``reflectivity_{n_steps}``, where
     ``reflectivity_i`` is the reflectivity found ``i`` rows earlier.

  Every row that contains a NaN in any column is dropped. This always
  removes the rows that do not have ``n_steps`` predecessors.

  Args:
    df: Frame with at least a ``dateTime`` column parseable by
      ``pd.to_datetime`` and a ``reflectivity`` column. Rows are expected to
      already be in the order the lags should follow.
    n_steps: Number of lag columns to create.
    group_cols: Optional column name or list of names identifying
      independent series (for example ``['latitude', 'longitude']``). When
      given, lags are computed inside each group so values from one series
      never leak into the history of another. The default (``None``)
      shifts over the whole frame, row by row.

  Returns:
    A new ``DataFrame`` laid out as described above.

  Raises:
    KeyError: if ``dateTime``, ``reflectivity`` or a ``group_cols`` entry is
      missing from ``df``.
  """
  timestamps = pd.to_datetime(df['dateTime'])

  # drop() returns a new frame, so the caller's data stays untouched without
  # paying for a separate deep copy of the whole input first.
  out = df.drop(columns=['dateTime', 'datetime'], errors='ignore')

  # Compute every lag before new columns are attached to `out`.
  if group_cols is None:
    shift = out['reflectivity'].shift
  else:
    shift = out.groupby(group_cols, sort=False)['reflectivity'].shift
  lagged = {f'reflectivity_{i}': shift(i) for i in range(1, n_steps + 1)}

  # Encode time-of-hour, time-of-day and month on the unit circle so that the
  # end of a cycle sits next to its start (day-of-month is not encoded).
  feature_cols = []
  cycles = (
    ('minute', timestamps.dt.minute, 60),
    ('hour', timestamps.dt.hour, 24),
    ('month', timestamps.dt.month, 12),
  )
  for name, values, period in cycles:
    angle = 2 * np.pi * values / period
    out[f'{name}_sin'] = np.sin(angle)
    out[f'{name}_cos'] = np.cos(angle)
    feature_cols += [f'{name}_sin', f'{name}_cos']

  for name, values in lagged.items():
    out[name] = values

  out = out.dropna()

  # Target first, then the time features, then everything else as it stands
  # (original columns followed by the lag columns).
  others = [col for col in out.columns
            if col != 'reflectivity' and col not in feature_cols]
  return out[['reflectivity'] + feature_cols + others]

# Number of previous reflectivity readings given to the model per sample.
lookback = 15

# Windowed frame: target, time features, pass-through columns and
# reflectivity_1 .. reflectivity_{lookback}.
df = prepare_dataframe_for_lstm(df=data, n_steps=lookback)


In [5]:
# Clamp non-positive reflectivity to 0.0, then drop windows that carry no signal.
# (The earlier note mentioned -99 - presumably a "no data" sentinel; confirm.)

# Target column plus every lag column. Derived from `lookback` so the list
# cannot fall out of sync with the lag columns built by prepare_dataframe_for_lstm.
reflectivity_columns = [f'reflectivity_{i}' for i in range(1, lookback + 1)]
reflectivity_columns.append('reflectivity')

# Replace every value that is <= 0 with 0 (this updates `df` in place)
df[reflectivity_columns] = df[reflectivity_columns].mask(df[reflectivity_columns] <= 0, 0)

# Keep only the rows where at least one reflectivity value is non-zero
no_zero_df = df[~(df[reflectivity_columns] == 0.0).all(axis=1)]

In [6]:
# Model inputs: the lagged reflectivity columns, selected by name rather than
# by position so the selection survives a change in the CSV's column set.
# With the current layout this is exactly what `.iloc[:, 9:]` returned.
X_df = no_zero_df[[f'reflectivity_{i}' for i in range(1, lookback + 1)]]

# Target: the current reflectivity, kept as a one-column DataFrame.
y_df = no_zero_df[['reflectivity']]

In [7]:
# Convert to NumPy in the layout the LSTM expects.
# X: (samples, timesteps, 1) - a trailing feature axis of size 1 is appended.
#    Timesteps follow X_df's column order (reflectivity_1 first).
# y: (samples,) - flat vector of targets.
# No value clamping happens here; that was done on the DataFrame earlier.

X = np.expand_dims(X_df.to_numpy(), axis=-1)
y = y_df.to_numpy().flatten()

In [8]:
# Sequential 80 / 10 / 10 split into train, validation and test sets
# (no shuffling: rows keep their existing order).

train_split = int(len(X) * .8)
val_split = int(len(X) * .9)

# np.split cuts at the two boundaries and returns the three consecutive pieces.
X_train, X_val, X_test = np.split(X, [train_split, val_split])
y_train, y_val, y_test = np.split(y, [train_split, val_split])

X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape

((13236425, 15, 1),
 (13236425,),
 (1654553, 15, 1),
 (1654553,),
 (1654554, 15, 1))

In [106]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model

# # Use this if starting from scratch
# model1 = Sequential()
# model1.add(InputLayer((lookback, 1)))
# # LSTM layer with 64 units and dropout for regularization
# model1.add(LSTM(64, return_sequences=False))  # return_sequences=False because we predict one value
# model1.add(Dropout(0.2))  # Helps prevent overfitting

# # Dense layer for additional feature extraction
# model1.add(Dense(32, activation='relu'))  # Increased neurons for more complexity
# model1.add(Dropout(0.2))  # More dropout

# # Final output layer (predicting a single value)
# model1.add(Dense(1, activation='linear'))

# model1.summary()

In [107]:
# Resume from the previously saved model instead of building a new one.
model1 = load_model(root_dir + 'model/model3.keras')

# The checkpoint writes to the same file the model was loaded from.
# NOTE(review): with save_best_only=True, "best" is tracked per fit() call, so
# the first epoch of a new run can overwrite a better model saved by an earlier
# run - confirm this is intended.
cp = ModelCheckpoint(
  root_dir + 'model/model3.keras',
  save_best_only=True,
)

# Re-compile for regression: MSE loss, RMSE reported as the metric, Adam at 1e-4.
model1.compile(
  optimizer=Adam(learning_rate=0.0001),
  loss=MeanSquaredError(),
  metrics=[RootMeanSquaredError()],
)

In [108]:
# Train for 4 more epochs, validating after each one; `cp` saves checkpoints.
model1.fit(
  X_train,
  y_train,
  validation_data=(X_val, y_val),
  epochs=4,
  callbacks=[cp],
)

Epoch 1/4
[1m174936/174936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m448s[0m 3ms/step - loss: 9.8755 - root_mean_squared_error: 3.1425 - val_loss: 8.6800 - val_root_mean_squared_error: 2.9462
Epoch 2/4
[1m174936/174936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m445s[0m 3ms/step - loss: 9.7903 - root_mean_squared_error: 3.1289 - val_loss: 8.6470 - val_root_mean_squared_error: 2.9406
Epoch 3/4
[1m174936/174936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m443s[0m 3ms/step - loss: 9.8082 - root_mean_squared_error: 3.1318 - val_loss: 8.6425 - val_root_mean_squared_error: 2.9398
Epoch 4/4
[1m174936/174936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m438s[0m 3ms/step - loss: 9.7895 - root_mean_squared_error: 3.1288 - val_loss: 8.6915 - val_root_mean_squared_error: 2.9481


<keras.src.callbacks.history.History at 0x3213bf200>

In [109]:
# Widen pandas' display limits so the long result table below is shown in full:
# up to 1000 rows and columns, and no truncation of cell contents.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000
pd.options.display.max_colwidth = None

In [110]:
print("Finished training at " + datetime.now().strftime('%d/%m/%y %H:%M:%S.%f'))

# Predict on the held-out test set; flatten (samples, 1) to (samples,).
test_predictions = model1.predict(X_test).flatten()

# Render each input window as text. The timestep axis is reversed so that
# reflectivity_1 (the value right before the target) is printed last.
X_test_strings = [f"[{', '.join(map(str, sample[::-1, 0]))}]" for sample in X_test]

# NOTE: the 'Val Predictions' column holds predictions for the *test* split.
test_results = pd.DataFrame({
  'Historical': X_test_strings,
  'Actuals': y_test,
  'Val Predictions': test_predictions,
})

# Show the 1000 samples with the highest actual reflectivity.
sorted_results = test_results.sort_values('Actuals', ascending=False)
sorted_results.head(1000)

Finished training at 17/09/24 09:50:11.346425
[1m21868/21868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 931us/step


Unnamed: 0,Historical,Actuals,Val Predictions
282837,"[47.5, 46.5, 41.3, 38.7, 40.2, 43.7, 45.0, 45.5, 45.0, 47.7, 47.2, 42.7, 41.8, 48.2, 54.3]",62.8,51.148296
683661,"[50.7, 51.0, 49.5, 51.2, 47.7, 48.3, 49.7, 48.5, 50.7, 49.0, 44.8, 46.2, 48.0, 51.3, 60.5]",62.5,54.864182
684187,"[51.0, 50.3, 49.7, 48.7, 49.2, 46.7, 45.5, 49.2, 48.8, 45.3, 48.0, 52.5, 55.5, 58.0, 60.5]",62.5,53.741989
683662,"[51.0, 49.5, 51.2, 47.7, 48.3, 49.7, 48.5, 50.7, 49.0, 44.8, 46.2, 48.0, 51.3, 60.5, 62.5]",62.3,54.952744
282838,"[46.5, 41.3, 38.7, 40.2, 43.7, 45.0, 45.5, 45.0, 47.7, 47.2, 42.7, 41.8, 48.2, 54.3, 62.8]",62.0,55.091686
286415,"[50.5, 49.8, 45.5, 45.3, 46.0, 46.0, 44.8, 42.7, 52.3, 53.5, 54.3, 53.3, 53.7, 53.5, 61.0]",61.7,54.251736
282339,"[46.2, 44.0, 42.7, 38.3, 38.7, 42.2, 44.8, 44.8, 49.2, 50.8, 50.2, 46.0, 40.8, 41.5, 53.2]",61.7,50.45002
285893,"[44.0, 47.7, 41.7, 42.3, 43.0, 44.5, 44.0, 45.3, 51.3, 53.2, 54.8, 52.8, 51.8, 59.2, 60.0]",61.5,53.437412
286414,"[48.8, 50.5, 49.8, 45.5, 45.3, 46.0, 46.0, 44.8, 42.7, 52.3, 53.5, 54.3, 53.3, 53.7, 53.5]",61.0,49.687237
286416,"[49.8, 45.5, 45.3, 46.0, 46.0, 44.8, 42.7, 52.3, 53.5, 54.3, 53.3, 53.7, 53.5, 61.0, 61.7]",60.8,53.782555
