In [79]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

Your runtime has 17.2 gigabytes of available RAM

Not using a high-RAM runtime


In [80]:
root_dir = "/Users/trevorwiebe/Ktor/radar_backend/radar_data/"

In [81]:
import os, shutil
import bs4 as bs
import requests
import datetime
import pandas as pd
import glob

def download_csv_files(folder_url, destination_folder, start_time, end_time):

  try:

    # Create the destination folder if it doesn't exist
    if not os.path.exists(destination_folder):
      os.makedirs(destination_folder)

    # Delete old files
    for filename in os.listdir(destination_folder):
      file_path = os.path.join(destination_folder, filename)
      try:
          if os.path.isfile(file_path) or os.path.islink(file_path):
              os.unlink(file_path)
          elif os.path.isdir(file_path):
              shutil.rmtree(file_path)
      except Exception as e:
          print('Failed to delete %s. Reason: %s' % (file_path, e))


    # Get the list of files in the folder
    response = requests.get(folder_url)
    data = bs.BeautifulSoup(response.text, "html.parser")

    csv_files = data.find_all("a", href=lambda href: href and href.endswith(".csv"))

    # Filter CSV files based on time range
    filtered_files = []
    for file_name in csv_files:
        csv_filename = file_name['href'].split('/')[-1]
        try:
            # Parse the filename to extract the creation or modification time
            file_time_str = csv_filename.split('_')[0]
            file_time = datetime.datetime.strptime(file_time_str, '%d-%m-%Y-%H%M')
            if start_time <= file_time <= end_time:
                filtered_files.append(file_name)
        except ValueError as e:
            # Handle parsing errors (e.g., invalid filename format)
            print(f"Error parsing filename: {csv_filename}, {e.args[0]}")

    for file_name in filtered_files:
      csv_url = file_name['href']  # Get the CSV file URL
      csv_filename = csv_url.split('/')[-1]  # Extract the filename
      link = folder_url + csv_filename
      destination_link = os.path.join(destination_folder, csv_filename)

      response = requests.get(link)
      with open(destination_link, 'wb') as f:
         f.write(response.content)

    print("Downloading finished, outcome unknown.")

  except requests.exceptions.RequestException as e:
    print(f"Error downloading files: {e}")



# Initiate download of files
folder_url = "http://69.48.179.226/csv_files/"
destination_folder = root_dir + "csv_files"
start_time = datetime.datetime(2024, 9, 14, 23, 44)
end_time = datetime.datetime(2024, 9, 15, 0, 14)

download_csv_files(folder_url, destination_folder, start_time, end_time)

Error downloading files: ('Connection broken: IncompleteRead(187870466 bytes read, 52316354 more expected)', IncompleteRead(187870466 bytes read, 52316354 more expected))


In [82]:
# Combines multiple CSV files into one.
def combine_csv_files(input_folder, output_file):

    # Get a list of all CSV files in the input folder
    csv_files = glob.glob(input_folder + "/*.csv")

    # Initialize an empty list to store DataFrames
    dataframes = []

    # Iterate through each CSV file
    for file in csv_files:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file)

        df = df.rename(columns={'time': 'dateTime'})
        df = df.rename(columns={'unknown': 'reflectivity'})

        # Select the desired columns
        df = df[['dateTime', 'latitude', 'longitude', 'reflectivity']]

        # Append the DataFrame to the list
        dataframes.append(df)

    # Concatenate all DataFrames into one
    combined_df = pd.concat(dataframes, ignore_index=True)

    # Save the combined DataFrame to a CSV file
    combined_df.to_csv(output_file, index=False)

input_folder = root_dir + "csv_files"
output_file = root_dir + "data/combined_data.csv"

combine_csv_files(input_folder, output_file)

KeyboardInterrupt: 

In [64]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

pd.set_option('display.max_columns', None)
np.set_printoptions(precision=15)

data = pd.read_csv(root_dir + 'data/combined_data.csv')

In [65]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [66]:
from copy import deepcopy as dc

# prepare data frame to be dateTime | latitude | longitude | reflectivity | reflectivity_1 to _15
def prepare_dataframe_for_lstm(df, n_steps):
  df = dc(df)

  df['datetime'] = pd.to_datetime(df['dateTime'])

  # Encode 'datetime' as cyclical features (excluding day encoding)
  df['minute_sin'] = np.sin(2 * np.pi * df['datetime'].dt.minute / 60)
  df['minute_cos'] = np.cos(2 * np.pi * df['datetime'].dt.minute / 60)
  df['hour_sin'] = np.sin(2 * np.pi * df['datetime'].dt.hour / 24)
  df['hour_cos'] = np.cos(2 * np.pi * df['datetime'].dt.hour / 24)
  df['month_sin'] = np.sin(2 * np.pi * df['datetime'].dt.month / 12)
  df['month_cos'] = np.cos(2 * np.pi * df['datetime'].dt.month / 12)

  # Move new columns to the front
  new_columns = ['minute_sin', 'minute_cos', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos']
  remaining_columns = [col for col in df.columns if col not in new_columns]
  df = df[new_columns + remaining_columns]

  df = df.drop(columns=['dateTime'])
  df = df.drop(columns=['datetime'])

  for i in range(1, n_steps+1):
    df[f'reflectivity_{i}'] = df['reflectivity'].shift(i)

  df.dropna(inplace=True)

  df = df[['reflectivity'] + [col for col in df.columns if col != 'reflectivity']]

  return df

lookback = 15
df = prepare_dataframe_for_lstm(data, lookback)

In [67]:
# remove rows where all -99
# List of the columns reflectivity_1 to reflectivity_15
reflectivity_columns = [f'reflectivity_{i}' for i in range(1, 16)]

# Filter rows where all values in the reflectivity columns are -99.0
df = df[~(df[reflectivity_columns] == -99.0).all(axis=1)]

In [68]:
X_df = df.iloc[:, 7:]
y_df = df.iloc[:, 0]

# print(X_df)

lat_lon = X_df.iloc[:, [0,1]].to_numpy()
time_series_data = X_df.iloc[:, 2:].to_numpy()
time_series_data = time_series_data.reshape(X_df.shape[0], 15, 1)
time_series_data[time_series_data < 15] = 15
lat_lon_repeated = np.repeat(lat_lon[:, np.newaxis, :], 15, axis=1)

X = np.concatenate([lat_lon_repeated, time_series_data], axis=2)
y = y_df.to_numpy().flatten()


In [69]:
# Create train, val and test splits

train_split = int(X.shape[0] * .8)
val_split = int(X.shape[0] * .9)

X_train, y_train = X[:train_split], y[:train_split]
X_val, y_val = X[train_split:val_split], y[train_split:val_split]
X_test, y_test = X[val_split:], y[val_split:]

X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape

((2241151, 15, 3),
 (2241151,),
 (280144, 15, 3),
 (280144,),
 (280144, 15, 3),
 (280144,))

In [70]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model

# Use this if starting from scratch
model1 = Sequential()
model1.add(InputLayer((lookback, 3)))
model1.add(LSTM(64))
model1.add(Dense(8, 'relu'))
model1.add(Dense(1, 'linear'))

model1.summary()

In [71]:
# model1 = load_model(root_dir + 'model1/model1.keras')
cp = ModelCheckpoint(root_dir + 'model1/model1.keras', save_best_only=True)
model1.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])

In [72]:
model1.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=2, callbacks=[cp])

Epoch 1/2
[1m70036/70036[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 3ms/step - loss: 2265.4724 - root_mean_squared_error: 47.5000 - val_loss: 1857.8910 - val_root_mean_squared_error: 43.1033
Epoch 2/2
[1m70036/70036[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 3ms/step - loss: 1861.3088 - root_mean_squared_error: 43.1429 - val_loss: 1858.5856 - val_root_mean_squared_error: 43.1113


<keras.src.callbacks.history.History at 0x3a77ed2b0>

In [73]:
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_colwidth', None)

In [74]:
test_predictions = model1.predict(X_test).flatten()
third_column = np.flip(X_test[:, :, X_test.shape[2]-1], axis=1)
X_test_strings = [' '.join(map(str, row)) for row in third_column]
test_results = pd.DataFrame(data={'Historical':X_test_strings, 'Actuals':y_test, 'Val Predictions':test_predictions,})
test_results.head(100)

[1m8755/8755[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 905us/step


Unnamed: 0,Historical,Actuals,Val Predictions
0,34.8 33.3 35.5 37.0 37.2 38.0 37.2 37.7 35.8 37.3 38.0 37.8 38.5 37.7 40.5,42.5,38.930866
1,33.3 35.5 37.0 37.2 38.0 37.2 37.7 35.8 37.3 38.0 37.8 38.5 37.7 40.5 42.5,40.7,41.13953
2,35.5 37.0 37.2 38.0 37.2 37.7 35.8 37.3 38.0 37.8 38.5 37.7 40.5 42.5 40.7,40.0,39.959843
3,37.0 37.2 38.0 37.2 37.7 35.8 37.3 38.0 37.8 38.5 37.7 40.5 42.5 40.7 40.0,40.3,39.156765
4,37.2 38.0 37.2 37.7 35.8 37.3 38.0 37.8 38.5 37.7 40.5 42.5 40.7 40.0 40.3,40.2,39.279415
5,38.0 37.2 37.7 35.8 37.3 38.0 37.8 38.5 37.7 40.5 42.5 40.7 40.0 40.3 40.2,40.2,39.196789
6,37.2 37.7 35.8 37.3 38.0 37.8 38.5 37.7 40.5 42.5 40.7 40.0 40.3 40.2 40.2,38.3,39.228733
7,37.7 35.8 37.3 38.0 37.8 38.5 37.7 40.5 42.5 40.7 40.0 40.3 40.2 40.2 38.3,39.7,37.494034
8,35.8 37.3 38.0 37.8 38.5 37.7 40.5 42.5 40.7 40.0 40.3 40.2 40.2 38.3 39.7,39.2,38.546154
9,37.3 38.0 37.8 38.5 37.7 40.5 42.5 40.7 40.0 40.3 40.2 40.2 38.3 39.7 39.2,33.0,38.168186
