In [None]:
from psutil import virtual_memory
# Report the machine's total physical memory (psutil's `.total`) in decimal gigabytes
# and say whether it reaches the 20 GB "high-RAM" threshold.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

is_high_ram = ram_gb >= 20
print('You are using a high-RAM runtime!' if is_high_ram else 'Not using a high-RAM runtime')

In [1]:
import os, shutil
import bs4 as bs
import requests
from datetime import datetime, timezone
import pandas as pd
import glob

# Base directory for every file this notebook reads or writes.
# Set the RADAR_DATA_DIR environment variable to run on another machine; the
# original location remains the default. Joining with "" guarantees a trailing
# separator, which the later `root_dir + "csv_files"` style concatenations need.
root_dir = os.path.join(
    os.environ.get("RADAR_DATA_DIR", "/Users/trevorwiebe/Ktor/radar_backend/radar_data/"),
    "",
)

In [None]:
# Downloading csv files

def download_csv_files(folder_url, destination_folder, start_time, end_time):
  """Download the CSV files linked from an HTML index page that fall in a time window.

  The destination folder is created when missing and emptied before anything
  is fetched, so after the call it only holds files saved by this run.

  Args:
    folder_url: URL of the index page. Each file is requested as
      ``folder_url`` (with a trailing ``/`` added if absent) + file name.
    destination_folder: Local directory that receives the files.
    start_time: Inclusive lower bound for the timestamp in the file name.
    end_time: Inclusive upper bound for the timestamp in the file name.

  The timestamp is the part of the file name before the first ``_`` and must
  match ``%d-%m-%Y-%H%M``. It is parsed as a naive datetime, so both bounds
  must be naive as well (comparing naive with aware datetimes raises
  TypeError). Names that do not parse are reported and skipped.

  Returns:
    None. Progress and failures are reported with ``print``; request errors
    are caught and reported rather than raised.
  """
  # Without a timeout requests.get can block forever on an unresponsive server.
  request_timeout = 30  # seconds

  try:

    # Create the destination folder if it doesn't exist
    os.makedirs(destination_folder, exist_ok=True)

    # Delete old files
    for filename in os.listdir(destination_folder):
      file_path = os.path.join(destination_folder, filename)
      try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
          os.unlink(file_path)
        elif os.path.isdir(file_path):
          shutil.rmtree(file_path)
      except Exception as e:
        print('Failed to delete %s. Reason: %s' % (file_path, e))

    # Get the list of files in the folder; fail loudly on an HTTP error
    # instead of parsing an error page as if it were the listing.
    response = requests.get(folder_url, timeout=request_timeout)
    response.raise_for_status()
    data = bs.BeautifulSoup(response.text, "html.parser")

    csv_files = data.find_all("a", href=lambda href: href and href.endswith(".csv"))

    # Filter CSV files based on time range
    filtered_files = []
    for file_name in csv_files:
      csv_filename = file_name['href'].split('/')[-1]
      try:
        # The leading part of the file name (before the first "_") is the timestamp
        file_time_str = csv_filename.split('_')[0]
        file_time = datetime.strptime(file_time_str, '%d-%m-%Y-%H%M')
        if start_time <= file_time <= end_time:
          filtered_files.append(file_name)
      except ValueError as e:
        # Handle parsing errors (e.g., invalid filename format)
        print(f"Error parsing filename: {csv_filename}, {e.args[0]}")

    # Tolerate a folder_url given without its trailing slash
    base_url = folder_url if folder_url.endswith('/') else folder_url + '/'

    saved_count = 0
    for file_name in filtered_files:
      csv_filename = file_name['href'].split('/')[-1]  # Extract the filename
      link = base_url + csv_filename
      destination_link = os.path.join(destination_folder, csv_filename)

      # A failure on one file must not abort the remaining downloads, and an
      # HTTP error body must never be written to disk as if it were CSV data.
      try:
        file_response = requests.get(link, timeout=request_timeout)
        file_response.raise_for_status()
      except requests.exceptions.RequestException as e:
        print(f"Failed to download {link}: {e}")
        continue

      with open(destination_link, 'wb') as f:
        f.write(file_response.content)
      saved_count += 1

    print(f"Downloading finished: {saved_count} of {len(filtered_files)} files saved.")

  except requests.exceptions.RequestException as e:
    print(f"Error downloading files: {e}")

# Initiate download of files
folder_url = "http://69.48.179.226/csv_files/"
destination_folder = root_dir + "csv_files"

# The window is 14:34-15:34 on today's (UTC) day of month, pinned to September 2024.
# September has only 30 days, so the day is clamped: without this,
# datetime(2024, 9, 31, ...) raises ValueError whenever the cell runs on the 31st.
# NOTE(review): the year and month are fixed while the day follows the clock —
# confirm whether a fully fixed date was intended.
now = datetime.now(timezone.utc)
window_day = min(now.day, 30)
start_time = datetime(2024, 9, window_day, 14, 34)
end_time = datetime(2024, 9, window_day, 15, 34)

download_csv_files(folder_url, destination_folder, start_time, end_time)

In [None]:
# data = pd.read_csv(root_dir + "csv_files/15-09-2024-1434_cropped.csv")
# data_2 = data.drop(data[data["unknown"] < 15].index)
# data_2

In [56]:
# Combines multiple CSV files into one.

def combine_csv_files(input_folder, output_file):
    """Concatenate every ``*.csv`` file in a folder into one CSV file.

    In each file the ``time`` column is renamed to ``dateTime`` and ``unknown``
    to ``reflectivity``; only ``dateTime``, ``latitude``, ``longitude`` and
    ``reflectivity`` are kept.

    Args:
        input_folder: Directory that is searched (non-recursively) for CSV files.
        output_file: Path of the combined CSV; its parent directory is created
            when missing.

    Raises:
        ValueError: If ``input_folder`` contains no CSV files.
        KeyError: If a file lacks one of the four kept columns after renaming.
    """
    # glob returns paths in an arbitrary, OS-dependent order. Sort them so the
    # row order of the combined file is the same on every run and machine.
    csv_files = sorted(glob.glob(os.path.join(input_folder, "*.csv")))
    if not csv_files:
        # pd.concat([]) would raise a ValueError that does not mention the folder
        raise ValueError(f"No CSV files found in {input_folder!r}")

    kept_columns = ['dateTime', 'latitude', 'longitude', 'reflectivity']
    renames = {'time': 'dateTime', 'unknown': 'reflectivity'}

    dataframes = [
        pd.read_csv(csv_path).rename(columns=renames)[kept_columns]
        for csv_path in csv_files
    ]

    # Concatenate all DataFrames into one
    combined_df = pd.concat(dataframes, ignore_index=True)

    # to_csv does not create missing directories
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the combined DataFrame to a CSV file
    combined_df.to_csv(output_file, index=False)

# Merge the downloaded per-scan CSV files into data/combined_data.csv
input_folder = f"{root_dir}csv_files"
output_file = f"{root_dir}data/combined_data.csv"

combine_csv_files(input_folder=input_folder, output_file=output_file)

In [2]:
# Read combined_data.csv into pandas data frame

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

# Show full float precision in NumPy output and never truncate columns in pandas output
np.set_printoptions(precision=15)
pd.set_option('display.max_columns', None)

# Load the combined CSV produced by combine_csv_files
combined_csv_path = root_dir + 'data/combined_data.csv'
data = pd.read_csv(combined_csv_path)

In [3]:
# Pick the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

In [4]:
# Prepare the data frame as: reflectivity | minute/hour/month sin-cos features | latitude | longitude | reflectivity_1 to reflectivity_15

from copy import deepcopy as dc

def prepare_dataframe_for_lstm(df, n_steps):
  """Build a supervised-learning frame with cyclical time features and lagged reflectivity.

  Args:
    df: DataFrame with a parseable ``dateTime`` column and a ``reflectivity``
      column. It is copied, never modified.
    n_steps: Number of lag columns to create (``reflectivity_1`` ..
      ``reflectivity_<n_steps>``).

  Returns:
    A new DataFrame whose columns are, in order: ``reflectivity``, the six
    sin/cos encodings of minute, hour and month, every other input column
    except ``dateTime``, then the lag columns. Rows containing any NaN are
    dropped, which includes the first ``n_steps`` rows.
  """
  frame = df.copy(deep=True)
  stamps = pd.to_datetime(frame['dateTime'])

  # Encode the timestamp as points on a circle so that e.g. minute 59 sits next to minute 0
  # (day of month is deliberately not encoded).
  cyclical = {
      'minute_sin': np.sin(2 * np.pi * stamps.dt.minute / 60),
      'minute_cos': np.cos(2 * np.pi * stamps.dt.minute / 60),
      'hour_sin': np.sin(2 * np.pi * stamps.dt.hour / 24),
      'hour_cos': np.cos(2 * np.pi * stamps.dt.hour / 24),
      'month_sin': np.sin(2 * np.pi * stamps.dt.month / 12),
      'month_cos': np.cos(2 * np.pi * stamps.dt.month / 12),
  }

  # Time features go first; both raw timestamp columns are left out.
  passthrough = [
      name for name in frame.columns
      if name not in cyclical and name not in ('dateTime', 'datetime')
  ]
  frame = frame.assign(**cyclical)[list(cyclical) + passthrough]

  # reflectivity_k holds the reflectivity found k rows earlier.
  # NOTE(review): the shift runs over row order only, not per latitude/longitude —
  # confirm that consecutive rows really are consecutive observations of one location.
  target = frame['reflectivity']
  lagged = {f'reflectivity_{k}': target.shift(k) for k in range(1, n_steps + 1)}
  frame = frame.assign(**lagged).dropna()

  trailing = [name for name in frame.columns if name != 'reflectivity']
  return frame[['reflectivity'] + trailing]

# Number of lagged reflectivity columns built for every row
lookback = 15
df = prepare_dataframe_for_lstm(df=data, n_steps=lookback)


In [5]:
# Remove rows whose whole reflectivity window carries no signal
# (presumably -99 is the source's "no data" marker — confirm against the data provider).

# Lag columns reflectivity_1 .. reflectivity_<lookback> plus the target column.
# Derived from `lookback` rather than a literal 16 so the list cannot drift
# out of step with the columns prepare_dataframe_for_lstm created.
reflectivity_columns = [f'reflectivity_{i}' for i in range(1, lookback + 1)]
reflectivity_columns.append('reflectivity')

# Replace every value that is <= 0 with 0
df[reflectivity_columns] = df[reflectivity_columns].mask(df[reflectivity_columns] <= 0, 0)

# Remove rows where all values in the reflectivity columns are 0.0
no_zero_df = df[~(df[reflectivity_columns] == 0.0).all(axis=1)]

In [6]:
# Features are the lag columns, the target is the current reflectivity.
# Selecting by name instead of by position (previously `.iloc[:, 9:]`) keeps
# the split correct if columns are ever added, removed or reordered upstream.
lag_columns = [f'reflectivity_{i}' for i in range(1, lookback + 1)]
X_df = no_zero_df[lag_columns]
y_df = no_zero_df[['reflectivity']]

In [7]:
# Give the features a trailing axis of size 1 -> (rows, columns, 1) and flatten the target to 1-D
X = np.expand_dims(X_df.to_numpy(), axis=-1)
y = y_df.to_numpy().flatten()

In [8]:
# Contiguous 80 / 10 / 10 train / validation / test split in row order (no shuffling)

n_samples = X.shape[0]
train_split = int(n_samples * .8)
val_split = int(n_samples * .9)

X_train, X_val, X_test = np.split(X, [train_split, val_split])
y_train, y_val, y_test = np.split(y, [train_split, val_split])

X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape

((11815868, 15, 1),
 (11815868,),
 (1476984, 15, 1),
 (1476984,),
 (1476984, 15, 1))

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model

# Use this if starting from scratch
model1 = Sequential([
    InputLayer((lookback, 1)),
    # 64-unit LSTM; return_sequences=False because a single value is predicted
    LSTM(64, return_sequences=False),
    Dropout(0.2),  # regularisation against overfitting
    # Dense layer for additional feature extraction
    Dense(32, activation='relu'),
    Dropout(0.2),
    # Single linear output: the predicted value
    Dense(1, activation='linear'),
])

model1.summary()

In [10]:
# model1 = load_model(root_dir + 'model1/model1.keras')
# Keep only the best model seen so far at this path
checkpoint_path = root_dir + 'model/model3.keras'
cp = ModelCheckpoint(checkpoint_path, save_best_only=True)

# MSE loss, Adam with a small learning rate, RMSE reported as a metric
model1.compile(
    loss=MeanSquaredError(),
    optimizer=Adam(learning_rate=0.0001),
    metrics=[RootMeanSquaredError()],
)

In [11]:
# Train for two epochs, checkpointing through `cp`; the History object is the cell's output
model1.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=2,
    callbacks=[cp],
)

Epoch 1/2
[1m369246/369246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1440s[0m 4ms/step - loss: 6.9873 - root_mean_squared_error: 2.6369 - val_loss: 6.0098 - val_root_mean_squared_error: 2.4515
Epoch 2/2
[1m369246/369246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1506s[0m 4ms/step - loss: 6.3040 - root_mean_squared_error: 2.5108 - val_loss: 5.9427 - val_root_mean_squared_error: 2.4378


<keras.src.callbacks.history.History at 0x5f4fc1d00>

In [12]:
# Widen the pandas display limits so the results table below is shown without truncation
for _option, _value in (
    ('display.max_rows', 1000),
    ('display.max_columns', 1000),
    ('display.max_colwidth', None),
):
    pd.set_option(_option, _value)

In [13]:
# Predict on the held-out rows and list the 1000 rows with the largest actual values
test_predictions = model1.predict(X_test).flatten()

# Render each input window as text; the columns are reversed so that
# reflectivity_<lookback> comes first and reflectivity_1 last
X_test_strings = [
    '[' + ', '.join(map(str, sample[::-1, 0])) + ']'
    for sample in X_test
]

test_results = pd.DataFrame(
    data={
        'Historical': X_test_strings,
        'Actuals': y_test,
        'Val Predictions': test_predictions,
    }
)
# test_results[100:1000]
sorted_results = test_results.sort_values(by='Actuals', ascending=False)
sorted_results.head(1000)

[1m46156/46156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 1ms/step


Unnamed: 0,Historical,Actuals,Val Predictions
1241457,"[27.2, 29.3, 29.8, 31.3, 39.5, 41.5, 37.8, 37.7, 39.5, 40.3, 51.0, 55.0, 53.7, 54.2, 57.8]",61.8,52.911812
1244242,"[35.8, 37.5, 38.3, 41.5, 43.7, 46.7, 46.3, 46.7, 45.8, 44.5, 44.2, 46.2, 47.7, 51.3, 59.8]",61.3,52.963215
759347,"[26.2, 31.3, 40.0, 54.0, 49.7, 50.2, 46.5, 41.8, 39.2, 31.8, 32.3, 38.2, 51.8, 55.2, 58.3]",60.8,54.852921
760065,"[33.5, 37.3, 40.7, 45.7, 48.7, 45.3, 44.3, 44.2, 39.0, 36.5, 33.0, 45.7, 49.7, 57.5, 59.0]",59.8,54.767311
1244243,"[37.5, 38.3, 41.5, 43.7, 46.7, 46.3, 46.7, 45.8, 44.5, 44.2, 46.2, 47.7, 51.3, 59.8, 61.3]",59.8,54.126137
1244241,"[35.0, 35.8, 37.5, 38.3, 41.5, 43.7, 46.7, 46.3, 46.7, 45.8, 44.5, 44.2, 46.2, 47.7, 51.3]",59.8,49.583168
1243541,"[42.7, 41.3, 45.8, 44.5, 46.3, 47.7, 53.5, 50.8, 49.0, 48.7, 48.2, 48.0, 50.0, 53.2, 59.5]",59.7,51.369701
1242151,"[30.0, 28.7, 30.3, 30.7, 34.0, 37.8, 39.8, 39.2, 40.8, 45.8, 46.8, 48.2, 52.2, 53.3, 59.3]",59.7,53.324852
759348,"[31.3, 40.0, 54.0, 49.7, 50.2, 46.5, 41.8, 39.2, 31.8, 32.3, 38.2, 51.8, 55.2, 58.3, 60.8]",59.5,55.072567
1243540,"[37.2, 42.7, 41.3, 45.8, 44.5, 46.3, 47.7, 53.5, 50.8, 49.0, 48.7, 48.2, 48.0, 50.0, 53.2]",59.5,49.366722
