In [13]:
from datetime import datetime, timezone
import bs4 as bs
import os, shutil
import requests
import glob
import pandas as pd
import numpy as np

# Notebook display settings: show up to 1000 rows/columns and never truncate
# the text inside a cell when a DataFrame is rendered.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000
pd.options.display.max_colwidth = None

# Base directory for everything this notebook reads and writes.
# The trailing slash matters: later cells build paths by plain string
# concatenation onto this value.
root_dir = "/Users/trevorwiebe/Ktor/radar_backend/radar_data/"

In [14]:
# Download the CSV files whose filename timestamp falls inside a time window

def download_csv_files(folder_url, destination_folder, start_time, end_time, timeout=30):
    """Download the CSV files linked from ``folder_url`` that fall in a time window.

    The page at ``folder_url`` is fetched and parsed as HTML; every ``<a>``
    whose ``href`` ends in ``.csv`` is a candidate. The part of the filename
    before the first ``_`` is parsed with the format ``%d-%m-%Y-%H%M`` and the
    file is kept when ``start_time <= parsed_time <= end_time``.

    WARNING: everything already inside ``destination_folder`` (files, links and
    sub-directories) is deleted before the download starts.

    Args:
        folder_url: URL of the HTML listing that links to the CSV files.
        destination_folder: Local directory to download into; created if
            missing, emptied if present.
        start_time: Inclusive lower bound. Must be a naive ``datetime``
            because the timestamps parsed from filenames carry no tzinfo.
        end_time: Inclusive upper bound; same requirement as ``start_time``.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    try:
        # Create the destination folder if it doesn't exist
        os.makedirs(destination_folder, exist_ok=True)

        # Delete old files so the folder only holds the current selection
        for filename in os.listdir(destination_folder):
            file_path = os.path.join(destination_folder, filename)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                # Best effort: a leftover file should not stop the download.
                print('Failed to delete %s. Reason: %s' % (file_path, e))

        # Get the list of files in the folder. Without a timeout a stalled
        # server would hang the notebook; without the status check an error
        # page would be parsed as if it were the listing.
        response = requests.get(folder_url, timeout=timeout)
        response.raise_for_status()
        data = bs.BeautifulSoup(response.text, "html.parser")

        csv_links = data.find_all("a", href=lambda href: href and href.endswith(".csv"))

        # Filter CSV files based on time range
        filtered_names = []
        for link_tag in csv_links:
            csv_filename = link_tag['href'].split('/')[-1]
            try:
                # The timestamp is the filename prefix up to the first underscore
                file_time_str = csv_filename.split('_')[0]
                file_time = datetime.strptime(file_time_str, '%d-%m-%Y-%H%M')
            except ValueError as e:
                # Handle parsing errors (e.g., invalid filename format)
                print(f"Error parsing filename: {csv_filename}, {e.args[0]}")
                continue
            if start_time <= file_time <= end_time:
                filtered_names.append(csv_filename)

        if len(filtered_names) == 0:
            print("No files matching that criteria")

        # Tolerate a folder URL given without its trailing slash.
        base_url = folder_url if folder_url.endswith('/') else folder_url + '/'

        downloaded = 0
        for csv_filename in filtered_names:
            link = base_url + csv_filename
            destination_link = os.path.join(destination_folder, csv_filename)

            # Fetch and validate before opening the target, so a failed
            # request never leaves an empty or bogus CSV on disk, and one bad
            # file does not abort the remaining downloads.
            try:
                file_response = requests.get(link, timeout=timeout)
                file_response.raise_for_status()
            except requests.exceptions.RequestException as e:
                print(f"Failed to download {link}: {e}")
                continue

            with open(destination_link, 'wb') as f:
                f.write(file_response.content)
            downloaded += 1

        print(f"Downloading finished: {downloaded} of {len(filtered_names)} files saved.")

    except requests.exceptions.RequestException as e:
        # Reached when the folder listing itself cannot be fetched.
        print(f"Error downloading files: {e}")



# Download the CSV files for the configured one-hour window.
# The window bounds are naive datetimes, matching the naive timestamps
# parsed out of the remote filenames.
start_time = datetime(year=2024, month=9, day=22, hour=17, minute=0)
end_time = datetime(year=2024, month=9, day=22, hour=18, minute=0)
now = datetime.now(timezone.utc)  # NOTE(review): not referenced in this cell — confirm whether it is still needed
folder_url = "http://69.48.179.226/csv_files/"
destination_folder = f"{root_dir}csv_files"

download_csv_files(
    folder_url=folder_url,
    destination_folder=destination_folder,
    start_time=start_time,
    end_time=end_time,
)

Downloading finished, outcome unknown.


In [15]:
# Combines multiple CSV files into one.
def combine_csv_files(input_folder, output_file):
    """Merge every ``*.csv`` in ``input_folder`` into one sorted CSV file.

    Each input file is read, its ``time`` column is renamed to ``dateTime``
    and its ``unknown`` column to ``reflectivity``, and only the columns
    ``dateTime``, ``latitude``, ``longitude`` and ``reflectivity`` are kept.
    The rows of all files are concatenated, sorted by latitude, longitude and
    dateTime, and written to ``output_file`` without the index.

    Args:
        input_folder: Directory containing the CSV files to combine.
        output_file: Path of the combined CSV to write. Its parent directory
            is created when it does not exist yet.

    Raises:
        ValueError: If ``input_folder`` contains no ``.csv`` files.
        KeyError: If an input file lacks one of the required columns.
    """
    # Sorted so the result does not depend on the arbitrary order glob returns.
    csv_files = sorted(glob.glob(os.path.join(input_folder, "*.csv")))

    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate".
    if not csv_files:
        raise ValueError(f"No CSV files found in {input_folder!r}; nothing to combine.")

    renamed_columns = {'time': 'dateTime', 'unknown': 'reflectivity'}
    selected_columns = ['dateTime', 'latitude', 'longitude', 'reflectivity']

    # One DataFrame per file, already renamed and trimmed to the wanted columns
    dataframes = [
        pd.read_csv(csv_path).rename(columns=renamed_columns)[selected_columns]
        for csv_path in csv_files
    ]

    # Concatenate all DataFrames into one and order the rows
    combined_df = pd.concat(dataframes, ignore_index=True).sort_values(
        by=['latitude', 'longitude', 'dateTime']
    )

    # Make sure the target directory exists so the write cannot fail after
    # all the reading and sorting work has been done.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the combined DataFrame to a CSV file
    combined_df.to_csv(output_file, index=False)

# Combine everything that was downloaded into a single CSV under data/.
input_folder = f"{root_dir}csv_files"
output_file = f"{root_dir}data/combined_data.csv"

combine_csv_files(input_folder=input_folder, output_file=output_file)

In [16]:
import subprocess

# Report when the run finished; datetime.now() without a tz argument gives local time.
print("Finished downloading and data processing at " + datetime.now().strftime('%d/%m/%y %H:%M:%S.%f'))

Finished downloading andd data processing at 22/09/24 21:30:56.141593
