In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt

In [5]:
# Add search path 
import sys
sys.path.append('../wx')
from roi import KDFW, ROI_RADIUS, WX_ROI # KDFW coordinates

In [6]:
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometers between two points given in degrees.

    Uses the haversine formula on a sphere of radius 6371 km. All four
    arguments go straight through NumPy ufuncs, so scalars and array-likes
    are both accepted and broadcast against each other.

    Parameters
    ----------
    lat1, lon1 : latitude / longitude of the first point(s), in degrees.
    lat2, lon2 : latitude / longitude of the second point(s), in degrees.

    Returns
    -------
    Distance(s) in kilometers.
    """
    earth_radius_km = 6371.0

    # Degrees -> radians for every coordinate
    phi1, lam1, phi2, lam2 = (np.radians(v) for v in (lat1, lon1, lat2, lon2))

    # Half-angle differences used by the haversine terms
    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2

    hav = np.sin(half_dphi)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(half_dlam)**2
    central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))

    return earth_radius_km * central_angle


# Dropping functions

In [7]:
def add_distance_col_to_df(df):
    """Add a 'distance' column to ``df`` in place.

    The column holds the haversine distance from each row's
    ('latitude', 'longitude') to the KDFW reference point. Nothing is
    returned; the caller's frame is modified directly.
    """
    # KDFW is indexed as (longitude, latitude): [0] is longitude, [1] is latitude
    airport_lon, airport_lat = KDFW[0], KDFW[1]
    df['distance'] = haversine(df['latitude'], df['longitude'], airport_lat, airport_lon)
    
# Sample usage: add_distance_col_to_df(df)  -- modifies df in place, returns nothing

In [8]:
def drop_callsigns_not_far_enough_away(df):
    """Remove callsigns whose whole track stays inside the ROI radius.

    A callsign is dropped when the maximum of its 'distance' values is
    strictly less than ROI_RADIUS. The number of dropped callsigns is
    printed, and a filtered frame is returned (``df`` itself is not modified).
    """
    # Farthest recorded distance for each callsign
    farthest = df.groupby('callsign')['distance'].max()

    # Callsigns that never reach the ROI boundary
    never_outside = farthest.loc[farthest < ROI_RADIUS].index
    print('There are {} callsigns within the radius to be dropped'.format(len(never_outside)))

    # Keep every row whose callsign is not on the drop list
    keep_rows = ~df['callsign'].isin(never_outside)
    return df[keep_rows]

# Sample usage: df = drop_callsigns_not_far_enough_away(df)  -- returns a filtered copy

In [9]:
def drop_callsigns_yet_landed(df, landing_radius_km=30):
    """Remove callsigns that never come close enough to the airport.

    A callsign is treated as "not yet landed" when the minimum of its
    'distance' values is strictly greater than ``landing_radius_km``.
    The number of dropped callsigns is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'callsign' and 'distance' columns.
    landing_radius_km : float, default 30
        Closest-approach threshold, in the same units as the 'distance'
        column (kilometers when that column comes from
        ``add_distance_col_to_df``). The default keeps the value that was
        previously hard-coded.
        NOTE(review): an earlier comment on this threshold said "4km";
        confirm which value is intended.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` whose callsign was not dropped; ``df`` is not modified.
    """
    # Closest approach (minimum distance) for each callsign
    min_distance_per_callsign = df.groupby('callsign')['distance'].min()

    # Callsigns whose closest approach is still outside the landing radius
    not_landed = min_distance_per_callsign[min_distance_per_callsign > landing_radius_km].index
    print('There are {} callsigns that haven\'t landed and was dropped'.format(len(not_landed)))

    # Keep only the callsigns that did get within the landing radius
    return df[~df['callsign'].isin(not_landed)]

# Trimming functions

In [10]:
def trim_df_to_roi_radius(df):
    """Return only the rows of ``df`` whose 'distance' is at most ROI_RADIUS."""
    inside_roi = df['distance'] <= ROI_RADIUS
    return df.loc[inside_roi]


In [11]:
def trim_and_resample(df, callsign, desired_length = 2000):
    """Resample one callsign's track to 1-second steps and force a fixed length.

    Steps:
      1. Select the rows of ``callsign`` (on a copy, so ``df`` is untouched).
      2. Parse 'timestamp', index by it, and forward-fill missing values.
      3. Resample to 1-second bins, averaging each numeric column. Bins
         containing no samples are left as NaN (no interpolation is done).
      4. Trim to the first ``desired_length`` rows, or pad by repeating the
         last row until ``desired_length`` rows are reached.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'callsign', 'timestamp', 'groundspeed', 'latitude',
        'longitude', 'altitude', 'track' and 'vertical_rate'.
    callsign : str
        Callsign whose track is extracted.
    desired_length : int, default 2000
        Number of rows in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Columns: 'timestamp', 'callsign', then the six numeric columns.
        When padding is applied the index is renumbered 0..desired_length-1
        and the numeric columns keep their float dtype.

    Raises
    ------
    ValueError
        If ``df`` has no rows for ``callsign``.
    """
    numeric_cols = ['groundspeed', 'latitude', 'longitude', 'altitude', 'track', 'vertical_rate']

    # Work on a copy: assigning to a filtered slice triggers
    # SettingWithCopyWarning and is ambiguous about mutating the caller's frame.
    track = df.loc[df['callsign'] == callsign].copy()
    if track.empty:
        raise ValueError('No rows found for callsign {!r}'.format(callsign))

    track['timestamp'] = pd.to_datetime(track['timestamp'])
    # Forward-fill missing values (replaces the deprecated fillna(method='ffill'))
    track = track.set_index('timestamp').ffill()

    # Average the numeric columns per 1-second bin
    resampled = track[numeric_cols].resample('1s').mean()
    # The frame holds a single callsign, so the column is constant
    resampled.insert(0, 'callsign', callsign)
    resampled = resampled.reset_index()

    n_rows = len(resampled)
    if n_rows > desired_length:
        # Trim the trajectory to the desired length
        resampled = resampled.iloc[:desired_length]
    elif n_rows < desired_length:
        # Repeat the last row in one shot. Selecting it as a frame (not a
        # Series) keeps each column's dtype instead of degrading to object.
        padding = resampled.iloc[[-1] * (desired_length - n_rows)]
        resampled = pd.concat([resampled, padding], ignore_index=True)
    return resampled

# Callsign thunderstorm attribution

In [12]:
import datetime

# Rounding function
def round_to_nearest_half_hour(ts):
    """Snap ``ts`` down to the start of its half-hour slot.

    Despite the name, this never rounds up: minutes 0-29 map to :00 and
    minutes 30-59 map to :30 of the same hour. Seconds and microseconds
    are zeroed.
    """
    slot_minute = 0 if ts.minute < 30 else 30
    return ts.replace(minute=slot_minute, second=0, microsecond=0)

In [13]:
def get_roi_entrance_time(df):
    """Build a per-callsign table of the earliest timestamp and its half-hour slot.

    Returns a frame with one row per callsign and three columns:
      - 'callsign'
      - 'timestamp': the minimum 'timestamp' for that callsign, as datetime
      - 'rounded_timestamp': that time passed through
        round_to_nearest_half_hour and formatted as '%Y-%m-%d %H_%M_%S'
    """
    # Earliest timestamp per callsign (minimum is taken before datetime parsing)
    entrance = (
        df.groupby('callsign')['timestamp']
        .min()
        .reset_index()
    )

    # Parse once, then reuse for both output columns
    first_seen = pd.to_datetime(entrance['timestamp'])
    entrance['timestamp'] = first_seen

    # Half-hour slot, rendered as a string
    half_hour_slot = first_seen.apply(round_to_nearest_half_hour)
    entrance['rounded_timestamp'] = half_hour_slot.dt.strftime('%Y-%m-%d %H_%M_%S')

    return entrance

# Preprocessing of Trajectories (Main Entry Point)

In [14]:
# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [15]:
import glob

# Directory searched for trajectory CSV files
traj_dir = "../tx/"
# Find all CSV files in the directory. glob returns matches in arbitrary,
# filesystem-dependent order, so sort them to make runs reproducible.
traj_files = sorted(glob.glob(traj_dir + "*.csv"))
print("Found {} CSV files in {}".format(len(traj_files), traj_dir))

Found 1 CSV files in ../tx/


In [16]:
import os

# Directory that receives one compressed archive per storm identifier.
# Created up front so a fresh checkout does not fail on the first save.
output_dir = 'stx'
os.makedirs(output_dir, exist_ok=True)

# Every trajectory is trimmed or padded to this many 1-second samples
desired_length = 2000

for csv_file in traj_files:
    df = pd.read_csv(csv_file)
    # Copy the column subset: add_distance_col_to_df mutates the frame in
    # place, and writing into a slice of the raw frame is ambiguous.
    df = df[['callsign', 'groundspeed', 'timestamp', 'latitude', 'longitude', 'altitude', 'track', 'vertical_rate']].copy()

    # Distance preprocessing
    add_distance_col_to_df(df)
    df = drop_callsigns_not_far_enough_away(df)
    df = drop_callsigns_yet_landed(df)
    df = trim_df_to_roi_radius(df)

    # Group callsigns by the half-hour slot of their first in-ROI timestamp
    roi_entrance_time = get_roi_entrance_time(df)
    storm_idents = roi_entrance_time['rounded_timestamp'].unique().tolist()

    for storm_ident in storm_idents:
        print('Processing storm {}'.format(storm_ident))
        callsign_of_storm = roi_entrance_time[roi_entrance_time['rounded_timestamp'] == storm_ident]['callsign'].tolist()

        # Collect one (1, 2, desired_length) lat/lon array per callsign and
        # join them once, rather than re-concatenating on every iteration.
        trajectories = []
        for callsign in callsign_of_storm:
            processed_trajectory_df = trim_and_resample(df, callsign, desired_length)
            processed_trajectory = processed_trajectory_df[['latitude', 'longitude']].to_numpy().T.reshape(1, 2, -1)
            trajectories.append(processed_trajectory)

        # The leading empty array keeps the (0, 2, desired_length) shape
        # when there is nothing to stack.
        big_trajectory = np.concatenate([np.empty((0, 2, desired_length))] + trajectories, axis=0)

        # Save the stacked trajectories.
        # NOTE(review): the archive name depends only on storm_ident, so if
        # two CSV files produce the same identifier the later one replaces
        # the earlier one -- confirm the input files do not overlap in time.
        np.savez_compressed(os.path.join(output_dir, storm_ident), big_trajectory)

There are 0 callsigns within the radius to be dropped
There are 18 callsigns that haven't landed and was dropped
Processing storm 2017-01-01 10_30_00
Processing storm 2017-01-01 10_00_00
Processing storm 2017-01-01 11_00_00


In [17]:
# !rm -rf stx/*