# Assign Behaviors

This notebook reads the JSON files produced by the FAA's Jumpstart code and assigns a behavior label to each STID.

### Parse JSON Data from Jumpstart

In [None]:
import pandas as pd

def load_and_flatten_json(path):
    """Read a JSON file into a DataFrame, flattening a top-level ``data`` column.

    The file is parsed with ``pd.read_json``. If the parsed frame has a
    column named ``"data"``, that column is expanded with
    ``pd.json_normalize`` and the expanded frame is returned; otherwise the
    parsed frame is returned as-is.
    """
    frame = pd.read_json(path)
    if "data" not in frame.columns:
        return frame
    return pd.json_normalize(frame["data"])

# Load ATL data.
# File suffixes: "Wn" = weekend, "Wd" = weekday; "peak" / "oPeak" = peak / off-peak.
# Each key must read its own export: "weekendPeak" uses the weekend ("Wn") peak
# file, mirroring the CLT table below, so that it is not a duplicate of
# "weekdayPeak".
atl_data = {
    "weekendPeak": load_and_flatten_json("./data/flight_data_atl_peakWn.json"),
    "weekendOffPeak": load_and_flatten_json("./data/flight_data_atl_oPeakWn.json"),
    "weekdayPeak": load_and_flatten_json("./data/flight_data_atl_peakWd.json"),
    "weekdayOffPeak": load_and_flatten_json("./data/flight_data_atl_oPeakWd.json")
}

# Load CLT data (same key -> file-suffix mapping as ATL)
clt_data = {
    "weekendPeak": load_and_flatten_json("./data/flight_data_clt_peakWn.json"),
    "weekendOffPeak": load_and_flatten_json("./data/flight_data_clt_oPeakWn.json"),
    "weekdayPeak": load_and_flatten_json("./data/flight_data_clt_peakWd.json"),
    "weekdayOffPeak": load_and_flatten_json("./data/flight_data_clt_oPeakWd.json")
}

# Preview ATL weekendPeak
print("ATL weekendPeak data:\n", atl_data["weekendPeak"].head())

ATL weekendPeak data:
        stid  seqNum  latitude  longitude                      time
0  26472820      39  33.65169  -84.41980  2025-04-10T22:00:10.000Z
1  26472673      40  33.62506  -84.60863  2025-04-10T22:00:10.000Z
2  26471933      41  33.66974  -84.58086  2025-04-10T22:00:10.000Z
3  26472412      42  33.63661  -84.43097  2025-04-10T22:00:10.000Z
4  26472325      43  33.63183  -84.43995  2025-04-10T22:00:10.000Z


### Restrict data to the required time window

In [None]:
## Drop weekend off-peak rows (ATL and CLT) whose local time falls outside 07:00-08:00
def remove_outside_times(df: pd.DataFrame, start_time: str, end_time: str) -> pd.DataFrame:
    """Keep only rows whose America/New_York wall-clock time is inside a window.

    Args:
        df: Frame with a ``time`` column parseable by ``pd.to_datetime``.
            The input frame is not modified; a copy is filtered.
        start_time: Window start (inclusive), e.g. ``"07:00:00"``.
        end_time: Window end (inclusive), e.g. ``"08:00:00"``.

    Returns:
        A new frame whose ``time`` column holds UTC-aware timestamps and
        which contains only the rows inside the window. The original index
        labels of the surviving rows are kept.
    """
    # Work on a copy with the timestamps parsed as UTC.
    result = df.copy()
    result['time'] = pd.to_datetime(result['time'], utc=True)

    # Only the time-of-day part of the bounds is used.
    window_open = pd.to_datetime(start_time).time()
    window_close = pd.to_datetime(end_time).time()

    # Temporary column holding the local (New York) time of day for each row.
    local_clock = result['time'].dt.tz_convert('America/New_York').dt.time
    result = result.assign(time_only=local_clock)

    inside_window = (result['time_only'] >= window_open) & (result['time_only'] <= window_close)
    return result[inside_window].drop(columns=['time_only'])
    
# Apply the same 07:00-08:00 window to the weekend off-peak set of both airports.
for _airport_frames in (atl_data, clt_data):
    _airport_frames["weekendOffPeak"] = remove_outside_times(
        _airport_frames["weekendOffPeak"], "07:00:00", "08:00:00"
    )

### Folium

In [None]:
import folium
from IPython.display import display, clear_output

In [None]:
import geopy.distance
# airport_center = ("33.636667", "-84.428056")
def is_valid_location(json_array: pd.DataFrame, coordinates, coords) -> bool:
    """Report whether any point lies more than 5 miles from ``coordinates``.

    NOTE: despite the name, ``True`` means at least one point is *outside*
    the 5-mile radius; ``False`` means every point is within it (or
    ``coords`` is empty).

    Args:
        json_array: Unused by this function.
        coordinates: Reference point passed to ``geopy.distance.distance``.
        coords: Iterable of points to measure against ``coordinates``.
    """
    return any(
        geopy.distance.distance(coordinates, point).miles > 5
        for point in coords
    )

In [None]:
import csv
# Column order of every classified CSV file.
header = ["stid", "seqNum", "latitude", "longitude", "time", "behavior"]


def write_stid_to_csv(stid, behavior, json_array: pd.DataFrame, filename):
    """Append every row of one track to a CSV file, tagged with a behavior.

    The file is opened in append mode, so repeated calls accumulate rows.
    The header line is written only when the file is empty at open time.

    Args:
        stid: Identifier written to the ``stid`` column of every row.
        behavior: Label written to the ``behavior`` column of every row.
        json_array: Frame providing ``seqNum``, ``latitude``, ``longitude``
            and ``time`` for each row.
        filename: Path of the CSV file to append to.
    """
    copied_fields = ("seqNum", "latitude", "longitude", "time")

    def as_record(track_row):
        # One output line: the given stid, the copied columns, then the label.
        record = {"stid": stid}
        record.update((field, track_row[field]) for field in copied_fields)
        record["behavior"] = behavior
        return record

    with open(filename, mode='a', newline='') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=header)

        # Position 0 right after opening in append mode means the file is empty.
        if out_file.tell() == 0:
            csv_writer.writeheader()

        csv_writer.writerows(
            as_record(track_row) for _, track_row in json_array.iterrows()
        )

In [None]:
from os.path import exists
from datetime import datetime
# Airport reference points as [latitude, longitude]
atl_coords = [33.636667, -84.428056]
clt_coords = [35.213890, -80.943054]

# Output CSV for each dataset key, one table per airport.
# Both tables share the same keys and differ only in the airport code.
atl_filenames, clt_filenames = (
    {
        "weekendPeak": f"./data/classified_data_{airport}_peakWn.csv",
        "weekendOffPeak": f"./data/classified_data_{airport}_oPeakWn.csv",
        "weekdayPeak": f"./data/classified_data_{airport}_peakWd.csv",
        "weekdayOffPeak": f"./data/classified_data_{airport}_oPeakWd.csv",
    }
    for airport in ("atl", "clt")
)

# Interactive labelling of ATL tracks.
#
# For each STID in the selected dataset:
#   * tracks with fewer than MIN_TRACK_POINTS rows are skipped;
#   * tracks whose points all stay within 5 miles of the airport are written
#     with behavior 1 without prompting;
#   * every other track is drawn on a folium map and the label typed at the
#     prompt is written to the dataset's CSV file.

# Labels accepted at the prompt (the prompt advertises 1-5).
VALID_BEHAVIORS = {"1", "2", "3", "4", "5"}
# Minimum number of rows (position reports) a track needs to be classified.
MIN_TRACK_POINTS = 10

for key, group in atl_data.items():
    # Only the weekday-peak dataset is labelled by this cell.
    if key != 'weekdayPeak':
        continue
    filename = atl_filenames[key]

    for stid, json_array in group.groupby('stid'):
        # len() counts rows; DataFrame.size would count rows * columns.
        if len(json_array) < MIN_TRACK_POINTS:
            continue

        # (latitude, longitude) pairs as plain Python floats.
        coords = list(zip(
            json_array['latitude'].astype(float).tolist(),
            json_array['longitude'].astype(float).tolist(),
        ))

        # is_valid_location returns True when any point is more than 5 miles
        # from the airport, so "not" selects tracks that never leave that radius.
        if not is_valid_location(json_array, atl_coords, coords):
            write_stid_to_csv(stid, 1, json_array, filename)
            continue

        ## Create a folium map with one marker per coordinate
        m = folium.Map(location=atl_coords, zoom_start=11)
        for coord in coords:
            folium.CircleMarker(location=coord, radius=1, weight=10).add_to(m)

        # Display the map inline in the Jupyter notebook
        clear_output(wait=True)
        display(m)

        # Re-prompt until a valid label is typed: the CSV is opened in append
        # mode, so a mistyped label would be awkward to remove afterwards.
        prompt = f"Please assign a behavior to key {key} & STID {stid} (1-5): "
        value = input(prompt).strip()
        while value not in VALID_BEHAVIORS:
            print(f"'{value}' is not a valid behavior; enter a number from 1 to 5.")
            value = input(prompt).strip()

        ## Assign behavior to the STID
        write_stid_to_csv(stid, value, json_array, filename)

# for key, group in clt_data.items():
#     for stid, json_array in group.groupby('stid'):
#         filename = clt_filenames[key]
#         coords = [(float(row['latitude']), float(row['longitude'])) for _, row in json_array.iterrows()]
        
#         if json_array.size < 10:
#             continue
        
#         if not is_valid_location(json_array, clt_coords, coords):
#             # If the STID is valid, assign behavior 1 and skip
#             write_stid_to_csv(stid, 1, json_array, filename)
#             continue
        
#         ## Create a folium map
#         m = folium.Map(location=clt_coords, zoom_start=11)
#         # folium.PolyLine(coords, color='blue', weight=10, opacity=1).add_to(m)
        
#         # Add markers for each coordinate
#         for coord in coords:
#             folium.CircleMarker(location=coord, radius=1, weight=10).add_to(m)
        
#         # Display the map inline in the Jupyter notebook
#         clear_output(wait=True)
#         display(m)
        
#         value = input(f"Please assign a behavior to STID {stid} (1-5): ")
        
#         ## Assign behavior to the STID
#         write_stid_to_csv(stid, value, json_array, filename)