In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import scipy
import sklearn
import os

In [2]:
DATA_DIR = "./raw_data_2010_2023/"

In [3]:
drop = [ 'SLC','CWA','MEM', 'BTV']

In [4]:
def find_csv_files(folder_path, suffix=".csv"):
    """
    Find all CSV files in the given folder path.

    Args:
    - folder_path (str): Path to the folder where CSV files are located.
    - suffix (str): Suffix to filter files (default is '.csv').

    Returns:
    - list: List of CSV files with the given suffix.
    """
    csv_files = []
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.endswith(suffix):
                csv_files.append(os.path.join(root, file))
    return csv_files

In [5]:
def plot_top_n_categorical(data, column, n=None, figsize=(15, 5)):
    """
    Plot the distribution of the top N categories of a categorical variable.

    Args:
    - data (pandas DataFrame): DataFrame containing the categorical variable.
    - column (str): Name of the categorical variable column.
    - n (int or None): Number of top categories to plot. If None, plot all categories (default is None).
    - figsize (tuple): Width and height of the figure in inches (default is (10, 6)).

    Returns:
    - None (displays the plot).
    """
    if n is None:
        categories = data[column].value_counts().index
    else:
        categories = data[column].value_counts().nlargest(n).index
        
    data_filtered = data[data[column].isin(categories)]
    
    plt.figure(figsize=figsize)
    sns.countplot(data=data_filtered, x=column, order=categories)
    plt.title(f'Top {len(categories)} Categories of {column}')
    plt.xlabel(column)
    plt.ylabel('Count')
    plt.xticks(rotation=90)
    plt.show()


In [6]:
all_data_files = find_csv_files(DATA_DIR, suffix="_cleaned.csv")

In [7]:
all_dfs = []
for f in all_data_files:
    all_dfs.append(pd.read_csv(f, parse_dates=["Date (MM/DD/YYYY)"]))

In [8]:
all_merged = pd.concat(all_dfs, ignore_index=True)

ValueError: No objects to concatenate

In [9]:
all_merged.head()

NameError: name 'all_merged' is not defined

In [10]:
sorted_all_merged = all_merged.sort_values(by=['Date (MM/DD/YYYY)', 'Scheduled Arrival Time']).reset_index(drop=True)

NameError: name 'all_merged' is not defined

In [11]:
sorted_all_merged = sorted_all_merged.drop_duplicates().reset_index(drop=True)

NameError: name 'sorted_all_merged' is not defined

In [12]:
sorted_all_merged.shape

NameError: name 'sorted_all_merged' is not defined

In [13]:
sorted_all_merged = sorted_all_merged[~sorted_all_merged['Origin Airport'].isin(drop)].reset_index(drop=True)

NameError: name 'sorted_all_merged' is not defined

In [14]:
sorted_all_merged.shape

NameError: name 'sorted_all_merged' is not defined

In [15]:
sorted_all_merged = sorted_all_merged.sort_values(by=['Date (MM/DD/YYYY)', 'Scheduled Arrival Time']).reset_index(drop=True)

NameError: name 'sorted_all_merged' is not defined

# CARRIER CODE

In [16]:
plot_top_n_categorical(sorted_all_merged, "Carrier Code")

NameError: name 'sorted_all_merged' is not defined

# FLIGHT NUMBER

In [17]:
plot_top_n_categorical(sorted_all_merged, "Flight Number", 40)

NameError: name 'sorted_all_merged' is not defined

# TAIL NUMBER

In [18]:
plot_top_n_categorical(sorted_all_merged, 'Tail Number', 100)

NameError: name 'sorted_all_merged' is not defined

# ORIGIN AIRPORT

In [19]:
plot_top_n_categorical(sorted_all_merged, 'Origin Airport')

NameError: name 'sorted_all_merged' is not defined

# COLUMNS

In [20]:
sorted_all_merged.columns

NameError: name 'sorted_all_merged' is not defined

In [21]:
to_drop = ['Wheels-on Time', 'Taxi-In time (Minutes)',
       'Delay Carrier (Minutes)', 'Delay Weather (Minutes)',
       'Delay National Aviation System (Minutes)', 'Delay Security (Minutes)',
       'Delay Late Aircraft Arrival (Minutes)']

In [22]:
def date_to_season(date):
    """
    Determine the season for a given date without considering the year.

    Parameters:
    date (Timestamp): The date.

    Returns:
    str: The season corresponding to the given date.
    """
    # Define fixed date ranges for each season
    seasons = {
        'winter': ((1, 1), (3, 20)),
        'spring': ((3, 21), (6, 20)),
        'summer': ((6, 21), (9, 22)),
        'autumn': ((9, 23), (12, 20)),
        'winter': ((12, 21), (12, 31)),
        'winter': ((1, 1), (1, 31))
    }

    # Get month and day from the date
    month = date.month
    day = date.day
    print(date.year, date.month, date.day)
    # Determine the season based on the month and day
    for season, (start_md, end_md) in seasons.items():
        if start_md <= (month, day) <= end_md:
            return season

In [23]:
def create_categorical_column(df):
    """
    Create a categorical column in the DataFrame based on the range of a numeric column by iterating over each row.

    Parameters:
    df (DataFrame): The pandas DataFrame.
    numeric_column (str): The name of the numeric column.

    Returns:
    DataFrame: The DataFrame with the new categorical column.
    """
    numeric_column = 'Arrival Delay (Minutes)'
    categorical_values = []
    for index, row in df.iterrows():
        numeric_value = row[numeric_column]
        # Customize the conditions based on your requirement
        if abs(numeric_value) <= 5:
            categorical_values.append('ONTIME')
        elif numeric_value < -5:
            categorical_values.append('EARLY')
        elif numeric_value > 5:
            categorical_values.append('LATE')
        else:
            print("THIS SHOULDN't HAPPEN")
    df['FLIGHT_STATUS'] = categorical_values
    return df

In [24]:
processed = sorted_all_merged.drop(columns=to_drop)

NameError: name 'sorted_all_merged' is not defined

In [25]:
processed = create_categorical_column(processed)

NameError: name 'processed' is not defined

In [26]:
processed = processed.drop(columns=[ 'Actual Elapsed Time (Minutes)', 'Actual Arrival Time'])

NameError: name 'processed' is not defined

In [27]:
def month_to_season(month):
    """
    Determine the season for a given month.

    Parameters:
    month (int): The month number (1-12).

    Returns:
    str: The season corresponding to the given month.
    """
    # Define mapping of months to seasons
    season_map = {
        1: 'winter',
        2: 'winter',
        3: 'spring',
        4: 'spring',
        5: 'spring',
        6: 'summer',
        7: 'summer',
        8: 'summer',
        9: 'autumn',
        10: 'autumn',
        11: 'autumn',
        12: 'winter'
    }
    
    return season_map.get(month, 'Unknown')

In [28]:
processed['month'] = processed['Date (MM/DD/YYYY)'].dt.month

NameError: name 'processed' is not defined

In [29]:
processed['day'] = processed['Date (MM/DD/YYYY)'].dt.day

NameError: name 'processed' is not defined

In [30]:
processed['season'] = processed['month'].apply(month_to_season)

NameError: name 'processed' is not defined

In [31]:
processed['WeekDay'] = processed['Date (MM/DD/YYYY)'].dt.day_name()

NameError: name 'processed' is not defined

In [32]:
# processed.to_csv("./INITIAL_PROCESSED_DATA.csv", index=False)

In [33]:
processed.head()

NameError: name 'processed' is not defined

# DATA PREP FOR 2nd Model

In [34]:
processed.sort_values(by=['Date (MM/DD/YYYY)', 'Scheduled Arrival Time'])

NameError: name 'processed' is not defined

In [35]:
def create_data_2nd_model_data(df, prev_status='ONTIME'):
    prevStatus = []
    for index, row in df.iterrows():
        prevStatus.append(prev_status)
        prev_status = row['FLIGHT_STATUS']
    df['PREV_STAT'] = pd.Series(prevStatus)
    return df

In [36]:
def create_data_2nd_model_data_ext(df, num_prev=3):
    extended_data = []
    for index, row in df.iterrows():
        for i in range(num_prev):
            if index - i - 1 >= 0:
                extended_row = row.copy()  # Create a copy of the current row
                extended_row['PREV_STAT'] = df.iloc[index - i - 1]['FLIGHT_STATUS']
                extended_data.append(extended_row)
            else:
                extended_row = row.copy()  # Create a copy of the current row
                extended_row['PREV_STAT'] = 'ONTIME'
                extended_data.append(extended_row)
    df_extended = pd.DataFrame(extended_data)
    return df_extended.reset_index(drop=True)


In [37]:
final_data = processed.drop(columns=['Arrival Delay (Minutes)'])

NameError: name 'processed' is not defined

In [38]:
final_data.head()

NameError: name 'final_data' is not defined

In [39]:
#create_data_2nd_model_data(final_data).to_csv("1_HOP.csv", index=False)

In [40]:
#create_data_2nd_model_data_ext(final_data).to_csv("3_HOP.csv", index=False)

In [41]:
airport_to_csv_mapping = {
    'ORD': '72530094846.csv', 
    'JFK': '74486094789.csv', 
    'ATL': '72219013874.csv', 
    'CLT': '72314013881.csv', 
    'DTW': '72537094847.csv', 
    'MCO': '72205012815.csv', 
    'LGA': '72503014732.csv', 
    'EWR': '72502014734.csv', 
    'IAD': '72403093738.csv', 
    'SYR':'72519014771.csv',
    'DCA': '72405013743.csv', 
    'BWI': '72406093721.csv', 
    'PHL': '72408013739.csv', 
    'MSP': '72658014922.csv', 
    'BOS': '72509014739.csv', 
    'DEN': '72565003017.csv', 
    'FLL': '74783012849.csv', 
    'DFW': '72259303985.csv', 
    'CLE': '72524014820.csv', 
    'PIE': '72211612873.csv', 
    'BNA': '72327013897.csv', 
    'SFB': '72205712854.csv', 
    'RDU': '72306013722.csv', 
    'SRQ': '72211512871.csv', 
    'CVG': '72421093814.csv', 
    'RSW': '72210812894.csv',
    'TPA': '72211012842.csv', 
    'PGD': '72203412812.csv', 
    'MYR': '74791013717.csv', 
    'MIA': '72202012839.csv',
}

In [42]:
# import os
# import urllib.request

# WEATHER_REPO = './weather/'
# years = list(range(2010, 2023))
# BASE_LINK = 'https://www.ncei.noaa.gov/data/global-hourly/access/'
# airports = airport_to_csv_mapping.keys()

# for ap in airports:
#     apdir = os.path.join(WEATHER_REPO, ap)  # Create directory path for each airport
#     os.makedirs(apdir, exist_ok=True)  # Create directory if it doesn't exist
#     for y in years:
#         file_url = BASE_LINK + str(y) + "/" + airport_to_csv_mapping[ap]
#         file_name = os.path.basename(file_url)  # Extract filename from URL
#         # Add year to the filename
#         file_name_with_year = str(y) + "_" + file_name
#         file_path = os.path.join(apdir, file_name_with_year)  # Create full file path
#         urllib.request.urlretrieve(file_url, file_path)  # Download file into correct directory
#         print("Downloaded:", file_name_with_year, "into", apdir)


In [43]:
columns_to_consider = ["STATION","DATE","SOURCE","LATITUDE","LONGITUDE","ELEVATION","NAME",
                       "REPORT_TYPE","CALL_SIGN","QUALITY_CONTROL","WND","CIG","VIS","TMP","DEW","SLP"]


In [44]:
# import os
# import pandas as pd
# import urllib.request

# WEATHER_REPO = './weather/'
# years = list(range(2009, 2025))
# BASE_LINK = 'https://www.ncei.noaa.gov/data/global-hourly/access/'
# airports = airport_to_csv_mapping.keys()

# # Specify columns you want to consider
# columns_to_consider = ["STATION","DATE","SOURCE","LATITUDE","LONGITUDE","ELEVATION","NAME",
#                        "REPORT_TYPE","CALL_SIGN","QUALITY_CONTROL","WND","CIG","VIS","TMP","DEW","SLP"]


# # Dictionary to store DataFrame for each airport
# airport_dfs = {}

# for ap in airports:
#     # List to store DataFrames for each year
#     dfs_per_year = []
    
#     for y in years:
#         try:
#             file_url = BASE_LINK + str(y) + "/" + airport_to_csv_mapping[ap]
#             # Read only specific columns from CSV data directly from URL into DataFrame
#             df = pd.read_csv(file_url, usecols=columns_to_consider)
#             # Append DataFrame to list
#             dfs_per_year.append(df)
#             print(ap, file_url)
#         except Exception as e:
#             print(e)
#             print(ap, file_url)

#     # Concatenate DataFrames for each year into a single DataFrame
#     airport_df = pd.concat(dfs_per_year, ignore_index=True)
#     # Store DataFrame in dictionary
#     airport_dfs[ap] = airport_df

#     # Save the concatenated DataFrame to a CSV file in the weather repository
#     file_name = os.path.join(WEATHER_REPO, f"{ap}_weather_data.csv")
#     airport_df.to_csv(file_name, index=False)

# # Access DataFrame for a specific airport
# # For example, to access DataFrame for airport 'JFK':
# # jfk_df = airport_dfs['JFK']


In [45]:
# import os
# import pandas as pd
# import urllib.request

# WEATHER_REPO = './weather/'
# WEATHER_WORKING_REPOS = './weather_working/'
# years = list(range(2010, 2024))
# # BASE_LINK = 'https://www.ncei.noaa.gov/data/global-hourly/access/'
# airports = airport_to_csv_mapping.keys()

# # Specify columns you want to consider
# columns_to_consider = ["STATION","DATE","SOURCE","LATITUDE","LONGITUDE","ELEVATION","NAME",
#                        "REPORT_TYPE","CALL_SIGN","QUALITY_CONTROL","WND","CIG","VIS","TMP","DEW","SLP"]


# # Dictionary to store DataFrame for each airport
# airport_dfs = {}

# for ap in airports:
#     file_url = WEATHER_REPO+ f"{ap}_weather_data.csv"
#     df = pd.read_csv(file_url, parse_dates=["DATE"])
#     df['STATION'] = ap
#     df.to_csv(WEATHER_WORKING_REPOS+ f"{ap}_weather_data.csv", index=False)

In [46]:
def parse_WND(wnd):
    split = wnd.split(',')
    data = {
        'wind_direction':int(split[0].strip()) if int(split[0].strip()) != 999 else np.nan,
        'wind_directon_quality':split[1].strip(),
        'wind_type':split[2].strip(),
        'wind_speed':int(split[3].strip()) if int(split[3].strip())!=9999 else np.nan,
        'wind_speed_quality':split[4].strip()
    }
    return data

def parse_CIG(cig):
    split = cig.split(',')
    data = {
        'ceiling_height':int(split[0].strip()) if int(split[0].strip())!=99999 else np.nan ,
        'ceiling_height_quality':split[1].strip(),
        'ceiling_det_code':split[2].strip(),
        'celing_CAVOK':split[3].strip()
    }
    return data

def parse_VIS(vis):
    split = vis.split(',')
    data = {
        'visibility_dist':int(split[0].strip()) if int(split[0].strip()) != 999999 else np.nan,
        'visibility_dist_quality':split[1].strip(),
        'visibility_variability':split[2].strip(),
        'visibility_variability_quality':split[3].strip(),
    }
    return data

def parse_TMP(tmp):
    split = tmp.split(',')
    data = {
        'air_temparature':int(split[0].strip()) if int(split[0].strip())!=9999 else np.nan,
        'air_temparature_quality':split[1].strip()
    }
    return data

def parse_DEW(dew):
    split = dew.split(',')
    data = {
        'dew_point_temparature':int(split[0].strip()) if int(split[0].strip())!=9999 else np.nan,
        'dew_point_temparature_quality':split[1].strip()
    }
    return data

def parse_SLP(slp):
    split = slp.split(',')
    data = {
        'sea_level_pressure':int(split[0].strip()) if int(split[0].strip())!=99999 else np.nan,
        'sea_level_pressure_quality':split[1].strip()
    }
    return data

## CREATING PARSED DATA FRAME FOR WEATHER DATA
def parse_data(weather_data):
    rows = []
    cols = weather_data.columns
    for index, row in weather_data.iterrows():
        new_row = {}
        for c in cols:
            if c == 'WND':
                parsed = parse_WND(row[c])
                new_row.update(parsed)
            elif c == 'CIG':
                parsed = parse_CIG(row[c])
                new_row.update(parsed)
            elif c == 'VIS':
                parsed = parse_VIS(row[c])
                new_row.update(parsed)
            elif c == 'TMP':
                parsed = parse_TMP(row[c])
                new_row.update(parsed)
            elif c == 'DEW':
                parsed = parse_DEW(row[c])
                new_row.update(parsed)
            elif c == 'SLP':
                parsed = parse_SLP(row[c])
                new_row.update(parsed)
            else:
                new_row[c.lower()] = row[c]
        rows.append(new_row)
    
    parsed_df = pd.DataFrame(rows, index=weather_data.index)
    return parsed_df

In [47]:
all_raw_weather_data = find_csv_files('./weather_working/')

In [48]:
all_raw_weather_data

[]

In [49]:
all_raw_weather_data[0].split('/')[-1]

IndexError: list index out of range

### PARSING

In [54]:
SAVE_PATH = "./PARSED_WEATHER_DATA/"
# os.makedirs(SAVE_PATH, exist_ok=True)
# for f in all_raw_weather_data:
#     name = f.split('/')[-1]
#     data = pd.read_csv(f)
#     data_processed = parse_data(data)
#     data_processed.to_csv(SAVE_PATH+name, index=False)
#     print(f"PROCESSED .. {f}")

### Processing

In [55]:
all_csv = find_csv_files(SAVE_PATH)

In [56]:
import pandas as pd

def interpolate_target(df, columns):
    """
    Interpolates missing values in the target column by averaging the five non-null values before and after the missing row.
    
    Parameters:
        df (DataFrame): The input DataFrame with missing values.
        target_column (str): The name of the target column with missing values.
        
    Returns:
        DataFrame: DataFrame with missing values in the target column interpolated.
    """
    df_interpolated = df.copy()
    for i, row in df_interpolated.iterrows():
        for target_column in columns:
            if pd.isnull(row[target_column]):
                start_idx = max(0, i - 10)
                end_idx = min(len(df_interpolated), i + 10)
                valid_values = df_interpolated.iloc[start_idx:end_idx][target_column].dropna()
                if len(valid_values) > 0:
                    avg_value = valid_values.mean()
                    df_interpolated.at[i, target_column] = avg_value
    return df_interpolated

In [57]:
IMPUTED_PATH = './imputed_weather/'
os.makedirs(IMPUTED_PATH, exist_ok=True)
for f in all_csv:
    print(f"PROCESSING ... {f}")
    name = f.split('/')[-1]
    single = pd.read_csv(f)
    droplist = [
         'source',
        'name', 'report_type', 'call_sign', 'quality_control',
        'wind_directon_quality',
         'wind_speed_quality',
        'ceiling_height_quality',
        'visibility_dist_quality',
        'air_temparature_quality',
        'dew_point_temparature_quality',
        'sea_level_pressure_quality',
        'visibility_variability_quality'
    ]
    na_cols = single.columns[single.isna().any()].tolist()
    single = single.drop(columns=droplist)
    imputed = interpolate_target(single, na_cols)
#     assert imputed.isna().sum().sum() == 0
    imputed.to_csv(IMPUTED_PATH+name, index=False)
    print(f"PROCESSING DONE ====> {f}")

PROCESSING ... ./PARSED_WEATHER_DATA/MSP_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/MSP_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/ORD_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/ORD_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/CLE_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/CLE_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/PIE_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/PIE_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/RDU_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/RDU_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/EWR_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/EWR_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/MCO_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/MCO_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/PHL_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/PHL_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/CVG_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/CVG_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/DEN_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/DEN_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/DCA_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/DCA_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/DFW_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/DFW_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/LGA_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/LGA_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/RSW_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/RSW_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/BNA_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/BNA_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/ATL_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/ATL_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/IAD_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/IAD_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/SRQ_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/SRQ_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/CLT_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/CLT_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/PGD_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/PGD_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/SYR_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/SYR_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/BOS_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/BOS_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/MIA_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/MIA_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/FLL_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/FLL_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/MYR_weather_data.csv
PROCESSING DONE ====> ./PARSED_WEATHER_DATA/MYR_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/DTW_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/DTW_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/SFB_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/SFB_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/TPA_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/TPA_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/JFK_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/JFK_weather_data.csv
PROCESSING ... ./PARSED_WEATHER_DATA/BWI_weather_data.csv


  single = pd.read_csv(f)


PROCESSING DONE ====> ./PARSED_WEATHER_DATA/BWI_weather_data.csv


#### final_data.head()