In [1]:
import pandas as pd
import os
import numpy as np

In [2]:
# Load the airport lookup table used later as the right-hand side of the merge
# on ORIGIN_AIRPORT_ID. The preview shows the columns 'Unnamed: 0',
# 'ORIGIN_AIRPORT_ID' and 'NAME'.
# NOTE(review): 'Unnamed: 0' looks like an index column saved with the CSV - confirm.
df = pd.read_csv('Airport_ID.csv')
# Last expression of the cell: displays the first five rows.
df.head()

Unnamed: 0,ORIGIN_AIRPORT_ID,NAME
0,10001,"Afognak Lake, AK: Afognak Lake Airport"
1,10003,"Granite Mountain, AK: Bear Creek Mining Strip"
2,10004,"Lik, AK: Lik Mining Camp"
3,10005,"Little Squaw, AK: Little Squaw Airport"
4,10006,"Kizhuyak, AK: Kizhuyak Bay"


In [3]:

def process_files(path, df, year):
    """Aggregate class-'F' traffic per origin/destination pair for each CSV in ``path``.

    For every file in ``path`` whose name ends with ``.csv``, the rows whose
    ``CLASS`` column equals ``'F'`` are kept, joined to ``df`` on
    ``ORIGIN_AIRPORT_ID``, grouped by origin and destination, and written to
    ``{year}/destinations{year}/destinations_<input file name>`` (relative to
    the current working directory).

    Parameters
    ----------
    path : str
        Directory containing the raw CSV files. Each file must provide the
        columns ``CLASS``, ``ORIGIN_AIRPORT_ID``, ``DEST_AIRPORT_ID``,
        ``DEST``, ``DEST_CITY_NAME``, ``DEST_STATE_ABR``,
        ``DEPARTURES_PERFORMED``, ``PASSENGERS`` and ``ORIGIN_STATE_ABR``.
    df : pandas.DataFrame
        Lookup table with at least ``ORIGIN_AIRPORT_ID`` and ``NAME`` columns.
    year : int or str
        Used to build the output directory name and the final status message.

    Returns
    -------
    None
        Results are written to disk; a status line is printed at the end.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist (raised by ``os.listdir``).
    KeyError
        If an input file lacks one of the required columns.
    """
    # os.listdir() returns entries in arbitrary order; sort so that runs are
    # reproducible from one machine / execution to the next.
    csv_files = sorted(f for f in os.listdir(path) if f.endswith('.csv'))

    directory_name = f'{year}/destinations{year}'
    # exist_ok=True replaces the check-then-create pattern (no race, safe to re-run).
    os.makedirs(directory_name, exist_ok=True)

    group_keys = [
        'ORIGIN_AIRPORT_ID',
        'DEST_AIRPORT_ID',
        'DEST',
        'DEST_CITY_NAME',
        'DEST_STATE_ABR',
    ]
    count_columns = ['DEPARTURES_PERFORMED', 'PASSENGERS']

    for file in csv_files:
        df_file = pd.read_csv(os.path.join(path, file))

        # Keep only rows whose CLASS is 'F'.
        df_file = df_file[df_file['CLASS'] == 'F']

        # NOTE(review): the right join keeps every airport in `df`, but airports
        # without a matching row get NaN destination keys and groupby() drops
        # NaN keys by default, so they do not reach the output. Passing
        # dropna=False would keep them - confirm the intended behaviour before
        # changing, since it alters the generated CSVs.
        merged_df = pd.merge(df_file, df, on='ORIGIN_AIRPORT_ID', how='right')

        # String aggregation names instead of np.sum: passing the NumPy
        # callable to .agg() is deprecated in recent pandas releases.
        grouped = merged_df.groupby(group_keys).agg({
            'NAME': 'first',
            'DEPARTURES_PERFORMED': 'sum',
            'PASSENGERS': 'sum',
            'ORIGIN_STATE_ABR': 'first',
        }).reset_index()

        # Safeguard: make sure the summed columns never carry NaN into the CSV.
        grouped[count_columns] = grouped[count_columns].fillna(0)

        # Save to csv, keeping the input file name with a 'destinations_' prefix.
        grouped.to_csv(f'{directory_name}/destinations_{file}', index=False)

    print(f"All files for year {year} processed successfully!")

# `df` is the airport lookup table loaded earlier in the notebook.
# Run the aggregation for each year 2019 through 2022; the end of range() is
# exclusive, so widen or narrow the bounds to cover other years.
for year in range(2019, 2023):
    raw_dir = f'{year}/raw{year}'
    process_files(path=raw_dir, df=df, year=year)




All files for year 2019 processed successfully!
All files for year 2020 processed successfully!
All files for year 2021 processed successfully!
All files for year 2022 processed successfully!
