# Arrivals

In [1]:
# Imports
import pandas as pd
import os
import shutil

### 1 - Creating Main DataFrame

In [2]:
# Load the raw CSV inputs into DataFrames.
# Paths are assembled with os.path.join so the separator matches the host OS;
# a hand-written '\\' separator only resolves on Windows.

# Flight logs DataFrame
flights = pd.read_csv(os.path.join(os.getcwd(), 'main_related_archives', 'flight_logs.csv'))

# METAR logs DataFrame
metar = pd.read_csv(os.path.join(os.getcwd(), 'main_related_archives', 'metar_logs.csv'))

##### Section 1.1 - Adjustment of 'expected_arrival' for alignment with METAR records

For proper integration of the dataframes, it's crucial to truncate the 'expected_arrival' times down to the start of the hour (setting minutes and seconds to 00). This step ensures that the 'expected_arrival' times are synchronized with the hourly time windows of the METAR logs, so that flight data is consistently correlated with the recorded meteorological conditions.


In [3]:
# Parse 'expected_arrival' as datetime, then truncate every timestamp down to
# the start of its hour (minutes and seconds become 00).
flights['expected_arrival'] = pd.to_datetime(flights['expected_arrival']).dt.floor('h')

##### 1.2 - Filtering by SBGR destination and selecting only the used columns
- "destiny"
- "expected_arrival"
- "arrival_status"
- "arrival_delay_time"

In [4]:
# Keep only flights arriving at SBGR (rows whose 'destiny' is 'SBGR'),
# restricted to the columns used downstream.
# .copy() makes `arrivals` an independent DataFrame, so later column
# assignments on it do not trigger pandas' SettingWithCopyWarning.
arrivals = flights.loc[
    flights['destiny'] == 'SBGR',
    ['destiny', 'expected_arrival', 'arrival_status', 'arrival_delay_time'],
].copy()

##### 1.3 Join with metar logs

In [5]:
# Make sure both join keys are datetimes before merging
arrivals['expected_arrival'] = pd.to_datetime(arrivals['expected_arrival'])
metar['date_time'] = pd.to_datetime(metar['date_time'])

# Left join keeps every arrival row, matching METAR rows on the hour key;
# afterwards drop the columns that are no longer needed.
arrival = (
    arrivals
    .merge(metar, how='left', left_on='expected_arrival', right_on='date_time')
    .drop(columns=['destiny', 'date_time', 'station_id'])
)

##### 1.4 Combining thunderstorm/rain conditions into a single flag

In [6]:
# Values of 'current_wx1' that are flagged in the 'ts' column
# (presumably METAR thunderstorm codes, with or without rain - confirm against the data)
weather_conditions = ['TS', '-TSRA', '+TSRA', 'TSRA']

# 'ts' is 1 when 'current_wx1' is one of the values above, otherwise 0
is_flagged = arrival['current_wx1'].isin(weather_conditions)
arrival['ts'] = is_flagged.astype(int)

#### 2 - Exporting

In [7]:
# Export the merged DataFrame as CSV into the main_related_archives folder

# Target folder, located under the current working directory
path = f"{os.getcwd()}/main_related_archives"

# Write the file without the DataFrame index column
arrival.to_csv(f"{path}/arrival_logs.csv", index=False)

#### 3 - Copying to the R project folder

In [8]:
# Current working directory
current_dir = os.getcwd()

# Its parent directory
parent_dir = os.path.dirname(current_dir)

# 'r_analysis' folder (the R project) inside the parent directory
r_path = f"{parent_dir}/r_analysis"

In [9]:
# Copy the exported CSV into the R project directory

# Source file: the CSV written to the archives folder
original_csv = f"{path}/arrival_logs.csv"

# Destination file inside the R project folder (same file name)
copy_path = f"{r_path}/arrival_logs.csv"

# copy2 copies the file contents and tries to preserve its metadata
shutil.copy2(original_csv, copy_path)

'd:\\Data Scientist\\MBA\\TCC\\repo\\weather-delay-investigation-MBA-/r_analysis/arrival_logs.csv'