In [1]:
#mount the Google Drive to the Colaboratory runtime
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# evaluating the performance of machine learning models
from sklearn.metrics import accuracy_score
from sklearn.metrics import brier_score_loss
import matplotlib.pyplot as plt #For creating visualizations in Python
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
#!pip install scikit-learn==1.0.1

In [5]:
data_2015 = pd.read_csv("/content/drive/My Drive/2015.csv")

In [6]:
data_2015.shape

(5819079, 28)

In [7]:
data_2015.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5819079 entries, 0 to 5819078
Data columns (total 28 columns):
 #   Column               Dtype  
---  ------               -----  
 0   FL_DATE              object 
 1   OP_CARRIER           object 
 2   OP_CARRIER_FL_NUM    int64  
 3   ORIGIN               object 
 4   DEST                 object 
 5   CRS_DEP_TIME         int64  
 6   DEP_TIME             float64
 7   DEP_DELAY            float64
 8   TAXI_OUT             float64
 9   WHEELS_OFF           float64
 10  WHEELS_ON            float64
 11  TAXI_IN              float64
 12  CRS_ARR_TIME         int64  
 13  ARR_TIME             float64
 14  ARR_DELAY            float64
 15  CANCELLED            float64
 16  CANCELLATION_CODE    object 
 17  DIVERTED             float64
 18  CRS_ELAPSED_TIME     float64
 19  ACTUAL_ELAPSED_TIME  float64
 20  AIR_TIME             float64
 21  DISTANCE             float64
 22  CARRIER_DELAY        float64
 23  WEATHER_DELAY        float64
 24

In [9]:
data_2015.columns

Index(['FL_DATE', 'OP_CARRIER', 'OP_CARRIER_FL_NUM', 'ORIGIN', 'DEST',
       'CRS_DEP_TIME', 'DEP_TIME', 'DEP_DELAY', 'TAXI_OUT', 'WHEELS_OFF',
       'WHEELS_ON', 'TAXI_IN', 'CRS_ARR_TIME', 'ARR_TIME', 'ARR_DELAY',
       'CANCELLED', 'CANCELLATION_CODE', 'DIVERTED', 'CRS_ELAPSED_TIME',
       'ACTUAL_ELAPSED_TIME', 'AIR_TIME', 'DISTANCE', 'CARRIER_DELAY',
       'WEATHER_DELAY', 'NAS_DELAY', 'SECURITY_DELAY', 'LATE_AIRCRAFT_DELAY',
       'Unnamed: 27'],
      dtype='object')

In [10]:
!pip install meteostat

Collecting meteostat
  Downloading meteostat-1.6.7-py3-none-any.whl (31 kB)
Installing collected packages: meteostat
Successfully installed meteostat-1.6.7


In [11]:
from datetime import datetime
from meteostat import Point
from meteostat import Hourly
from meteostat import Daily
from meteostat import Monthly

from prophet import Prophet
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import mean_absolute_error

In [13]:
#Reading the file for all the airports and coordinates fetch from Kaggle
Airport_Locations = pd.read_csv("/content/drive/MyDrive/Airport_Locations.csv")

In [14]:
def fetch_daily_weather_data(Airport_Locations, start_date, end_date):
    """
    Fetch daily weather data for each airport listed in the Airport_Locations dataset.

    Parameters:
        airport_locations_df (DataFrame): DataFrame containing airport codes, latitude, and longitude.
        start_date (datetime): Start date for the weather data retrieval.
        end_date (datetime): End date for the weather data retrieval.

    Returns:
        DataFrame: DataFrame containing daily weather data for each airport.
    """
    # Initialize an empty list to store weather data for each airport
    airport_weather_data = []

    # Iterate over each row of the Airport_Locations DataFrame
    for index, row in Airport_Locations.iterrows():
        # Extract airport code, latitude, and longitude from the current row
        airport_code = row['Airport Code']
        latitude = row['Latitude']
        longitude = row['Longitude']

        # Create Point object for the airport location
        airport_point = Point(latitude, longitude)

        # Fetch daily weather data for the airport location
        airport_weather = Daily(airport_point, start=start_date, end=end_date)
        airport_weather = airport_weather.fetch()

        # Add airport code to each daily weather data record
        airport_weather['Airport Code'] = airport_code

        # Append the airport weather data to the list
        airport_weather_data.append(airport_weather)

    # Concatenate the list of DataFrames into a single DataFrame
    airport_weather_df = pd.concat(airport_weather_data)

    return airport_weather_df

# Example usage:
# Set time period
start_date = datetime(2015, 1, 1)
end_date = datetime(2015, 12, 31)

# Fetch daily weather data for each airport in Airport_Locations dataset
airport_weather_dataset = fetch_daily_weather_data(Airport_Locations, start_date, end_date)

# Display the resulting dataset
print(airport_weather_dataset)

            tavg  tmin  tmax  prcp  snow   wdir  wspd  wpgt    pres  tsun  \
2015-01-01   1.7  -2.7   8.3   0.0   0.0  210.0  13.3   NaN  1021.6   NaN   
2015-01-02   5.3   1.7   9.4   0.0   0.0    NaN   4.7   NaN  1025.5   NaN   
2015-01-03   4.0   1.1   5.6  12.7   0.0    NaN   5.4   NaN     NaN   NaN   
2015-01-04   9.8   5.6  19.4   5.1   0.0  209.0  20.2   NaN  1012.5   NaN   
2015-01-05   7.8  -1.6  11.1   0.0   0.0  310.0  23.0   NaN  1025.3   NaN   
...          ...   ...   ...   ...   ...    ...   ...   ...     ...   ...   
2015-12-26   5.5  -1.1   8.3   NaN   NaN   27.0  15.6   NaN  1016.7   NaN   
2015-12-27  -2.5  -3.3  -1.1   0.0   NaN   22.0  27.0   NaN  1027.8   NaN   
2015-12-28  -4.8  -6.1  -2.8   NaN   NaN    NaN  18.5   NaN     NaN   NaN   
2015-12-29  -4.8 -10.6  -1.1   0.0   NaN    NaN   3.7   NaN  1020.5   NaN   
2015-12-30  -2.2  -5.0   0.0   0.0   NaN    NaN   2.6   NaN  1026.1   NaN   

           Airport Code  
2015-01-01          DCA  
2015-01-02          DCA

In [15]:
airport_weather_dataset.head()

Unnamed: 0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun,Airport Code
2015-01-01,1.7,-2.7,8.3,0.0,0.0,210.0,13.3,,1021.6,,DCA
2015-01-02,5.3,1.7,9.4,0.0,0.0,,4.7,,1025.5,,DCA
2015-01-03,4.0,1.1,5.6,12.7,0.0,,5.4,,,,DCA
2015-01-04,9.8,5.6,19.4,5.1,0.0,209.0,20.2,,1012.5,,DCA
2015-01-05,7.8,-1.6,11.1,0.0,0.0,310.0,23.0,,1025.3,,DCA


In [22]:
airport_weather_dataset.columns

Index(['Date', 'tavg', 'tmin', 'tmax', 'prcp', 'snow', 'wdir', 'wspd', 'wpgt',
       'pres', 'tsun', 'Airport Code'],
      dtype='object')

In [23]:
# Reset the index to convert the date index into a regular column
airport_weather_dataset.reset_index(inplace=True)

# Display the updated airport_weather_dataset DataFrame
print(airport_weather_dataset)

         index       Date  tavg  tmin  tmax  prcp  snow   wdir  wspd  wpgt  \
0            0 2015-01-01   1.7  -2.7   8.3   0.0   0.0  210.0  13.3   NaN   
1            1 2015-01-02   5.3   1.7   9.4   0.0   0.0    NaN   4.7   NaN   
2            2 2015-01-03   4.0   1.1   5.6  12.7   0.0    NaN   5.4   NaN   
3            3 2015-01-04   9.8   5.6  19.4   5.1   0.0  209.0  20.2   NaN   
4            4 2015-01-05   7.8  -1.6  11.1   0.0   0.0  310.0  23.0   NaN   
...        ...        ...   ...   ...   ...   ...   ...    ...   ...   ...   
102481  102481 2015-12-26   5.5  -1.1   8.3   NaN   NaN   27.0  15.6   NaN   
102482  102482 2015-12-27  -2.5  -3.3  -1.1   0.0   NaN   22.0  27.0   NaN   
102483  102483 2015-12-28  -4.8  -6.1  -2.8   NaN   NaN    NaN  18.5   NaN   
102484  102484 2015-12-29  -4.8 -10.6  -1.1   0.0   NaN    NaN   3.7   NaN   
102485  102485 2015-12-30  -2.2  -5.0   0.0   0.0   NaN    NaN   2.6   NaN   

          pres  tsun Airport Code  
0       1021.6   NaN       

In [24]:
airport_weather_dataset.rename(columns={'index': 'Date'}, inplace=True)

In [25]:
#file_path = 'path/to/your/file.csv'  # Replace 'path/to/your/file.csv' with the desired file path and name
import os
os.chdir('/content/drive/MyDrive')
# Write the DataFrame to a CSV file
#df.to_csv(file_path, index=False)
airport_weather_dataset.to_csv('airport_weather_dataset.csv', index=False)