# Main Project File
This is the main notebook for the project. It loads the Kaggle flight dataset and NOAA weather data, and contains the core analysis and model development workflow.


In [1]:
# Standard library imports
import os

# Data manipulation and analysis
import numpy as np
import pandas as pd

# Data visualization
import matplotlib.pyplot as plt

# Custom helper classes
from modelhelper import ModelHelper
from helperclasses import DataFetcherKAGGLE, DataFetcherNOAA

# Scikit-learn preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

# Scikit-learn models
from sklearn.ensemble import (
    RandomForestClassifier,
    RandomForestRegressor
)
from sklearn.linear_model import (
    LogisticRegression,
    LinearRegression
)
from sklearn.svm import SVC, SVR

# Scikit-learn metrics
from sklearn.metrics import (
    accuracy_score,
    mean_squared_error,
    r2_score
)

In [7]:
# Instantiate the project helper objects once; later cells reuse these names.
# Fetcher used below to load the flight dataset (presumably sourced from Kaggle — confirm in helperclasses).
data_fetcher = DataFetcherKAGGLE()
# Fetcher used below to clear and re-fetch the NOAA data.
weatherFetcher = DataFetcherNOAA()
# Helper from the local modelhelper module; not used by any cell visible here.
modelHelper = ModelHelper()

In [None]:
# Load the flight dataset through the project's data fetcher.
flight_data = data_fetcher.fetch_flight_dataset()

# DataFrame.info() prints its summary itself and returns None, so call it
# directly; wrapping it in display() only rendered a stray "None" afterwards.
flight_data.info()

# Preview the first rows using the notebook's rich table rendering.
display(flight_data.head())


Preparing flight dataset...
Optimizing flight dataset data types...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000000 entries, 0 to 2999999
Data columns (total 32 columns):
 #   Column                   Dtype         
---  ------                   -----         
 0   FL_DATE                  datetime64[ns]
 1   AIRLINE                  category      
 2   AIRLINE_DOT              category      
 3   AIRLINE_CODE             category      
 4   DOT_CODE                 category      
 5   FL_NUMBER                int64         
 6   ORIGIN                   category      
 7   ORIGIN_CITY              category      
 8   DEST                     category      
 9   DEST_CITY                category      
 10  CRS_DEP_TIME             int64         
 11  DEP_TIME                 float64       
 12  DEP_DELAY                float64       
 13  TAXI_OUT                 float64       
 14  WHEELS_OFF               float64       
 15  WHEELS_ON                float64       
 16  TAXI

None

Unnamed: 0,FL_DATE,AIRLINE,AIRLINE_DOT,AIRLINE_CODE,DOT_CODE,FL_NUMBER,ORIGIN,ORIGIN_CITY,DEST,DEST_CITY,...,DIVERTED,CRS_ELAPSED_TIME,ELAPSED_TIME,AIR_TIME,DISTANCE,DELAY_DUE_CARRIER,DELAY_DUE_WEATHER,DELAY_DUE_NAS,DELAY_DUE_SECURITY,DELAY_DUE_LATE_AIRCRAFT
0,2019-01-09,United Air Lines Inc.,United Air Lines Inc.: UA,UA,19977,1562,FLL,"Fort Lauderdale, FL",EWR,"Newark, NJ",...,False,186.0,176.0,153.0,1065.0,,,,,
1,2022-11-19,Delta Air Lines Inc.,Delta Air Lines Inc.: DL,DL,19790,1149,MSP,"Minneapolis, MN",SEA,"Seattle, WA",...,False,235.0,236.0,189.0,1399.0,,,,,
2,2022-07-22,United Air Lines Inc.,United Air Lines Inc.: UA,UA,19977,459,DEN,"Denver, CO",MSP,"Minneapolis, MN",...,False,118.0,112.0,87.0,680.0,,,,,
3,2023-03-06,Delta Air Lines Inc.,Delta Air Lines Inc.: DL,DL,19790,2295,MSP,"Minneapolis, MN",SFO,"San Francisco, CA",...,False,260.0,285.0,249.0,1589.0,0.0,0.0,24.0,0.0,0.0
4,2020-02-23,Spirit Air Lines,Spirit Air Lines: NK,NK,20416,407,MCO,"Orlando, FL",DFW,"Dallas/Fort Worth, TX",...,False,181.0,182.0,153.0,985.0,,,,,


In [4]:
# Count how many distinct destination cities appear in the flight data
# (nunique() ignores missing values by default).
num_dest_cities = (
    flight_data
    .loc[:, 'DEST_CITY']
    .nunique()
)
print(f"Number of unique destination cities: {num_dest_cities}")

Number of unique destination cities: 373


In [5]:
# Clear the NOAA data first, then fetch it again; keep the calls in this order.
# NOTE(review): clearing on every run presumably forces a fresh NOAA API request
# each time the cell executes — confirm this is intended rather than reusing a cache.
weatherFetcher.clear_noaa_data()
# Left as the cell's last expression so that any returned value is rendered by the notebook.
weatherFetcher.fetch_noaa_data()

Fetching NOAA data from NOAA API...
<Response [200]>


Unnamed: 0,date,datatype,station,attributes,value
0,2025-01-01,AWND,GHCND:USW00024127,",,W,",43
1,2025-01-01,PGTM,GHCND:USW00024127,",,W,",1556
2,2025-01-01,PRCP,GHCND:USW00024127,"T,,W,2400",0
3,2025-01-01,SNOW,GHCND:USW00024127,"T,,D,",0
4,2025-01-01,SNWD,GHCND:USW00024127,",,D,2400",0
5,2025-01-01,TAVG,GHCND:USW00024127,"H,,S,",2
6,2025-01-01,TMAX,GHCND:USW00024127,",,W,2400",67
7,2025-01-01,TMIN,GHCND:USW00024127,",,W,2400",-32
8,2025-01-01,WDF2,GHCND:USW00024127,",,W,",190
9,2025-01-01,WDF5,GHCND:USW00024127,",,W,",200
