# Gate Assignment 

## Imports


In [11]:
import pandas as pd
import numpy as np
import pickle
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.impute import SimpleImputer

## Load Data 

In [4]:
# Read the flight-level CSV into `df`; every later cell in this notebook
# works on (and mutates) this frame.
df = pd.read_csv(filepath_or_buffer="simulated_data/TAS_Airport_Data_2024_final.csv")
# Bare trailing expression so the notebook renders the frame preview.
df

Unnamed: 0,FlightID,Airline,DepartureAirport,ArrivalAirport,OriginAirport,Gate,ScheduledTime,ScheduledArrivalTime,FlightDuration,BaggageHandlers,...,GateAvailability,GateType,AircraftTurnaroundTime,CheckInQueueTime,SecurityWaitTime,LoungeUsage,OnTimePerformance,BaggageClaimBelts,CustomsOfficers,ImmigrationOfficers
0,HY20240101_001D,HY,TAS,FRA,,C18,2024-01-01 22:11:00,,135,4,...,Available,International,57,7,27,No,Delayed,2,9,5
1,KE20240101_002D,KE,TAS,KUL,,C3,2024-01-01 08:02:00,,521,4,...,Available,International,57,7,27,No,Delayed,2,9,5
2,KE20240101_003D,KE,TAS,IST,,C3,2024-01-01 20:33:00,,291,4,...,Available,International,57,7,27,No,Delayed,2,9,5
3,HY20240101_004D,HY,TAS,SIN,,C3,2024-01-01 20:03:00,,462,2,...,Available,International,57,7,27,No,Delayed,2,9,5
4,HY20240101_005A,HY,,,FRA,C8,,2024-01-01 11:50:00,192,4,...,Available,International,57,7,27,No,Delayed,2,9,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32935,TK20241231_086A,TK,,,DXB,B1,,2024-12-31 16:50:00,119,6,...,Available,International,57,7,27,No,Delayed,2,9,5
32936,HY20241231_087A,HY,,,DXB,B16,,2024-12-31 22:10:00,591,2,...,Available,International,57,7,27,No,Delayed,2,9,5
32937,HY20241231_088A,HY,,,LHR,C8,,2024-12-31 19:04:00,103,4,...,Available,International,57,7,27,No,Delayed,2,9,5
32938,TK20241231_089A,TK,,,DEL,B20,,2025-01-01 06:42:00,596,4,...,Available,International,57,7,27,No,Delayed,2,9,5


## Feature Engineering

In [5]:


# --- Datetime features ---------------------------------------------------
# Parse both schedule columns in place; unparseable or missing values become NaT.
for ts_col in ("ScheduledTime", "ScheduledArrivalTime"):
    df[ts_col] = pd.to_datetime(df[ts_col], errors="coerce")

# Derive hour-of-day and day-of-week from each timestamp. Rows whose timestamp
# is NaT get NaN here; the numeric imputation step further down fills them.
for new_col, source_col, part in (
    ("DepartureHour", "ScheduledTime", "hour"),
    ("ArrivalHour", "ScheduledArrivalTime", "hour"),
    ("DepartureDayOfWeek", "ScheduledTime", "dayofweek"),
    ("ArrivalDayOfWeek", "ScheduledArrivalTime", "dayofweek"),
):
    df[new_col] = getattr(df[source_col].dt, part)

# --- Missing values ------------------------------------------------------
# Categorical columns: missing entries are replaced by the literal "Unknown",
# which then becomes its own category when the column is label-encoded.
categorical_cols = [
    "Airline",
    "DepartureAirport",
    "ArrivalAirport",
    "OriginAirport",
    "AircraftType",
    "WeatherCondition",
    "ReasonForDelay",
    "GateAvailability",
    "GateType",
    "LoungeUsage",
]
df[categorical_cols] = df[categorical_cols].fillna("Unknown")

# Numeric columns (including the hour / day-of-week columns created above):
# fill gaps with the per-column median computed over the whole frame.
numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
numeric_medians = df[numeric_cols].median()
df[numeric_cols] = df[numeric_cols].fillna(numeric_medians)

# --- Label encoding ------------------------------------------------------
# One fitted encoder per categorical column, kept in `label_encoders` so the
# integer codes can be mapped back to the original labels later.
label_encoders = {col: LabelEncoder().fit(df[col]) for col in categorical_cols}
for col, encoder in label_encoders.items():
    df[col] = encoder.transform(df[col])

# The target gets its own encoder; the raw `Gate` column is left untouched and
# the integer codes are written to `Gate_encoded`.
gate_encoder = LabelEncoder().fit(df["Gate"])
df["Gate_encoded"] = gate_encoder.transform(df["Gate"])

# --- Engineered features -------------------------------------------------
# Passengers per baggage handler; the +1 in the denominator presumably guards
# against division by zero when no handlers are recorded.
df["PassengerDensity"] = df["Passengers"].div(df["BaggageHandlers"].add(1))
# Combined head-count of the three ground service columns.
df["TotalServiceCrew"] = df["GroundCrew"].add(df["FuelTrucks"]).add(df["CleaningCrew"])
# Sum of the three delay columns.
df["DelayImpact"] = df["RunwayDelay"].add(df["DepartureDelay"]).add(df["ArrivalDelay"])


## Prepare Features and Target

In [6]:
# Model input columns. The order is significant: it fixes the column order of
# the scaled matrix the classifier is trained on.
features = [
    # flight identity / routing (label-encoded)
    "Airline",
    "DepartureAirport",
    "ArrivalAirport",
    "OriginAirport",
    "AircraftType",
    # flight and ground-handling resources
    "FlightDuration",
    "BaggageHandlers",
    "GroundCrew",
    "FuelTrucks",
    "CleaningCrew",
    "Passengers",
    "SecurityChecks",
    "WeatherCondition",
    "PreviousFlightDelay",
    # gate and terminal state
    "GateAvailability",
    "GateType",
    "AircraftTurnaroundTime",
    "CheckInQueueTime",
    "SecurityWaitTime",
    "LoungeUsage",
    "BaggageClaimBelts",
    "CustomsOfficers",
    "ImmigrationOfficers",
    # derived time features
    "DepartureHour",
    "ArrivalHour",
    "DepartureDayOfWeek",
    "ArrivalDayOfWeek",
    # engineered features
    "PassengerDensity",
    "TotalServiceCrew",
    "DelayImpact",
]

# Design matrix and integer-encoded target.
X = df.loc[:, features]
y = df.loc[:, "Gate_encoded"]

# Standardise every feature (optional for a Random Forest, kept as good practice).
# NOTE(review): the scaler is fit on all rows, before the train/test split in
# the following cell, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

## Train-Test Split  

In [7]:
# Hold out 20% of the rows for evaluation. Stratifying on the encoded gate
# keeps each gate's share the same in both partitions; the fixed seed makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

## Model Training

In [8]:
# Random Forest gate classifier: 300 trees, each capped at depth 20, with a
# fixed seed so the fitted forest is reproducible.
model = RandomForestClassifier(
    n_estimators=300,
    max_depth=20,
    random_state=42,
    # Trees are built independently, so use every available core. The per-tree
    # seeds are derived from `random_state`, so the fitted forest is the same
    # as with the single-core default.
    n_jobs=-1,
)
# Trailing expression: fits on the training partition and displays the estimator.
model.fit(X_train, y_train)

## Evaluation 

In [9]:
# Predict the encoded gate for every held-out row.
y_pred = model.predict(X_test)

print("\n--- Gate Assignment Model ---\n")
# Per-class precision / recall / F1, followed by the confusion matrix
# (rows are true classes, columns are predicted classes), one per print line.
print(
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)


--- Gate Assignment Model ---

              precision    recall  f1-score   support

           0       0.02      0.02      0.02       107
           1       0.01      0.01      0.01       108
           2       0.01      0.01      0.01       124
           3       0.03      0.03      0.03       120
           4       0.03      0.03      0.03       107
           5       0.02      0.02      0.02       108
           6       0.02      0.02      0.02       103
           7       0.01      0.01      0.01       113
           8       0.01      0.01      0.01       115
           9       0.04      0.05      0.04       109
          10       0.03      0.04      0.04       118
          11       0.00      0.00      0.00       107
          12       0.01      0.01      0.01       111
          13       0.02      0.02      0.02       113
          14       0.01      0.01      0.01       103
          15       0.00      0.00      0.00       103
          16       0.00      0.00      0.00      

In [14]:
# `joblib` is already imported in the notebook's import cell, so it is not
# re-imported here.

# The model was trained on label-encoded, standardised columns in the order
# given by `features`. Persist those fitted preprocessing objects next to the
# model so that new rows can be transformed the same way at inference time and
# predicted codes can be mapped back to gate names via `gate_encoder`.
joblib.dump(
    {
        "features": features,
        "label_encoders": label_encoders,
        "gate_encoder": gate_encoder,
        "scaler": scaler,
    },
    "gate_assignment_preprocessing.joblib",
)

# Persist the fitted classifier itself (same file name and content as before).
# Kept as the last expression so the cell still displays the written path.
joblib.dump(model, "gate_assignment.joblib")

['gate_assignment.joblib']