In [1]:
# Step 1: Import Libraries
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler


In [3]:
# Step 2: Load Dataset
# Read the CSV into a DataFrame; the path is relative to the notebook's working directory.
DATASET_CSV = "cleaned_flight_data.csv"
df = pd.read_csv(DATASET_CSV)

In [5]:
# Step 3: Define Features and Target
# The label is the "Flight_Cancelled" column; every other column is a predictor.
y = df["Flight_Cancelled"]
X = df.drop(columns="Flight_Cancelled")

In [7]:
# Step 4: Identify Categorical and Numerical Columns
# Select by dtype *family* rather than by exact dtype name. The ColumnTransformer
# built from these lists leaves `remainder` at its default ("drop"), so any column
# that lands in neither list is silently excluded from the model.
#   - numerical: every numeric dtype (int32, float32, nullable ints, ...), not only
#     int64/float64. "number" does not match bool.
#   - categorical: object strings, pandas `category` columns, and True/False flags,
#     which are one-hot encoded alongside the other categoricals.
categorical_cols = X.select_dtypes(include=["object", "category", "bool"]).columns.tolist()
numerical_cols = X.select_dtypes(include="number").columns.tolist()

In [9]:
# Step 5: Preprocessing Pipeline
# Each entry is (name, transformer, columns it applies to).
# Numerical columns are standardised.
numeric_step = ("num", StandardScaler(), numerical_cols)
# Categorical columns are one-hot encoded; handle_unknown='ignore' keeps transform
# from raising on categories that were not seen during fit.
categorical_step = ("cat", OneHotEncoder(handle_unknown='ignore'), categorical_cols)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

In [11]:
# Step 6: Create Pipeline with Logistic Regression
# Chain preprocessing and the classifier so both are fitted and applied together.
# max_iter is set to 1000 explicitly rather than left at the library default.
classifier = LogisticRegression(max_iter=1000)
pipeline_steps = [
    ("preprocessor", preprocessor),
    ("classifier", classifier),
]
pipeline = Pipeline(steps=pipeline_steps)

In [13]:
# Step 7: Train-Test Split
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
split_settings = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_settings)

In [15]:
# Step 8: Train the Model
# Fits the preprocessing steps and then the classifier, using the training split only.
pipeline.fit(X=X_train, y=y_train)

In [17]:
# Step 9: Make Predictions
# Predict labels for the held-out rows with the fitted pipeline.
y_pred = pipeline.predict(X=X_test)

In [19]:
# Step 10: Evaluation Metrics
# Per-class breakdown first, then the individual summary scores.
print("Classification Report:\n", classification_report(y_test, y_pred))

# (label, scoring function) pairs, printed in this order.
summary_metrics = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)
for metric_label, metric_fn in summary_metrics:
    print(metric_label, metric_fn(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

       False       0.71      0.61      0.66       187
        True       0.84      0.89      0.86       413

    accuracy                           0.80       600
   macro avg       0.78      0.75      0.76       600
weighted avg       0.80      0.80      0.80       600

Accuracy: 0.8033333333333333
Precision: 0.835990888382688
Recall: 0.8886198547215496
F1 Score: 0.8615023474178404


In [21]:
# Step 11: Export Processed Data
# Apply the preprocessing to the full dataset and write the result to CSV.
#
# Fit an unfitted *copy* of the preprocessor for the export. `preprocessor` is the
# same object as the pipeline's "preprocessor" step (Pipeline does not clone its
# steps when caching is off), so calling fit_transform on it directly would refit
# that step on every row, test rows included, underneath the already-trained
# classifier and leave `pipeline` in an inconsistent state.
export_preprocessor = clone(preprocessor)
X_transformed = export_preprocessor.fit_transform(X)

# The transformer output may be a sparse matrix; densify it before building the frame.
dense_features = X_transformed.toarray() if hasattr(X_transformed, 'toarray') else X_transformed

# No column names are supplied, so the feature columns are labelled 0..n-1.
transformed_df = pd.DataFrame(dense_features)
transformed_df["Flight_Cancelled"] = y.values
transformed_df.to_csv("chinmayee_feature_engineered_data.csv", index=False)