In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the dataset
airline = pd.read_csv("Invistico_Airline.csv")

In [3]:
# Data Manipulation
age_bins = [-1, 14, 24, 64, np.inf]
age_labels = ["Children (0-14 years)", "Youth (15-24 years)",
            "Adults (25-64 years)", "Seniors (65 years and over)"]
airline["Age Group"] = pd.cut(airline.Age, bins=age_bins, labels=age_labels)


In [4]:
flight_bins = [-1, 999, 2999, np.inf]
flight_labels = ["Short", "Medium", "Long"]
airline["Flight Distance Category"] = pd.cut(airline["Flight Distance"], bins=flight_bins, labels=flight_labels, include_lowest=True)


In [5]:
airline = airline.rename(columns={
    "satisfaction": "Satisfaction",
    "Customer Type": "Customer Type",
    "Type of Travel": "Type of Travel",
    "Flight Distance": "Flight Distance",
    "Seat comfort": "Seat Comfort",
    "Departure/Arrival time convenient": "Departure/Arrival Time Convenient",
    "Food and drink": "Food and Drink",
    "Gate location": "Gate and Location",
    "Inflight wifi service": "Inflight Wifi Service",
    "Inflight entertainment": "Inflight Entertainment",
    "Online support": "Online Support",
    "Ease of Online booking": "Ease of Online Booking",
    "On-board service": "On-board Service",
    "Leg room service": "Leg Room Service",
    "Baggage handling": "Baggage Handling",
    "Check-in service": "Check-in Service",
    "Cleanliness": "Cleanliness",
    "Online boarding": "Online Boarding",
    "Departure Delay in Minutes": "Departure Delay in Minutes",
    "Arrival Delay in Minutes": "Arrival Delay in Minutes"
})

In [6]:
airline = airline.drop(columns=["Flight Distance"])
airline = airline.dropna()

In [7]:
le = LabelEncoder()
airline[["Gender", "Customer Type", "Type of Travel", "Class", "Age Group", "Flight Distance Category"]] = airline[[
    "Gender", "Customer Type", "Type of Travel", "Class", "Age Group", "Flight Distance Category"]].apply(le.fit_transform)


In [8]:
# Split the data into training and testing sets
X = airline.drop("Satisfaction", axis=1)
y = airline["Satisfaction"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:
# Create a random forest classifier with 100 trees
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

In [10]:
# Train the model on the training data
rf_model.fit(X_train, y_train)

In [11]:
# Use the model to make predictions on the test data
y_pred = rf_model.predict(X_test)

In [12]:
# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Random Forest accuracy:", accuracy)

Random Forest accuracy: 0.9585682292068886
