# Airline Passenger Satisfaction

## Context

This dataset contains the results of an airline passenger satisfaction survey. Which factors are most strongly correlated with a satisfied (or dissatisfied) passenger? Can you predict passenger satisfaction?

## Content

* Gender: Gender of the passengers (Female, Male)

* Customer Type: The customer type (Loyal customer, disloyal customer)

* Age: The actual age of the passengers

* Type of Travel: Purpose of the flight of the passengers (Personal Travel, Business Travel)

* Class: Travel class in the plane of the passengers (Business, Eco, Eco Plus)

* Flight distance: The flight distance of this journey

* Inflight wifi service: Satisfaction level of the inflight wifi service (0: Not Applicable; 1-5)

* Departure/Arrival time convenient: Satisfaction level with the convenience of the departure/arrival time

* Ease of Online booking: Satisfaction level of online booking

* Gate location: Satisfaction level of Gate location

* Food and drink: Satisfaction level of Food and drink

* Online boarding: Satisfaction level of online boarding

* Seat comfort: Satisfaction level of Seat comfort

* Inflight entertainment: Satisfaction level of inflight entertainment

* On-board service: Satisfaction level of On-board service

* Leg room service: Satisfaction level of Leg room service

* Baggage handling: Satisfaction level of baggage handling

* Check-in service: Satisfaction level of Check-in service

* Inflight service: Satisfaction level of inflight service

* Cleanliness: Satisfaction level of Cleanliness

* Departure Delay in Minutes: Minutes delayed at departure

* Arrival Delay in Minutes: Minutes delayed at arrival

* Satisfaction: Airline satisfaction level (satisfied, neutral or dissatisfied)

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

In [None]:
from sklearn.preprocessing import LabelEncoder
def Encode(df,col):
    """Label-encode one column and print the label-to-code mapping.

    A fresh ``LabelEncoder`` is fitted on ``df[col]``. For every distinct
    encoded value a ``<label> : <code>`` line is printed, followed by a
    separator line. ``df`` is not modified here; the encoded values are
    returned so the caller can assign them back.
    """
    encoder = LabelEncoder().fit(df[col])
    codes = encoder.transform(df[col])
    # Distinct codes present in the column and the labels they stand for.
    distinct_codes = np.unique(codes)
    labels = encoder.inverse_transform(distinct_codes)
    for label, code in zip(labels, distinct_codes):
        print(label, ":", code)
    print("=======================================")
    return codes

In [None]:
# Load the Kaggle train / test splits and drop the two identifier columns
# ("Unnamed: 0" is presumably a saved row index; "id" is dropped with it).
df_train = pd.read_csv("/kaggle/input/airline-passenger-satisfaction/train.csv")
df_test  = pd.read_csv("/kaggle/input/airline-passenger-satisfaction/test.csv")
df_train = df_train.drop(["Unnamed: 0","id"],axis=1)
df_test  = df_test.drop(["Unnamed: 0","id"],axis=1)
# Combined train + test frame. pd.concat replaces DataFrame.append, which was
# deprecated in pandas 1.4 and removed in 2.0; like append, it keeps each
# frame's original row labels (no index reset).
df = pd.concat([df_train, df_test])

In [None]:
# Drop, in place, every row containing at least one missing value from the
# train split, the test split and the combined frame.
for frame in (df_train, df_test, df):
    frame.dropna(inplace=True)

In [None]:
# Preview the first rows of the combined (train + test) frame.
df.head()

In [None]:
# Print the column dtypes and non-null counts of the combined frame.
df.info()

In [None]:
# Sunburst chart of the combined frame; rings are nested from the innermost
# column outwards: Type of Travel -> Class -> Customer Type.
hierarchy = ['Type of Travel','Class', 'Customer Type']
fig = px.sunburst(df, path=hierarchy, color_continuous_scale='RdBu')
fig.show()

In [None]:
# Names of the training-frame columns stored with the "object" dtype; these
# are the ones the following cells label-encode.
object_attribute = [
    name for name in df_train.columns if df_train[name].dtype=="object"
]
object_attribute

In [None]:
# Label-encode the object columns of the training frame. If a column is no
# longer of object dtype (presumably because this cell was already run),
# report it and stop.
for col in object_attribute:
    if df_train[col].dtype!="object":
        print("Encoded")
        break
    df_train[col] = Encode(df_train,col)

In [None]:
# Label-encode the same columns in the test frame. Encode() fits a separate
# encoder here, so the codes match the training frame only if both frames
# contain the same set of labels — NOTE(review): confirm.
for col in object_attribute:
    if df_test[col].dtype!="object":
        print("Encoded")
        break
    df_test[col] = Encode(df_test,col)

In [None]:
# Label-encode the same columns in the combined frame; stop with a message
# as soon as a column is found that is no longer of object dtype.
for col in object_attribute:
    if df[col].dtype!="object":
        print("Encoded")
        break
    df[col] = Encode(df,col)

In [None]:
# Preview the combined frame again, after the label-encoding loop over `df`.
df.head()

In [None]:
# 2x2 grid of count plots over the combined frame, each split by the
# satisfaction value. One (column, palette) pair per panel, in grid order.
count_panels = [
    ("Class", "afmhot"),
    ("Gate location", "Paired"),
    ("Customer Type", "CMRmap_r"),
    ("Checkin service", "seismic_r"),
]

plt.figure(figsize=(10,10))
for position, (column, palette) in enumerate(count_panels, start=1):
    plt.subplot(2,2,position)
    sns.countplot(data=df, x=column, hue="satisfaction", palette=palette)
    plt.title("Count Plot of " + column)
    plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# 2x2 grid of KDE plots comparing the two encoded satisfaction values
# (1 is labelled "satisfaction", 0 "neutral or dissatisfied") for each
# numeric column.
kde_columns = [
    "Age",
    "Flight Distance",
    "Departure Delay in Minutes",
    "Arrival Delay in Minutes",
]
# Row subsets are the same for every panel, so select them once.
rows_satisfied = df.loc[df["satisfaction"]==1]
rows_not_satisfied = df.loc[df["satisfaction"]==0]

plt.figure(figsize=(10,10))
for position, column in enumerate(kde_columns, start=1):
    plt.subplot(2,2,position)
    # NOTE(review): newer seaborn releases spell `shade` as `fill`; kept as-is.
    sns.kdeplot(rows_satisfied[column],alpha=0.5,label="satisfaction",shade=True)
    sns.kdeplot(rows_not_satisfied[column],alpha=0.5,label="neutral or dissatisfied",shade=True)
    plt.title("KDE Plot of " + column)
    plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Side-by-side 2-D histograms of two column pairs from the combined frame;
# each axis is labelled with its column name.
hist_pairs = [
    ("Age", "Flight Distance"),
    ("Departure Delay in Minutes", "Arrival Delay in Minutes"),
]

plt.figure(figsize=(10,6))
for position, (x_column, y_column) in enumerate(hist_pairs, start=1):
    plt.subplot(1,2,position)
    plt.hist2d(x=df[x_column],y=df[y_column])
    plt.xlabel(x_column)
    plt.ylabel(y_column)
plt.tight_layout()
plt.show()

In [None]:
# Annotated heatmap of the pairwise column correlations of the combined
# frame, with cell values formatted as whole percentages.
# (`ax` receives the return value of plt.figure, i.e. the figure object.)
ax = plt.figure(figsize=(12,10))
correlations = df.corr()
sns.heatmap(correlations, annot=True, cmap="coolwarm", fmt='.0%')
plt.show()

In [None]:
# 2x2 grid: mean of each numeric column per satisfaction value, drawn as a
# marked line. One (column, line colour, y-axis label) triple per panel.
mean_panels = [
    ("Age", "C0", "Avg of Age"),
    ("Flight Distance", "C0", "Avg of Flight Distance"),
    ("Departure Delay in Minutes", "r", "Avg of Departure Delay in Minutes "),
    ("Arrival Delay in Minutes", "r", "Avg of Arrival Delay in Minutes "),
]

ax = plt.figure(figsize=(14,6))
for position, (column, line_color, y_label) in enumerate(mean_panels, start=1):
    plt.subplot(2,2,position)
    group_means = df.groupby(['satisfaction'])[column].mean()
    group_means.plot(kind="line",color=line_color,marker="o")
    plt.ylabel(y_label)
    plt.grid(True)
plt.show()

In [None]:
# Mean encoded satisfaction in the training frame, grouped by Class plus one
# further column per panel. The third panel spans the whole bottom row.
class_panels = [
    (1, 'Gender', "C0"),
    (2, 'Customer Type', "C0"),
    ((3,4), 'Type of Travel', "r"),
]

ax = plt.figure(figsize=(14,6))
for position, second_key, line_color in class_panels:
    plt.subplot(2,2,position)
    group_means = df_train.groupby(['Class',second_key])['satisfaction'].mean()
    group_means.plot(kind="line",color=line_color,marker="o")
    plt.ylabel("Avg of satisfaction")
    plt.grid(True)

In [None]:
# Split the train and test frames into features (every column except the
# target) and the "satisfaction" target column.
X_train, y_train = df_train.drop(columns="satisfaction"), df_train["satisfaction"]
X_test, y_test = df_test.drop(columns="satisfaction"), df_test["satisfaction"]

In [None]:
# Features / target of the combined (train + test) frame.
X, y = df.drop(columns="satisfaction"), df["satisfaction"]

In [None]:
from sklearn.metrics import plot_roc_curve,plot_confusion_matrix,accuracy_score,confusion_matrix
def model(model):
    """Report hold-out accuracy and a confusion matrix for a fitted classifier.

    Prints the estimator's class name and its accuracy on the module-level
    ``X_test`` / ``y_test`` split, then displays the confusion matrix for the
    same split.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    """
    print(type(model).__name__)
    pred = model.predict(X_test)
    acs = accuracy_score(y_test,pred)
    print("Accuracy Score             :",acs)

    # Use the hold-out split here as well: the combined X / y also contain the
    # rows the model was fitted on, which would inflate the matrix and make it
    # inconsistent with the accuracy printed above.
    plot_confusion_matrix(model,X_test,y_test,cmap="cividis")
    plt.title("Confusion Matrix")
    plt.show()

In [None]:
def Check(list_of_disp):
    """Draw several display objects on one shared set of axes.

    Every item of ``list_of_disp`` has its ``plot(ax=...)`` method called with
    the current axes; a dashed black diagonal from (0, 0) to (1, 1) is then
    added and the figure is shown.
    """
    shared_axes = plt.gca()
    for display in list_of_disp:
        display.plot(ax=shared_axes)
    plt.plot([0,1],[0,1],"--",color="k",alpha=0.7)
    plt.show()

In [None]:
from sklearn.model_selection import cross_val_score
def CrossValidationScore(model_list):
    """Plot the mean 5-fold cross-validation score of each model.

    Every estimator in ``model_list`` is scored with ``cross_val_score`` on
    the module-level ``X`` / ``y``. The per-model mean scores are drawn as a
    single line, with one tick per model labelled by the estimator's class
    name and each point annotated with its score rounded to two decimals.

    Parameters
    ----------
    model_list : sequence of estimators accepted by ``cross_val_score``.
    """
    mean_cross_val_score = [
        cross_val_score(estimator, X, y, cv=5).mean() for estimator in model_list
    ]
    # Estimator objects cannot serve as x coordinates: plot against integer
    # positions and label the ticks with the class names instead.
    model_names = [type(estimator).__name__ for estimator in model_list]
    positions = list(range(len(model_list)))

    fig = plt.figure()
    plt.plot(positions, mean_cross_val_score, linewidth=4, markersize=18,
             color="c",marker="s",label="Cross-Validation Score",linestyle='-.')
    for position, score in zip(positions, mean_cross_val_score):
        plt.text(x=position, y=score, s=str(score.round(2)), fontsize=17)
    plt.xticks(positions, model_names)
    fig.set_facecolor("#e1ddbf")
    plt.xlabel("Model's Name")
    plt.ylabel("Cross-Validation Score")
    plt.title("Cross-Validation Score of Models",fontsize=20)
    plt.legend(loc="upper left")
    plt.show()

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression fitted on the training split (fit returns the estimator).
lr = LogisticRegression(max_iter=10000).fit(X_train,y_train)
print("Model Installed!")
print("Please Wait for Results..")
model(lr)

# ROC curve on the test split plus a dashed diagonal from (0, 0) to (1, 1).
lr_disp = plot_roc_curve(lr, X_test, y_test)
diagonal = [0,1]
plt.plot(diagonal,diagonal,"--",color="k",alpha=0.7)
plt.show()

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes fitted on the training split (fit returns the estimator).
gnb = GaussianNB().fit(X_train,y_train)
print("Model Installed!")
print("Please Wait for Results..")
model(gnb)

# ROC curve on the test split plus a dashed diagonal from (0, 0) to (1, 1).
gnb_disp = plot_roc_curve(gnb, X_test, y_test)
diagonal = [0,1]
plt.plot(diagonal,diagonal,"--",color="k",alpha=0.7)
plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with default settings, fitted on the training split
# (fit returns the estimator).
rfc = RandomForestClassifier().fit(X_train,y_train)
print("Model Installed!")
print("Please Wait for Results..")
model(rfc)

# ROC curve on the test split plus a dashed diagonal from (0, 0) to (1, 1).
rfc_disp = plot_roc_curve(rfc, X_test, y_test)
diagonal = [0,1]
plt.plot(diagonal,diagonal,"--",color="k",alpha=0.7)
plt.show()

In [None]:
from xgboost import XGBClassifier
# XGBoost classifier with default settings, fitted on the training split.
xgb = XGBClassifier()
xgb.fit(X_train,y_train)
print("Model Installed!")
print("Please Wait for Results..")
model(xgb)

# ROC curve of the XGBoost model on the test split. The estimator passed here
# must be `xgb`; passing `rfc` would redraw the random-forest curve under the
# XGBoost name.
xgb_disp = plot_roc_curve(xgb, X_test, y_test)
plt.plot([0,1],[0,1],"--",color="k",alpha=0.7)
plt.show()

In [None]:
# Overlay the ROC displays of all four models on a single set of axes.
list_of_disp = [rfc_disp,lr_disp,gnb_disp,xgb_disp]
Check(list_of_disp)