<a href="https://colab.research.google.com/github/mikecinnamon/MLearning/blob/main/Notebooks/airsat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# [ML-16] Example - Airline passenger satisfaction

## Importing the data

In [None]:
import numpy as np
import pandas as pd

# Base URL of the remote data folder; the CSV file name is appended to it.
path = 'https://raw.githubusercontent.com/mikecinnamon/Data/main/'
df = pd.read_csv(path + 'airsat.csv')

## Exploring the data

In [None]:
# Summary of the data frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# Mean of the 'sat' column, rounded to three decimals
# (the share of positive cases if 'sat' is coded 0/1 -- TODO confirm).
df['sat'].mean().round(3)

## Target vector and features matrix

In [None]:
# Features matrix: every column of df except the target column 'sat'.
X = df.drop('sat', axis=1)
# Target vector: the 'sat' column on its own.
y = df['sat']

## Train-test split

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state=0 fixes the shuffle so the
# same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

## Q1. Random forest model

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest of 200 trees, each capped at depth 5, with a fixed seed.
rf = RandomForestClassifier(
    max_depth=5,
    n_estimators=200,
    random_state=0,
)
# Fit on the training split; as the last expression, the fitted estimator is displayed.
rf.fit(X_train, y_train)

In [None]:
# (training score, test score) of the random forest, each rounded to three decimals.
tuple(
    round(rf.score(features, labels), 3)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

## Q2. XGBoost model

In [None]:
from xgboost import XGBClassifier
# Gradient-boosted trees with the same depth, tree count and seed as the random forest.
xgb = XGBClassifier(
    max_depth=5,
    n_estimators=200,
    random_state=0,
)
# Fit on the training split; as the last expression, the fitted estimator is displayed.
xgb.fit(X_train, y_train)

In [None]:
# (training score, test score) of the XGBoost model, each rounded to three decimals.
tuple(
    round(xgb.score(features, labels), 3)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

## Q3. Relevant features

In [None]:
# Label the XGBoost importance scores with the feature names, then list them
# from highest to lowest, rounded to three decimals.
importance = pd.Series(xgb.feature_importances_, index=X.columns)
print(importance.sort_values(ascending=False).round(3))

In [None]:
# Cross-tabulation of counts: rows are the values of 'business', columns the values of 'sat'.
print(pd.crosstab(df['business'], df['sat']))

## Q4. MLP model

In [None]:
from keras import Input, models, layers

In [None]:
# The input width is taken from the training matrix rather than hard-coded as 22,
# so the network still matches the data if columns are added or dropped.
input_tensor = Input(shape=(X_train.shape[1],))

In [None]:
# Hidden layer: a Dense layer of 32 units with ReLU activation, applied to the input tensor.
x = layers.Dense(32, activation='relu')(input_tensor)

In [None]:
# Output layer: a Dense layer of 2 units with softmax activation, applied to the hidden layer.
output_tensor = layers.Dense(2, activation='softmax')(x)

In [None]:
# Functional-API model mapping input_tensor to output_tensor.
mlp = models.Model(input_tensor, output_tensor)

In [None]:
# Adam optimizer; the sparse categorical cross-entropy loss takes integer class labels,
# matching the 2-unit softmax output; accuracy is tracked under the name 'acc'.
mlp.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

In [None]:
# Train for 50 epochs on the (unnormalized) training split with no progress output;
# the trailing ';' keeps the return value of fit() from being displayed.
mlp.fit(X_train, y_train, epochs=50, verbose=0);

In [None]:
# evaluate() returns the loss followed by the compiled metrics; element [1] is
# therefore the 'acc' metric on the test split, shown to three decimals.
round(mlp.evaluate(X_test, y_test, verbose=0)[1], 3)

## Q5. Multilayer perceptron model (normalized data)

In [None]:
def normalize(x):
    """Min-max scale ``x`` so its values run from 0 to 1.

    Parameters
    ----------
    x : pandas Series, pandas DataFrame or NumPy array
        Numeric data. A DataFrame is scaled column by column.

    Returns
    -------
    Same kind of object as ``x``, holding ``(x - min) / (max - min)``.
    Where the range ``max - min`` is zero (a constant column), the result
    is 0 instead of the NaN that 0/0 would produce.
    """
    low = x.min()
    span = x.max() - low
    if np.ndim(span) == 0:
        # Scalar range (Series or array input): divide by 1 when the data are constant.
        if span == 0:
            span = 1
    else:
        # One range per column (DataFrame input): patch only the zero ranges.
        span = span.where(span != 0, 1)
    return (x - low) / span

In [None]:
# Apply normalize() to every column of X. Note that each column's min and max are
# computed over all rows of X, i.e. before the rows are split into train and test.
XN = X.apply(normalize)

In [None]:
# Reuse the row partition of the original split, so that XN_train / XN_test line up
# with y_train / y_test by construction rather than because test_size and
# random_state happen to be repeated. Assumes the row index of X is unique
# (true for the default index created by read_csv).
XN_train, XN_test = XN.loc[X_train.index], XN.loc[X_test.index]

In [None]:
# Build a brand-new network for the normalized data. Wrapping the existing
# input_tensor / output_tensor in another Model would reuse the Dense layers
# created for Q4, and with them the weights already trained on the unnormalized
# data, so the comparison between Q4 and Q5 would not start from equal footing.
inputs_n = Input(shape=(XN_train.shape[1],))
hidden_n = layers.Dense(32, activation='relu')(inputs_n)
outputs_n = layers.Dense(2, activation='softmax')(hidden_n)
mlp = models.Model(inputs_n, outputs_n)
mlp.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])
# Same training budget as Q4: 50 epochs, no progress output.
mlp.fit(XN_train, y_train, epochs=50, verbose=0);
# Test accuracy (element [1] follows the loss), shown to three decimals.
round(mlp.evaluate(XN_test, y_test, verbose=0)[1], 3)