In [1]:
import pandas as pd, numpy as np, seaborn as sns, matplotlib.pyplot as plt, warnings
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler , OneHotEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)

In [2]:
import tensorflow as tf

In [3]:
from tensorflow.keras.layers import Normalization, Dense, InputLayer
from tensorflow.keras.losses import MeanSquaredError, Huber, MeanAbsoluteError
from tensorflow.keras.metrics import RootMeanSquaredError

In [4]:
df = pd.read_csv("https://raw.githubusercontent.com/ManonYa09/Statistics_with_Python_G7/main/Dataset/YdGwfiz4Tp2RsH4s-E6d5g_fe6fe3c8cf0d49028b6706bf33f91df1_Invistico_Airline.csv")

In [5]:
target = 'satisfaction'

In [6]:
cat_columns = []
num_column = []
for column in df.columns:
    if column == target:
        continue
    elif df[column].nunique() <= 3:
        cat_columns.append(column)
    else:
        num_column.append(column)

In [7]:
from sklearn.impute import SimpleImputer

In [8]:
numeric_transformer = Pipeline([
    ('imputer_num', SimpleImputer(strategy='mean')),
    ('Scaler', StandardScaler())
])

In [9]:
categorical_transformer = Pipeline([
    ('imputer_cat', SimpleImputer(strategy='most_frequent')),
    ('Encoder', OneHotEncoder())
])

In [10]:
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, num_column),
    ('cat', categorical_transformer, cat_columns)
])

In [11]:
x = df.drop(columns=target)
y = df[target]

In [12]:
y.value_counts()

satisfaction
satisfied       71087
dissatisfied    58793
Name: count, dtype: int64

In [13]:
X_transformed = preprocessor.fit_transform(x)

In [14]:
X_transformed

array([[ 1.69135082, -1.6711027 , -2.03779055, ...,  0.        ,
         1.        ,  0.        ],
       [ 0.50081965,  0.46985247, -2.03779055, ...,  1.        ,
         0.        ,  0.        ],
       [-1.61568021,  0.15245757, -2.03779055, ...,  0.        ,
         1.        ,  0.        ],
       ...,
       [ 1.9559133 ,  0.32965349,  0.11586891, ...,  0.        ,
         1.        ,  0.        ],
       [ 1.75749144,  0.45622201,  0.11586891, ...,  0.        ,
         1.        ,  0.        ],
       [-0.09444593,  2.26420462,  0.11586891, ...,  0.        ,
         1.        ,  0.        ]])

In [15]:
y = y.replace({'satisfied':1, 'dissatisfied':0})

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42)


In [17]:
input_shape = X_train.shape[1]

In [18]:
  # Determine input shape dynamically
model = tf.keras.Sequential([
    InputLayer(input_shape=(input_shape,)),
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(32, activation="sigmoid"),
    Dense(16, activation="sigmoid"),
    Dense(2, activation='sigmoid')
]) #relu : output >0

In [19]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
model.fit(X_train, y_train, epochs=300, validation_data=(X_test, y_test))


Epoch 1/300
[1m3247/3247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 5ms/step - accuracy: 0.8794 - loss: 0.2824 - val_accuracy: 0.9331 - val_loss: 0.1550
Epoch 2/300
[1m3247/3247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 5ms/step - accuracy: 0.9390 - loss: 0.1441 - val_accuracy: 0.9434 - val_loss: 0.1299
Epoch 3/300
[1m3247/3247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 5ms/step - accuracy: 0.9466 - loss: 0.1251 - val_accuracy: 0.9480 - val_loss: 0.1202
Epoch 4/300
[1m3247/3247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 5ms/step - accuracy: 0.9502 - loss: 0.1150 - val_accuracy: 0.9518 - val_loss: 0.1108
Epoch 5/300
[1m3247/3247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 5ms/step - accuracy: 0.9528 - loss: 0.1085 - val_accuracy: 0.9474 - val_loss: 0.1177
Epoch 6/300
[1m3247/3247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 5ms/step - accuracy: 0.9546 - loss: 0.1041 - val_accuracy: 0.9534 - val_loss: 0.1069
Epoc