# Testing 

In [5]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

### Preprocessing data

In [6]:
#Loading data
df = pd.read_csv('test.csv')
#Delete unnecessary columns
df.drop(columns=['Unnamed: 0', 'id'], inplace=True, errors='ignore')
#Remove all null-values rows or fill in with mean
df = df.dropna(axis=0)
# imputer = SimpleImputer(strategy='median')
# missing_cols = df.columns[df.isnull().any()]
# print(missing_cols)
# print(df[missing_cols].dtypes)
# df[missing_cols] = imputer.fit_transform(df[missing_cols])
# print(df.isnull().sum())
#Encoding categorical features
le = LabelEncoder()
df['satisfaction'] = le.fit_transform(df['satisfaction'])

class_map = {'Eco': 1, 'Eco Plus': 2, 'Business': 3}
df['Class'] = df['Class'].map(class_map)

df = pd.get_dummies(df, columns=['Gender', 'Customer Type', 'Type of Travel'], drop_first=True)

### Generating X, y

In [7]:
# Splitting data into training and testing sets
X = df.drop(columns= "satisfaction")
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

y = df['satisfaction']

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size = 0.2, random_state=42)

#Logistic Regression model with Sklearn

classifier = LogisticRegression()
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

print('Accuracy: ', accuracy_score(y_pred, y_test))


Accuracy:  0.8731415331145008


In [8]:
#Train model with gradient descent (from scratch)
X_train = np.concatenate((np.ones((X_train.shape[0], 1)), X_train), axis=1)
X_test = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), axis=1)

def sigmoid(s):
    return 1 / (1 + np.exp(-s))

def gradient_descent(X, y, w, eta):
    for _ in range(1000):
        for i in range(X.shape[0]):
            xi = X[i].reshape(-1, 1)
            yi = y[i]
            pi = sigmoid(w.T @ xi)
            w -= eta * (pi - yi) * xi
    return w

num_features = X_train.shape[1] 
w = np.random.randn(num_features, 1)
eta = 0.01
w = gradient_descent(X_train, y_train.values, w, eta)

y_pred_gd = sigmoid(X_test @ w).flatten()
y_pred_gd = (y_pred_gd >= 0.5).astype(int)
print('Accuracy (Gradient Descent): ', accuracy_score(y_pred_gd, y_test))

Accuracy (Gradient Descent):  0.8743000579262405
