In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


In [2]:
train =  pd.read_csv('../train.csv')
test =  pd.read_csv('../test.csv')

print("Train data shape:", train.shape)
print("Test data shape:", test.shape)

train.drop(columns=['Unnamed: 0','id'],inplace = True)
test.drop(columns=['Unnamed: 0','id'],inplace = True)

Train data shape: (103904, 25)
Test data shape: (25976, 25)


In [3]:
numerical_columns = ['Age', 'Flight Distance', 'Departure Delay in Minutes','Arrival Delay in Minutes']
nominal_columns = ['Gender', 'Customer Type','Type of Travel', 'Class','satisfaction']

In [4]:
# drop rows with nulls values
train = train.dropna()

# encode nominal features
train_mappings = {}

for col in nominal_columns:
    encoder = LabelEncoder()
    train[col] = encoder.fit_transform(train[col])
    mapping = {category: label for category, label in zip(encoder.classes_, encoder.transform(encoder.classes_))}
    train_mappings[col] = mapping

print(train_mappings)


# scaling numerical features
for col in numerical_columns:
    scaler = StandardScaler()
    train[col] = scaler.fit_transform(train[[col]])

{'Gender': {'Female': 0, 'Male': 1}, 'Customer Type': {'Loyal Customer': 0, 'disloyal Customer': 1}, 'Type of Travel': {'Business travel': 0, 'Personal Travel': 1}, 'Class': {'Business': 0, 'Eco': 1, 'Eco Plus': 2}, 'satisfaction': {'neutral or dissatisfied': 0, 'satisfied': 1}}


In [5]:
train

Unnamed: 0,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,Ease of Online booking,Gate location,...,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,satisfaction
0,1,0,-1.745542,1,2,-0.731305,3,4,3,1,...,5,4,3,4,4,5,5,0.268966,0.072905,0
1,1,1,-0.951526,0,0,-0.956916,3,2,3,3,...,1,1,5,3,1,4,1,-0.360682,-0.237184,0
2,0,0,-0.885358,0,0,-0.047454,2,2,2,2,...,5,4,3,4,4,4,5,-0.386917,-0.392229,1
3,0,0,-0.951526,0,0,-0.629028,2,5,5,5,...,2,2,5,3,1,4,2,-0.098328,-0.159662,0
4,1,0,1.430521,0,0,-0.977973,3,3,3,3,...,3,3,4,4,3,3,3,-0.386917,-0.392229,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103899,0,1,-1.083862,0,1,-1.000033,2,1,2,3,...,2,3,1,4,2,3,2,-0.308211,-0.392229,0
103900,1,0,0.636505,0,0,1.160818,4,4,4,4,...,5,5,5,5,5,5,4,-0.386917,-0.392229,1
103901,1,1,-0.620686,0,0,0.807862,1,1,1,3,...,4,3,2,4,5,5,4,-0.203270,-0.030458,0
103902,0,1,-1.150030,0,1,-0.189839,1,1,1,5,...,1,4,5,1,5,4,1,-0.386917,-0.392229,0


In [6]:
# drop rows with nulls values
test = test.dropna()

# encode nominal features
test_mappings = {}

for col in nominal_columns:
    encoder = LabelEncoder()
    test[col] = encoder.fit_transform(test[col])
    mapping = {category: label for category, label in zip(encoder.classes_, encoder.transform(encoder.classes_))}
    test_mappings[col] = mapping

print(test_mappings)


# scaling numerical features
for col in numerical_columns:
    scaler = StandardScaler()
    test[col] = scaler.fit_transform(test[[col]])



{'Gender': {'Female': 0, 'Male': 1}, 'Customer Type': {'Loyal Customer': 0, 'disloyal Customer': 1}, 'Type of Travel': {'Business travel': 0, 'Personal Travel': 1}, 'Class': {'Business': 0, 'Eco': 1, 'Eco Plus': 2}, 'satisfaction': {'neutral or dissatisfied': 0, 'satisfied': 1}}


In [7]:
test

Unnamed: 0,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,Ease of Online booking,Gate location,...,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,satisfaction
0,0,0,0.817898,0,1,-1.035195,5,4,3,4,...,5,5,5,5,2,5,5,0.962074,0.779894,1
1,0,0,-0.239329,0,0,1.671574,1,1,3,1,...,4,4,4,4,3,4,5,-0.382547,-0.392913,1
2,1,1,-1.296556,0,1,-1.003150,2,0,2,4,...,2,4,1,3,2,2,2,-0.382547,-0.392913,0
3,1,0,0.289285,0,0,2.186291,0,0,0,2,...,1,1,1,1,3,1,4,-0.382547,-0.232985,1
4,0,0,0.619668,0,1,-0.011770,2,3,4,3,...,2,2,2,2,4,2,4,-0.382547,0.140181,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25971,1,1,-0.371482,0,0,-0.668684,3,3,3,1,...,4,3,2,4,4,5,4,-0.382547,-0.392913,0
25972,1,0,-1.098326,0,0,-0.548517,4,4,4,4,...,4,4,5,5,5,5,4,-0.382547,-0.392913,1
25973,0,0,-1.494786,1,1,-0.366263,2,5,1,5,...,2,4,3,4,5,4,2,-0.382547,-0.392913,0
25974,1,0,-1.693016,0,0,-0.066846,3,3,3,3,...,4,3,2,5,4,5,4,-0.382547,-0.392913,1


In [8]:
y_train = train['satisfaction']
x_train = train.drop('satisfaction', axis=1)

y_test = test['satisfaction']
x_test = test.drop('satisfaction', axis=1)

In [9]:
# Create an SVM classifier with a linear kernel
svm = SVC()

# Train the model using the training data
svm.fit(x_train, y_train)

# Make predictions on the test data
y_pred = svm.predict(x_test)

# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9458154713629167
