In [11]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [2]:
# load the data

df = pd.read_csv('titanic.csv')

# remove irrelevant columns

df = df.drop('Name', axis = 1)

# engode categorical features

le = LabelEncoder()
df['Sex'] = le.fit_transform(df['Sex'])

In [3]:
# Split the data into features and labels

X = df[['Pclass', 'Sex', 'Age', 'Siblings/Spouses Aboard', 'Parents/Children Aboard', 'Fare']]
y = df['Survived']

In [4]:
# Split the data into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Create a K-NN classifier

knn = KNeighborsClassifier(n_neighbors=10)

In [6]:
# Train the model

knn.fit(X_train, y_train)

In [7]:
# Make predictions on the test set

y_pred = knn.predict(X_test)

In [8]:
# Evaluate the model

accuracy = knn.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.7191011235955056


In [10]:
# metrics

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:")
print(confusion_mat)

Accuracy: 0.7191011235955056
Precision: 0.6888888888888889
Recall: 0.4626865671641791
F1 Score: 0.5535714285714286
Confusion Matrix:
[[97 14]
 [36 31]]


# Same data, normalized

In [12]:
# apply MinMaxScaler

scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)

In [13]:
# Create a K-NN classifier

knn_1 = KNeighborsClassifier(n_neighbors=10)

In [14]:
# train test split

X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

In [16]:
# Train the model

knn_1.fit(X_train, y_train)


# Make predictions on the test set

y_pred = knn_1.predict(X_test)

In [17]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:")
print(confusion_mat)

Accuracy: 0.7528089887640449
Precision: 0.7804878048780488
Recall: 0.47761194029850745
F1 Score: 0.5925925925925926
Confusion Matrix:
[[102   9]
 [ 35  32]]
