In [12]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the training file with read_csv defaults; header=None means the file
# has no header row, so columns are labelled by integer position.
data = pd.read_csv('dataset/train_data.txt', header=None)

# The last column holds the label; every column before it is a feature.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Hold out 20% of the rows for evaluation. The fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build the random forest (100 trees, depth capped at 10) and fit it on the
# training split; fit() returns the estimator itself, so it can be chained.
rf = RandomForestClassifier(
    n_estimators=100, max_depth=10, random_state=42
).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = rf.predict(X_test)

# Report accuracy, then the macro-averaged precision / recall / F1.
print('Accuracy:', accuracy_score(y_test, y_pred))
for label, metric in (
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1-score:', f1_score),
):
    print(label, metric(y_test, y_pred, average='macro'))

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-score: 1.0


In [3]:
# Display the held-out label Series produced by train_test_split.
y_test

136     1
628     0
184     1
31      1
677     0
       ..
543     0
617     0
60      1
841     0
1010    0
Name: 28, Length: 208, dtype: int64

In [6]:

# Confusion matrix for the predictions stored in y_pred
# (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[106   0]
 [  0 102]]


In [10]:
# k-nearest-neighbours classifier using the 3 closest training points.
knn = KNeighborsClassifier(n_neighbors=3)

# Fit the model to the training data
knn.fit(X_train, y_train)

# Predict once and keep the result; y_pred_KNN is reused by the
# confusion-matrix cell.
y_pred_KNN = knn.predict(X_test)

# Score the stored predictions rather than calling knn.score(X_test, y_test),
# which would run prediction over X_test a second time. For a classifier the
# two give the same mean accuracy.
print("Accuracy:", accuracy_score(y_test, y_pred_KNN))


Accuracy: 0.8942307692307693


In [11]:
# Confusion matrix for the KNN predictions
# (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_KNN)
print(cm)

[[97  9]
 [13 89]]


In [15]:
# Support vector classifier with a linear kernel.
svm = SVC(kernel='linear')

# Fit the model to the training data
svm.fit(X_train, y_train)

# Make predictions on the testing data
y_pred_SVM = svm.predict(X_test)

# Score the SVM's own predictions. y_pred holds the random forest output, so
# using it here would report the wrong model's accuracy.
accuracy = accuracy_score(y_test, y_pred_SVM)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [16]:
# Confusion matrix for the SVM predictions
# (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_SVM)
print(cm)

[[106   0]
 [  0 102]]
