In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [None]:
# Seed NumPy's legacy global RNG before anything random runs.
np.random.seed(123)

# Read the training and test splits from CSV files in the working directory.
train_data, test_data = map(pd.read_csv, ("activity_train.csv", "activity_test.csv"))

# Summary statistics of the training frame, shown as the cell output.
train_data.describe()

In [None]:
# Collect the numeric column labels once; later cells reuse `numeric_cols`.
numeric_cols = train_data.select_dtypes(include=[np.number]).columns
first_numeric_col = numeric_cols[0]

# Histogram (30 bins, no KDE overlay) of the first numeric column.
hist_ax = sns.histplot(train_data[first_numeric_col], bins=30, kde=False, color="blue")
hist_ax.set(
    title=f"Distribution of {first_numeric_col}",
    xlabel=first_numeric_col,
    ylabel="Count",
)
plt.show()

In [None]:
# Pairwise correlations among the first ten numeric columns (cell output).
train_data.loc[:, numeric_cols[:10]].corr()

In [None]:
# Correlation matrix of the first ten numeric columns, drawn as an annotated
# heatmap on a diverging colour map.
corr_matrix = train_data.loc[:, numeric_cols[:10]].corr()
sns.heatmap(data=corr_matrix, cmap='coolwarm', annot=True)
plt.show()

In [None]:
# Side-by-side box plots of the first ten numeric columns on a wide figure.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=train_data[numeric_cols[:10]], ax=ax)

# Tilt the column names so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Number of training rows per value of the "activity" column.
sns.countplot(data=train_data, x="activity")
plt.show()

In [None]:
# Split each frame positionally: every column except the last is a feature and
# the last column is the label.
# NOTE(review): presumably the last column is "activity" (the column plotted
# earlier) — confirm against the CSV schema.
train_features, train_labels = train_data.iloc[:, :-1], train_data.iloc[:, -1]
test_features, test_labels = test_data.iloc[:, :-1], test_data.iloc[:, -1]

# Baseline: 5-nearest-neighbours classifier fitted on the full training split.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(train_features, train_labels)
predictions = knn_model.predict(test_features)

# Per-class metrics of the baseline on the test split.
print(classification_report(test_labels, predictions))

In [None]:
# Fraction of test rows the baseline model labelled correctly.
accuracy = accuracy_score(y_true=test_labels, y_pred=predictions)
print(f"Accuracy: {accuracy}")

In [None]:
# Confusion matrix of the baseline KNN predictions on the test split.
# The class order is fixed explicitly (sorted union of true and predicted
# labels) so the same sequence can label the heatmap ticks; without it the
# axes only show row/column indices.
class_names = np.union1d(test_labels, predictions)
conf_matrix = confusion_matrix(test_labels, predictions, labels=class_names)
sns.heatmap(conf_matrix, annot=True, fmt='g',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
# Title added so this figure can be told apart from the tuned-model matrix.
plt.title('Confusion Matrix for KNN Model')
plt.show()

In [None]:
# Tune K and the vote weighting on a random half of the training rows.
# `random_state` pins the sample: without it the draw depends on how far the
# global NumPy RNG has advanced, so re-running this cell (or running cells out
# of order) would silently tune on different rows.
train_sample = train_data.sample(frac=0.5, random_state=123)
train_labels_sample = train_sample.iloc[:, -1]
train_features_sample = train_sample.iloc[:, :-1]

# Search space: K from 1 to 30, with uniform and distance-weighted voting.
param_grid = {
    'n_neighbors': np.arange(1, 31),
    'weights': ['uniform', 'distance']
}

# 5-fold cross-validated grid search over the sampled rows.
tuned_knn = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
tuned_knn.fit(train_features_sample, train_labels_sample)

# Score the fitted search object on the untouched test split.
predictions_tuned = tuned_knn.predict(test_features)
print(classification_report(test_labels, predictions_tuned))

In [None]:
# Fraction of test rows the tuned model labelled correctly.
accuracy_tuned = accuracy_score(y_true=test_labels, y_pred=predictions_tuned)
print(f"Accuracy for Tuned Model: {accuracy_tuned}")

In [None]:
# Confusion matrix of the tuned model's predictions on the test split.
# The class order is fixed explicitly (sorted union of true and predicted
# labels) so the heatmap ticks show class names instead of bare indices.
class_names = np.union1d(test_labels, predictions_tuned)
conf_matrix_tuned = confusion_matrix(test_labels, predictions_tuned, labels=class_names)
sns.heatmap(conf_matrix_tuned, annot=True, fmt='g',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix for Tuned KNN Model')
plt.show()

In [None]:
# Sweep K from 1 to 30: refit on the full training split for each K and record
# the accuracy on the test split.
# NOTE(review): these scores are measured on the test split, so they are for
# inspection only and should not be used to choose K.
k_values = np.arange(1, 31)
accuracy_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k).fit(train_features, train_labels)
    pred_i = knn.predict(test_features)
    accuracy_scores.append(accuracy_score(test_labels, pred_i))

# Accuracy as a function of K.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(k_values, accuracy_scores, color='blue', linestyle='dashed', marker='o',
        markerfacecolor='red', markersize=10)
ax.set_title('Accuracy vs. K Value')
ax.set_xlabel('K')
ax.set_ylabel('Accuracy')
plt.show()

In [None]:
# Same search space as the earlier tuning cell (K from 1 to 30, uniform and
# distance-weighted voting), this time fitted on the complete training split.
# NOTE: this rebinds `tuned_knn` and `predictions_tuned`, replacing the results
# of the search that was run on the 50% sample.
param_grid = {'n_neighbors': np.arange(1, 31), 'weights': ['uniform', 'distance']}
tuned_knn = GridSearchCV(estimator=KNeighborsClassifier(), param_grid=param_grid, cv=5)
tuned_knn.fit(train_features, train_labels)

# Score the fitted search object on the test split.
predictions_tuned = tuned_knn.predict(test_features)
print(classification_report(test_labels, predictions_tuned))


In [None]:
# Test-split accuracy of the most recently fitted grid search.
accuracy_tuned = accuracy_score(y_true=test_labels, y_pred=predictions_tuned)
print(f"Accuracy for Tuned Model: {accuracy_tuned}")

In [None]:
# Confusion matrix of the most recent tuned predictions on the test split.
# The class order is fixed explicitly (sorted union of true and predicted
# labels) so the heatmap ticks show class names instead of bare indices.
class_names = np.union1d(test_labels, predictions_tuned)
conf_matrix_tuned = confusion_matrix(test_labels, predictions_tuned, labels=class_names)
sns.heatmap(conf_matrix_tuned, annot=True, fmt='g',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix for Tuned KNN Model')
plt.show()

In [None]:
# NOTE(review): this cell repeats the earlier "Accuracy vs. K Value" sweep
# verbatim (same data, same K range); consider dropping one of the two.
# For each K in 1..30, fit on the full training split and score on the test split.
k_values = np.arange(1, 31)
accuracy_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_features, train_labels)
    pred_i = knn.predict(test_features)
    accuracy_scores.append(accuracy_score(test_labels, pred_i))

# Accuracy as a function of K.
plt.figure(figsize=(12, 6))
line_style = dict(color='blue', linestyle='dashed', marker='o',
                  markerfacecolor='red', markersize=10)
plt.plot(k_values, accuracy_scores, **line_style)
plt.title('Accuracy vs. K Value')
plt.xlabel('K')
plt.ylabel('Accuracy')
plt.show()

In [None]:
# Pairwise scatter/histogram grid for the first ten numeric columns.
sns.pairplot(data=train_data.loc[:, numeric_cols[:10]])
plt.show()