In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler


In [None]:
file_path = "Final_Dataset.xlsx"

# Open the workbook in a context manager so the underlying file handle is
# released as soon as the sheet has been parsed.
with pd.ExcelFile(file_path) as xls:
    # NOTE(review): the sheet name looks like a truncated CSV header row --
    # confirm it matches the workbook's sheet name exactly.
    df = xls.parse('User_ID,Age,Gender,Total_App_Us')


In [None]:
def age_group(age):
    """Map a numeric age to its bracket label.

    Brackets (bounds inclusive on the upper side):
        20-30      -> 'Young'
        (30, 50]   -> 'Middle'
        (50, 60]   -> 'Older'

    Returns None for any age outside 20-60, including NaN (every comparison
    against NaN is False).
    """
    # Guard clause: anything outside the overall supported range has no bracket.
    if not 20 <= age <= 60:
        return None
    if age <= 30:
        return 'Young'
    return 'Middle' if age <= 50 else 'Older'

# Label every row with its age bracket; age_group returns None for ages outside
# the supported brackets, which pandas treats as a missing value.
df['Age_Group'] = df['Age'].apply(age_group)

# Keep only rows that received a bracket. The explicit .copy() makes the result
# an independent frame, so later column assignments on df do not trigger
# SettingWithCopyWarning.
df = df[df['Age_Group'].notna()].copy()


In [None]:
# Drop incomplete rows BEFORE encoding: once a column has been label-encoded a
# missing value is no longer NaN (it has been turned into a class code, or the
# encoder has failed on it), so a later dropna() could never remove that row.
# .copy() gives an independent frame that is safe to assign columns into.
df = df.dropna().copy()

# Encode the categorical columns as integer codes, keeping each fitted encoder
# so the codes can be mapped back to the original category values later.
label_encoders = {}
for col in ['Gender', 'Location', 'Relationship_Status']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# 'Age' is excluded from the features because the target 'Age_Group' is derived
# directly from it; 'User_ID' is excluded as well.
X = df.drop(columns=['User_ID', 'Age', 'Age_Group'])
y = df['Age_Group']

# 80/20 hold-out split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Gaussian Naive Bayes: fit on the training split (fit() returns the estimator
# itself), then predict the held-out test split.
nb_model = GaussianNB().fit(X_train, y_train)
y_pred = nb_model.predict(X_test)

# Per-class precision / recall / F1 as a dict, displayed with one row per
# class or aggregate.
report = classification_report(y_test, y_pred, output_dict=True)
pd.DataFrame(report).T


In [None]:
# Pin the row/column order to the model's classes. Without labels=, the matrix
# only covers classes that occur in y_test or y_pred, so it could end up smaller
# than (and misaligned with) the tick labels taken from nb_model.classes_.
conf_matrix = confusion_matrix(y_test, y_pred, labels=nb_model.classes_)

plt.figure(figsize=(6, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=nb_model.classes_, yticklabels=nb_model.classes_)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix - Naïve Bayes Classifier')
plt.tight_layout()
plt.show()


In [None]:
# Fit the encoder on the full target column so that every class present in the
# data has a code. Fitting on y_test alone makes transform(y_pred) raise
# ValueError whenever the model predicts a class that does not occur in the
# test split.
label_encoder = LabelEncoder()
label_encoder.fit(y)
y_test_encoded = label_encoder.transform(y_test)
y_pred_encoded = label_encoder.transform(y_pred)

# Overlay true and predicted class codes per test sample; an 'x' that does not
# sit on top of its 'o' is a misclassification.
plt.figure(figsize=(10, 5))
plt.scatter(range(len(y_test_encoded)), y_test_encoded, label='True', marker='o')
plt.scatter(range(len(y_pred_encoded)), y_pred_encoded, label='Predicted', marker='x')
plt.title("True vs Predicted Labels - Naïve Bayes")
plt.xlabel("Sample Index")
plt.ylabel("Class Label (Encoded)")
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# kNN classifies by distance, so features measured on different scales would
# contribute unequally (the largest-valued feature dominates). Standardising
# inside a pipeline puts every feature on a comparable scale, and the scaler is
# fitted on the training split only, so nothing leaks from the test split.
# The pipeline still exposes .predict() and .classes_ like the bare classifier.
knn_model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
knn_model.fit(X_train, y_train)
y_knn_pred = knn_model.predict(X_test)

# Per-class precision / recall / F1, displayed with one row per class/aggregate.
knn_report = classification_report(y_test, y_knn_pred, output_dict=True)
pd.DataFrame(knn_report).transpose()


In [None]:
# Pin the row/column order to the model's classes. Without labels=, the matrix
# only covers classes that occur in y_test or y_knn_pred, so it could end up
# smaller than (and misaligned with) the tick labels from knn_model.classes_.
knn_conf_matrix = confusion_matrix(y_test, y_knn_pred, labels=knn_model.classes_)

plt.figure(figsize=(6, 5))
sns.heatmap(knn_conf_matrix, annot=True, fmt='d', cmap='Greens',
            xticklabels=knn_model.classes_, yticklabels=knn_model.classes_)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix - kNN Classifier')
plt.tight_layout()
plt.show()


In [None]:
# Fit the encoder on the full target column so that every class present in the
# data has a code. Fitting on y_test alone makes transform(y_knn_pred) raise
# ValueError whenever the model predicts a class that does not occur in the
# test split.
label_encoder_knn = LabelEncoder()
label_encoder_knn.fit(y)
y_test_knn_encoded = label_encoder_knn.transform(y_test)
y_knn_pred_encoded = label_encoder_knn.transform(y_knn_pred)

# Overlay true and predicted class codes per test sample; an 'x' that does not
# sit on top of its 'o' is a misclassification.
plt.figure(figsize=(10, 5))
plt.scatter(range(len(y_test_knn_encoded)), y_test_knn_encoded, label='True', marker='o')
plt.scatter(range(len(y_knn_pred_encoded)), y_knn_pred_encoded, label='Predicted', marker='x')
plt.title("True vs Predicted Labels - kNN Classifier")
plt.xlabel("Sample Index")
plt.ylabel("Class Label (Encoded)")
plt.legend()
plt.tight_layout()
plt.show()
