In [27]:
#Import Necessary Libraries

In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score, f1_score

In [29]:
#Load the Dataset

In [30]:
data = pd.read_excel("GenderPrediction.xlsx")

In [31]:
#Extract Last letter 

In [32]:
data["Lastletter"] = data["Name"].apply(lambda x: str(x)[-1])

In [33]:
#Split dataset using stratified sampling (80-20 ratio)

In [34]:
X_train, X_test, y_train, y_test = train_test_split(data[["Name","Lastletter"]], data["Gender"], test_size=0.2, stratify=data["Gender"], random_state=42)

In [35]:
#Check Training Dataset

In [36]:
X_train

Unnamed: 0,Name,Lastletter
482,Kalapini,i
2129,Vinutha,a
1773,Amal,l
1353,Chandrabhan,n
261,Charita,a
...,...,...
1989,Udyan,n
1303,Piyush,h
1380,Sajala,a
1206,Devahuti,i


In [37]:
# Encode last letters and gender

In [38]:
le1 = LabelEncoder()
le2 = LabelEncoder()

X_train["Lastletter"] = le1.fit_transform(X_train["Lastletter"])
y_train = le2.fit_transform(y_train)

In [39]:
X_test["Lastletter"] = le1.transform(X_test["Lastletter"])
y_test = le2.transform(y_test)

In [40]:
#Train the model and Predict

In [41]:
X_train_lastletter = X_train[["Lastletter"]].values
X_test_lastletter = X_test[["Lastletter"]].values

model = BernoulliNB()
model.fit(X_train_lastletter,y_train)

y_pred = model.predict(X_test_lastletter)
y_pred


array([1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,

In [42]:
#Evaluate Accuracy and F1-Score

In [43]:
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print(f"F1 Score: {f1:.2f}")

Model Accuracy: 72.00%
F1 Score: 0.72


In [44]:
# Function to predict gender based on input name

In [45]:
def predict_gender(name):
    last_letter = name[-1].lower()
    if last_letter not in le1.classes_:
        print("Error: Unknown last letter. Try another name.")
        return
    encoded_letter = le1.transform([last_letter]).reshape(-1, 1)
    predicted_label = model.predict(encoded_letter)[0]
    gender = le2.inverse_transform([predicted_label])[0]
    print(f"Predicted Gender: {gender}")

In [46]:
#Input loop for user

In [47]:
while True:
    user_input = input("Enter a name to predict gender (or type 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break
    predict_gender(user_input)

Enter a name to predict gender (or type 'exit' to quit):  Saket


Predicted Gender: Male


Enter a name to predict gender (or type 'exit' to quit):  Mahima


Predicted Gender: Female


Enter a name to predict gender (or type 'exit' to quit):  Exit
