In [1]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
gender_data_with_null_duplicates_values = pd.read_csv("gender_classification.csv")

In [3]:
gender_data_with_duplicates_values = gender_data_with_null_duplicates_values.dropna()
gender_data = gender_data_with_duplicates_values.drop_duplicates()
gender_data.head()

Unnamed: 0,long_hair,forehead_width_cm,forehead_height_cm,nose_wide,nose_long,lips_thin,distance_nose_to_lip_long,gender
0,1,11.8,6.1,1,0,1,1,Male
1,0,14.0,5.4,0,0,1,0,Female
2,0,11.8,6.3,1,1,1,1,Male
3,0,14.4,6.1,0,1,1,1,Male
4,1,13.5,5.9,0,0,0,0,Female


In [4]:
gender_data.shape

(3233, 8)

In [5]:
gender_data['gender'].value_counts()

Male      1783
Female    1450
Name: gender, dtype: int64

In [6]:
gender_data_train, gender_data_test= train_test_split(gender_data,test_size=0.33, stratify = gender_data['gender'])

In [7]:
gender = gender_data['gender']
male_count = 0
female_count = 0
for distribute_gender in gender:
    if distribute_gender == 'Male':
        male_count += 1
    else:
        female_count += 1
print(male_count)
print(female_count)

1783
1450


In [8]:
train_gender_data_features = gender_data_train.drop('gender', axis='columns')
train_target = gender_data_train['gender']

In [9]:
test_gender_data_features = gender_data_test.drop('gender', axis='columns')
test_target = gender_data_test['gender']

In [10]:
def converted(f1, f2, f3, f4, f5, f6, f7):
#     f1, f2, f3, f4, f5, f6 = [f1], [f2], [f3], [f4], [f5], [f6]
#     f1 = le_buying.transform(f1)
#     f2 = le_maint.transform(f2)
#     f3 = le_doors.transform(f3)
#     f4 = le_persons.transform(f4)
#     f5 = le_lug_boot.transform(f5)
#     f6 = le_safety.transform(f6)
#     features = [[f1[0],f2[0],f3[0],f4[0],f5[0],f6[0]]]
    f1 = float(f1)
    f2 = float(f2)
    f3 = float(f3)
    f4 = float(f4)
    f5 = float(f5)
    f6 = float(f6)
    f7 = float(f7)
    features = [[f1,f2,f3,f4,f5,f6,f7]]
    return features

In [11]:
feature_1 = "1"
feature_2 = "14.4"
feature_3 = '6.1'
feature_4 = '0'
feature_5 = "1"
feature_6 = "1"
feature_7 = "1"

In [12]:
converted_features = converted(feature_1, feature_2, feature_3, feature_4, feature_5, feature_6, feature_7)
print(converted_features)

[[1.0, 14.4, 6.1, 0.0, 1.0, 1.0, 1.0]]


In [13]:
# train_gender_data_features.values()

In [14]:
np.array(train_target)

array(['Male', 'Male', 'Male', ..., 'Male', 'Male', 'Female'],
      dtype=object)

In [15]:
logistic_regression_classifier = LogisticRegression(random_state=0)
logistic_regression_classifier.fit(train_gender_data_features.to_numpy(), np.array(train_target))

predicted_class = logistic_regression_classifier.predict(converted_features)
print("Predicted Class", predicted_class, "\n")
target_predict = logistic_regression_classifier.predict(test_gender_data_features)
class_names = ['Male', 'Female']
print("Classification Report: Logistic Regression\n", classification_report(test_target, target_predict, target_names=class_names))
# print("Confusion Metric ",confusion_matrix(test_target, target_predict)) 

Predicted Class ['Male'] 

Classification Report: Logistic Regression
               precision    recall  f1-score   support

        Male       0.94      0.93      0.94       479
      Female       0.94      0.96      0.95       588

    accuracy                           0.94      1067
   macro avg       0.94      0.94      0.94      1067
weighted avg       0.94      0.94      0.94      1067



In [16]:
naive_bayes_classifier = MultinomialNB(alpha=1.9)
naive_bayes_classifier.fit(train_gender_data_features.to_numpy(), np.array(train_target))


predicted_class = naive_bayes_classifier.predict(converted_features)
print("Predicted Class", predicted_class, "\n")
target_predict = naive_bayes_classifier.predict(test_gender_data_features)
class_names = ['Male', 'Female']
print("Classification Report: Naive Bayes\n", classification_report(test_target, target_predict, target_names=class_names))
# print("Confusion Metric ",confusion_matrix(test_target, target_predict)) 

Predicted Class ['Male'] 

Classification Report: Naive Bayes
               precision    recall  f1-score   support

        Male       0.99      0.87      0.92       479
      Female       0.90      0.99      0.94       588

    accuracy                           0.94      1067
   macro avg       0.94      0.93      0.93      1067
weighted avg       0.94      0.94      0.93      1067

