In [228]:
import pandas as pd
import numpy as np

In [229]:
# Load the raw dataset; the path is resolved relative to the current
# working directory of the kernel.
csv_path = 'gender_classification_v7.csv'
df_0 = pd.read_csv(csv_path)

In [230]:
# Work on an independent copy: a plain `df1 = df_0` only creates a second name
# for the same object, so the in-place column edits made to df1 further down
# (the gender encoding) would silently rewrite the raw frame df_0 as well.
df1 = df_0.copy()
df1.shape

(5001, 8)

In [231]:
# Preview the first rows to check the column names and the kind of values
# each column holds.
df1.head()

Unnamed: 0,long_hair,forehead_width_cm,forehead_height_cm,nose_wide,nose_long,lips_thin,distance_nose_to_lip_long,gender
0,1,11.8,6.1,1,0,1,1,Male
1,0,14.0,5.4,0,0,1,0,Female
2,0,11.8,6.3,1,1,1,1,Male
3,0,14.4,6.1,0,1,1,1,Male
4,1,13.5,5.9,0,0,0,0,Female


In [232]:
# Count missing values per column (isna is the canonical spelling of isnull).
df1.isna().sum()

long_hair                    0
forehead_width_cm            0
forehead_height_cm           0
nose_wide                    0
nose_long                    0
lips_thin                    0
distance_nose_to_lip_long    0
gender                       0
dtype: int64

In [233]:
# Print the distinct values of each categorical column, one array per line,
# in the same column order as before.
for column in ('long_hair', 'nose_wide', 'nose_long', 'lips_thin',
               'distance_nose_to_lip_long', 'gender'):
    print(df1[column].unique())

[1 0]
[1 0]
[0 1]
[1 0]
[1 0]
['Male' 'Female']


In [234]:
# Encode the target as integers: Male -> 1, Female -> 0.
gender_codes = {'Male': 1, 'Female': 0}

# Guard against re-running the cell: once the column already holds only the
# 1/0 codes, mapping it again would turn every value into a missing value.
already_encoded = df1['gender'].isin(list(gender_codes.values())).all()
if not already_encoded:
    encoded_gender = df1['gender'].map(gender_codes)
    # Fail loudly on labels outside the mapping instead of silently
    # producing missing targets that only blow up later in model fitting.
    unmapped = encoded_gender.isna()
    if unmapped.any():
        raise ValueError(
            'Unexpected gender labels: %r' % list(df1.loc[unmapped, 'gender'].unique())
        )
    df1['gender'] = encoded_gender.astype(int)

In [235]:
# Preview again to confirm the gender column now holds the 1/0 codes.
df1.head()

Unnamed: 0,long_hair,forehead_width_cm,forehead_height_cm,nose_wide,nose_long,lips_thin,distance_nose_to_lip_long,gender
0,1,11.8,6.1,1,0,1,1,1
1,0,14.0,5.4,0,0,1,0,0
2,0,11.8,6.3,1,1,1,1,1
3,0,14.4,6.1,0,1,1,1,1
4,1,13.5,5.9,0,0,0,0,0


In [236]:
# Every column except the last is a feature; the last column (gender) is the
# target. Both are converted to plain NumPy arrays for scikit-learn.
feature_frame, target_series = df1.iloc[:, :-1], df1.iloc[:, -1]
x = feature_frame.to_numpy()
y = target_series.to_numpy()

In [237]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=100,
)

# Feature Scaling

In [238]:
from sklearn.preprocessing import StandardScaler
# Feature columns 1 and 2 are the two continuous forehead measurements
# (width and height, in cm); the other feature columns are 0/1 flags and are
# left unscaled.
measurement_cols = slice(1, 3)

# Fit the scaler on the training split only, then reuse the same statistics on
# the test split so nothing from the test data influences the transform.
# NOTE: the arrays are overwritten in place, so re-running this cell without
# re-running the split refits `sc` on already-scaled values.
sc = StandardScaler()
x_train[:, measurement_cols] = sc.fit_transform(x_train[:, measurement_cols])
x_test[:, measurement_cols] = sc.transform(x_test[:, measurement_cols])

# Logistic Regression

In [239]:
from sklearn.linear_model import LogisticRegression
# Fit a logistic-regression classifier on the training split. The bare name on
# the last line displays the fitted estimator as the cell output.
lg = LogisticRegression(random_state=0).fit(x_train, y_train)
lg

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Prediction

In [240]:
# Predict the test labels with the logistic-regression model.
y_lg = lg.predict(x_test)
# Print predictions (left column) side by side with the actual labels (right).
print(np.column_stack((y_lg, y_test)))

[[1 1]
 [0 0]
 [0 0]
 ...
 [1 1]
 [0 0]
 [1 1]]


## Confusion Matrix

In [241]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Confusion matrix and accuracy for logistic regression. Keyword arguments
# make explicit which array is the ground truth and which is the prediction.
cm_lg = confusion_matrix(y_true=y_test, y_pred=y_lg)
print(cm_lg)
accuracy_score(y_true=y_test, y_pred=y_lg)

[[617  21]
 [ 18 595]]


0.9688249400479616

# K Nearest Neighbors

In [242]:
from sklearn.neighbors import KNeighborsClassifier
# 5-nearest-neighbours classifier using the Minkowski metric with p=2
# (i.e. Euclidean distance). The bare name displays the fitted estimator.
knn = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2).fit(x_train, y_train)
knn

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

## Prediction

In [243]:
# Predict the test labels with the k-NN model.
y_knn = knn.predict(x_test)
# Print predictions (left column) side by side with the actual labels (right).
print(np.column_stack((y_knn, y_test)))

[[1 1]
 [0 0]
 [0 0]
 ...
 [1 1]
 [0 0]
 [1 1]]


## Confusion Matrix

In [244]:
# Confusion matrix and accuracy for k-NN. Keyword arguments make explicit
# which array is the ground truth and which is the prediction.
cm_knn = confusion_matrix(y_true=y_test, y_pred=y_knn)
print(cm_knn)
accuracy_score(y_true=y_test, y_pred=y_knn)

[[619  19]
 [ 20 593]]


0.9688249400479616

# Support Vector Machine (SVM)

In [245]:
from sklearn.svm import SVC
# Support vector classifier with a linear kernel, fitted on the training
# split. The bare name displays the fitted estimator as the cell output.
svc = SVC(kernel='linear', random_state=0).fit(x_train, y_train)
svc

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

## Prediction

In [246]:
# Predict the test labels with the linear SVM.
y_svc = svc.predict(x_test)
# Print predictions (left column) side by side with the actual labels (right).
print(np.column_stack((y_svc, y_test)))

[[1 1]
 [0 0]
 [0 0]
 ...
 [1 1]
 [0 0]
 [1 1]]


## Confusion Matrix

In [247]:
# Confusion matrix and accuracy for the linear SVM. Keyword arguments make
# explicit which array is the ground truth and which is the prediction.
cm_svc = confusion_matrix(y_true=y_test, y_pred=y_svc)
print(cm_svc)
accuracy_score(y_true=y_test, y_pred=y_svc)

[[616  22]
 [ 18 595]]


0.9680255795363709

# Kernel Support Vector Machine 

In [248]:
# Support vector classifier with an RBF kernel, fitted on the training split.
# The bare name displays the fitted estimator as the cell output.
k_svc = SVC(kernel='rbf', random_state=0).fit(x_train, y_train)
k_svc

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

## Prediction

In [249]:
# Predict the test labels with the RBF-kernel SVM.
y_k_svc = k_svc.predict(x_test)

## Confusion Matrix

In [250]:
# Confusion matrix and accuracy for the RBF-kernel SVM. Keyword arguments make
# explicit which array is the ground truth and which is the prediction.
cm_k_svc = confusion_matrix(y_true=y_test, y_pred=y_k_svc)
print(cm_k_svc)
accuracy_score(y_true=y_test, y_pred=y_k_svc)

[[627  11]
 [ 19 594]]


0.9760191846522782

# Naive Bayes

In [251]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes with default settings, fitted on the training split.
# The bare name displays the fitted estimator as the cell output.
nb = GaussianNB().fit(x_train, y_train)
nb

GaussianNB(priors=None, var_smoothing=1e-09)

## Prediction

In [252]:
# Predict the test labels with the Gaussian naive Bayes model.
y_nb = nb.predict(x_test)

## Confusion Matrix

In [253]:
# Confusion matrix and accuracy for naive Bayes. Keyword arguments make
# explicit which array is the ground truth and which is the prediction.
cm_nb = confusion_matrix(y_true=y_test, y_pred=y_nb)
print(cm_nb)
accuracy_score(y_true=y_test, y_pred=y_nb)

[[618  20]
 [ 19 594]]


0.9688249400479616

# Decision Tree Classification

In [254]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree that splits on entropy, fitted on the training split; the
# fixed random_state keeps the tree reproducible. The bare name displays the
# fitted estimator as the cell output.
dt = DecisionTreeClassifier(criterion='entropy', random_state=0).fit(x_train, y_train)
dt

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

## Prediction

In [255]:
# Predict the test labels with the decision tree.
y_dt = dt.predict(x_test)

## Confusion Matrix

In [256]:
# Confusion matrix and accuracy for the decision tree. Keyword arguments make
# explicit which array is the ground truth and which is the prediction.
cm_dt = confusion_matrix(y_true=y_test, y_pred=y_dt)
print(cm_dt)
accuracy_score(y_true=y_test, y_pred=y_dt)

[[619  19]
 [ 19 594]]


0.9696243005595524

# Random Forest Classification

In [257]:
from sklearn.ensemble import RandomForestClassifier
# Random forest of 10 entropy-based trees, fitted on the training split; the
# fixed random_state keeps the ensemble reproducible. The bare name displays
# the fitted estimator as the cell output.
rf = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=0,
).fit(x_train, y_train)
rf

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

## Prediction

In [258]:
# Predict the test labels with the random forest.
y_rf = rf.predict(x_test)

## Confusion Matrix

In [259]:
# Confusion matrix and accuracy for the random forest. Keyword arguments make
# explicit which array is the ground truth and which is the prediction.
cm_rf = confusion_matrix(y_true=y_test, y_pred=y_rf)
print(cm_rf)
accuracy_score(y_true=y_test, y_pred=y_rf)

[[621  17]
 [ 19 594]]


0.9712230215827338