In [None]:
#libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from matplotlib.colors import ListedColormap
import seaborn as sns
import time

In [None]:
# Load the gender-classification CSV, encode the labels and build the
# train/test partitions used by the rest of the notebook.
raw_data = pd.read_csv("../input/gender-classification-dataset/gender_classification_v7.csv")

# Encode the textual gender labels as integers: 'Male' -> 1, 'Female' -> 2.
# The replacements are applied one after another on the label array, then the
# result is cast to an integer dtype.
label_codes = {'Male': 1, 'Female': 2}
genders = np.array(raw_data["gender"].values)
for label_name, label_code in label_codes.items():
    genders = np.where(genders == label_name, label_code, genders)
genders = genders.astype('int')

# Feature matrix: one column per selected attribute, in this exact order
# (column 0 is long_hair, column 1 is forehead_width_cm, ...).
feature_columns = [
    "long_hair",
    "forehead_width_cm",
    "forehead_height_cm",
    "nose_wide",
    "nose_long",
    "lips_thin",
    "distance_nose_to_lip_long",
]
data = raw_data[feature_columns].values

# Feature scaling is currently disabled; re-enable the line below to rescale
# every feature to [-1, 1] before splitting.
#data=MinMaxScaler(feature_range=(-1,1)).fit(data).transform(data)

# Hold out 25% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
split_parts = train_test_split(data, genders, test_size=0.25, random_state=0)
data_train, data_test, genders_train, genders_test = (
    np.asarray(part) for part in split_parts
)

In [None]:
# Manual grid search over SVC kernels and hyper-parameters.
#
# For every (kernel, C, gamma) combination listed in EXPERIMENTS the cell:
#   1. fits an SVC on all features and evaluates it on the test split
#      (confusion matrix, classification report, accuracy, false-alarm rate,
#      missed-alarm rate);
#   2. fits a second SVC on the first two features only and draws its
#      decision surface over the whole data set.
# At the end it prints one summary line per run, the best accuracy found and
# the elapsed wall-clock time.
#
# Names left in the notebook namespace for later cells: Acc, Param (numpy
# arrays), Accmax, Best_params, nr (number of runs + 1), C_vect, gamma_vect.

#for execution time
start_time = time.time()

# Integer encoding of the classes used when the labels were built:
# 1 = Male, 2 = Female.  Passing it explicitly to confusion_matrix guarantees a
# 2x2 matrix even if one class is missing from the test labels/predictions.
CLASS_LABELS = [1, 2]

# Column names of the two features shown on the decision-surface plots
# (columns 0 and 1 of `data`).
PLOT_FEATURES = ('long_hair', 'forehead_width_cm')

# One entry per experiment: (kernel, values for C, values for gamma).
# C is the outer loop and gamma the inner loop.  gamma is not used by the
# linear kernel, so a single value is listed there (it only shows up in the
# printed labels).
EXPERIMENTS = [
    ('linear',  [100, 1000, 10000], [0.05]),
    ('sigmoid', [10, 100, 1000],    [0.0001, 0.00001]),
    ('rbf',     [10, 100, 1000],    [0.05, 0.1, 0.5]),
    ('poly',    [10, 100],          [0.05, 0.1, 0.5]),
]

# Plot resources that do not depend on the hyper-parameters are built once.
cmap_light = ListedColormap(['#AAFBFF','#FFAAAA'])
cmap_bold = ListedColormap(['#0000FF','#FF0000'])

# Mesh grid covering the first two features with a margin of 1 on each side.
x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
h = 0.05
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
xy_mesh = np.c_[xx.ravel(), yy.ravel()]


def rates_from_confusion(c_m):
    """Return (ACC, FAR, MAR) in percent from a 2x2 confusion matrix.

    Row/column 0 is treated as the negative class and row/column 1 as the
    positive class, with rows holding the true labels and columns the
    predictions.

    ACC = (TP + TN) / all samples
    FAR = FP / (FP + TN)   (false alarms among the true negatives)
    MAR = FN / (FN + TP)   (misses among the true positives)

    A rate whose denominator is zero is reported as 0.
    """
    TN, FP = c_m[0][0], c_m[0][1]
    FN, TP = c_m[1][0], c_m[1][1]

    negatives = TN + FP
    positives = TP + FN
    total = negatives + positives

    FAR = 100*FP/negatives if negatives else 0
    MAR = 100*FN/positives if positives else 0
    ACC = 100*(TP+TN)/total if total else 0
    return ACC, FAR, MAR


def evaluate_svc(kernel, C, gamma):
    """Fit an SVC on all training features and report its test performance.

    Prints the confusion matrix and the classification report, shows the
    confusion matrix as a heat map and returns (ACC, FAR, MAR) in percent.
    """
    print(('Kernel : ' + str(kernel) + ' C = ' + (str(C)) + ' Gamma = ' + (str(gamma))))

    # Probability estimates are not requested: only predict() is used, and
    # enabling them makes fitting considerably slower.
    svc = SVC(C=C, gamma=gamma, kernel=kernel)
    svc.fit(data_train, genders_train)

    genders_pred_svc = svc.predict(data_test)
    c_m = confusion_matrix(genders_test, genders_pred_svc, labels=CLASS_LABELS)
    print(c_m)
    print('Confusion matrix plot of SVC ' + kernel + ' C = ' + str(C) + ' Gamma = ' + str(gamma))

    fig, ax = plt.subplots(figsize = (5,5))
    sns.heatmap(c_m, annot = True, linewidth = 0.5, linecolor = "red", fmt = ".0f", ax = ax)
    plt.show()
    plt.close(fig)

    # classification report
    print('\n Classification report for ' + kernel + ' C = ' + str(C) + ' Gamma = ' + str(gamma) + '\n',classification_report(genders_test, genders_pred_svc))

    return rates_from_confusion(c_m)


def plot_decision_surface(kernel, C, gamma):
    """Draw the decision surface of an SVC trained on the first two features.

    A separate estimator is fitted on columns 0 and 1 of the training data,
    evaluated on the pre-computed mesh and plotted together with every sample
    of the data set, coloured by its true label.
    """
    svc_2d = SVC(C=C, gamma=gamma, kernel=kernel)
    svc_2d.fit(data_train[:,0:2], genders_train)
    Z = svc_2d.predict(xy_mesh).reshape(xx.shape)

    #plotting data on decision boundary
    fig = plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light,shading='auto')
    plt.scatter(data[:, 0], data[:, 1], c=genders, cmap=cmap_bold)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    # The axes are raw feature columns, not principal components.
    plt.xlabel(PLOT_FEATURES[0]);plt.ylabel(PLOT_FEATURES[1])
    plt.title('Kernel=' + kernel + ' C=' + str(C) + ' Gamma=' + str(gamma))
    plt.show()
    plt.close(fig)


Acc=[]
Param=[]
nr=1            # 1-based number of the run about to be executed
Accmax = 0
Best_params = ""

# Values for the C and gamma parameters come from EXPERIMENTS.
for kernel, C_vect, gamma_vect in EXPERIMENTS:
    for C in C_vect:
        for gamma in gamma_vect:
            print(nr,".")
            ACC, FAR, MAR = evaluate_svc(kernel, C, gamma)

            Acc.append(ACC)
            Param.append(kernel + " C =" + str(C) + " gamma = " + str(gamma) + " FAR = "+ str(FAR) + " MAR = " + str(MAR))

            # Strict comparison: on ties the earliest configuration is kept.
            if ACC > Accmax :
                Accmax = ACC
                Best_params = kernel + " C =" + str(C) + " gamma = " + str(gamma)
            print('Accuracy = ' + str(ACC) + '\n False Alarm Rate = ' + str(FAR) + '\n Missed Alarm Rate = ' + str(MAR))

            # Plots
            plot_decision_surface(kernel, C, gamma)
            nr=nr+1

Acc=np.array(Acc)
Param=np.array(Param)
# Iterate over the recorded runs themselves; `nr` is one past the last run, so
# it must not be used as the loop bound.
for acc_value, param_text in zip(Acc, Param):
    print(acc_value,' ',param_text,'\n')
print("Maximum Accuracy: ", Accmax," With algorithm:", Best_params)
print("--- %s seconds ---" % (time.time() - start_time))