In [18]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import seaborn as sn
from sklearn.utils import resample


In [19]:
df = pd.read_csv('gender.csv')


In [20]:
df

Unnamed: 0,long_hair,forehead_width_cm,forehead_height_cm,nose_wide,nose_long,lips_thin,distance_nose_to_lip_long,gender
0,1,11.8,6.1,1,0,1,1,Male
1,0,14.0,5.4,0,0,1,0,Female
2,0,11.8,6.3,1,1,1,1,Male
3,0,14.4,6.1,0,1,1,1,Male
4,1,13.5,5.9,0,0,0,0,Female
...,...,...,...,...,...,...,...,...
4996,1,13.6,5.1,0,0,0,0,Female
4997,1,11.9,5.4,0,0,0,0,Female
4998,1,12.9,5.7,0,0,0,0,Female
4999,1,13.2,6.2,0,0,0,0,Female


In [21]:
df.isnull().sum()
# No Null Values

long_hair                    0
forehead_width_cm            0
forehead_height_cm           0
nose_wide                    0
nose_long                    0
lips_thin                    0
distance_nose_to_lip_long    0
gender                       0
dtype: int64

In [22]:
No_cols = df.columns.size
No_cols

8

In [23]:
def Scaling(data):
    for j in range(No_cols-1):
        data[data.columns[j]] = data[data.columns[j]]/data[data.columns[j]].max()
    return(data)

In [24]:
Scale_df = Scaling(df)


In [25]:
df = Scale_df


In [26]:
df


Unnamed: 0,long_hair,forehead_width_cm,forehead_height_cm,nose_wide,nose_long,lips_thin,distance_nose_to_lip_long,gender
0,1.0,0.761290,0.859155,1.0,0.0,1.0,1.0,Male
1,0.0,0.903226,0.760563,0.0,0.0,1.0,0.0,Female
2,0.0,0.761290,0.887324,1.0,1.0,1.0,1.0,Male
3,0.0,0.929032,0.859155,0.0,1.0,1.0,1.0,Male
4,1.0,0.870968,0.830986,0.0,0.0,0.0,0.0,Female
...,...,...,...,...,...,...,...,...
4996,1.0,0.877419,0.718310,0.0,0.0,0.0,0.0,Female
4997,1.0,0.767742,0.760563,0.0,0.0,0.0,0.0,Female
4998,1.0,0.832258,0.802817,0.0,0.0,0.0,0.0,Female
4999,1.0,0.851613,0.873239,0.0,0.0,0.0,0.0,Female


In [27]:
df.describe()


Unnamed: 0,long_hair,forehead_width_cm,forehead_height_cm,nose_wide,nose_long,lips_thin,distance_nose_to_lip_long
count,5001.0,5001.0,5001.0,5001.0,5001.0,5001.0,5001.0
mean,0.869626,0.850418,0.837509,0.493901,0.507898,0.493101,0.4989
std,0.336748,0.071428,0.076235,0.500013,0.499988,0.500002,0.500049
min,0.0,0.735484,0.71831,0.0,0.0,0.0,0.0
25%,1.0,0.787097,0.774648,0.0,0.0,0.0,0.0
50%,1.0,0.845161,0.830986,0.0,1.0,0.0,0.0
75%,1.0,0.903226,0.901408,1.0,1.0,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [28]:
df["gender"].value_counts()
# Data is Imbalanced

Female    2501
Male      2500
Name: gender, dtype: int64

In [29]:
class_0 = df[df['gender'] == 0]
class_1 = df[df['gender'] == 1]

In [30]:
class_0_count,class_1_count = df.gender.value_counts()


In [31]:
print(class_0_count,class_1_count)


2501 2500


In [32]:
class_1_overS = class_1.sample(class_0_count,replace = True)
# We can add random values from that class to over sample that dataset...But replace = True os required for over sampling
class_1_overS.shape


ValueError: ignored

In [34]:
balanced_df = pd.concat([class_1_overS,class_0])
balanced_df.shape

NameError: ignored

In [None]:
df = balanced_df


In [None]:
from sklearn.model_selection import train_test_split
X = df.drop(columns = ['gender'])
y = df["gender"]
X_train,X_test,y_train,y_test = train_test_split(X,y, test_size= 0.1, random_state = 15)



In [None]:
plt.plot(X_train.T, '.')
plt.xticks(rotation='vertical')
plt.show()

In [None]:
plt.figure(figsize=(17,17))
for j in range(No_cols):
    plt.subplot(5,5,j+1)
    x = df[df.columns[j]]
    y = df['gender']
    plt.scatter(x, y)
    plt.xlabel(df.columns[j])

In [None]:
x_binarised_train = X_train.apply(pd.cut, bins=2, labels=[1,0])
x_binarised_test = X_test.apply(pd.cut, bins=2, labels=[1,0])

In [None]:
X_test = x_binarised_test.values
X_train = x_binarised_train.values


In [None]:
X_train[0]


In [None]:
for b in range(X_train.shape[1] + 1):
    y_pred_train = []
    accurate_rows = 0

    for x, y in zip(X_train, y_train):
        y_pred = (np.sum(x) <= b)
        y_pred_train.append(y_pred)
        accurate_rows += (y == y_pred)

    print(b, accurate_rows, accurate_rows/X_train.shape[0])

In [None]:
b = 12
y_pred_test = []
accurate_rows = 0

for x, y in zip(X_test, y_test):
    y_pred = (np.sum(x) <= b)
    y_pred_test.append(y_pred)
    accurate_rows += (y == y_pred)

print(b, accurate_rows, accurate_rows/X_test.shape[0])

In [None]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test,y_pred_test)



In [None]:
import seaborn as sn
sn.heatmap(cm,annot=True)

In [None]:
warnings.filterwarnings('ignore')
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred_test))

In [None]:
class MPNeuron:

    def __init__(self):
        self.b = None

    def model(self, x):
        return(sum(x) <= self.b)

    def predict(self, X):
        Y = []
        for x in X:
            result = self.model(x)
            Y.append(result)
        return np.array(Y)

    def fit(self, X, Y):
        accuracy = {}

        for b in range(X.shape[1] + 1):
            self.b = b
            Y_pred = self.predict(X)
            accuracy[b] = accuracy_score(Y_pred, Y)

        best_b = max(accuracy,key = accuracy.get)
        self.b = best_b

        print('Optimal Value of  is', best_b)
        print('Highest accuracy is',accuracy[best_b])


In [None]:
mp_neuron = MPNeuron()
mp_neuron.fit(X_train, y_train)

In [None]:
predicted = mp_neuron.predict(X_test)
accuracy_test = accuracy_score(predicted, y_test)
print(accuracy_test)


In [None]:
class Perceptron:
# Globl variables weight and bias
    def __init__ (self):
        self.w = None
        self.b = None

# model
    def model(self, x):
        return 1 if (np.dot(self.w, x) >= self.b) else 0

    def predict(self, X):
        Y = []
        for x in X:
            result = self.model(x)
            Y.append(result)
        return np.array(Y)

    def fit(self, X, Y, epochs = 1, lr = 1):
        self.w = np.ones(X.shape[1])
        self.b = 0

        accuracy = {}
        max_accuracy = 0

        for i in range(epochs):
            for x, y in zip(X, Y):
                y_pred = self.model(x)

                if  y == 1 and y_pred == 0:
                    self.w = self.w + lr * x
                    self.b = self.b - lr * 1
                elif y == 0 and y_pred == 1:
                    self.w = self.w - lr * x
                    self.b = self.b + lr * 1

            accuracy[i] = accuracy_score(self.predict(X), Y)
            if (accuracy[i] > max_accuracy):
                max_accuracy = accuracy[i]
                chkptw = self.w
                chkptb = self.b

        self.w = chkptw
        self.b = chkptb

        print("Max Accuracy", max_accuracy)

        plt.plot(list(accuracy.values()))
        plt.ylim([0, 1])
        plt.show()


In [None]:
perceptron =Perceptron()
perceptron.fit(X_train, y_train,100,0.001)


In [None]:
predicted = perceptron.predict(X_test)
accuracy_test = accuracy_score(predicted, y_test)
print(accuracy_test)

In [None]:
cm = confusion_matrix(y_test,predicted)
sn.heatmap(cm,annot=True)

In [None]:
warnings.filterwarnings('ignore')
print(classification_report(y_test,y_pred_test))
