In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC, LinearSVC

### Algoritmo SVM

In [None]:
class SVM_local:
    """Linear soft-margin SVM trained with per-sample sub-gradient descent.

    The decision function is ``f(x) = w . x - b``. For every sample the
    update follows the sub-gradient of
    ``C * ||w||^2 + max(0, 1 - y * f(x))``, so ``C`` weights the L2 penalty
    applied to ``w``.

    Attributes set by ``fit``:
        w: 1-D weight vector with one entry per feature.
        b: scalar offset subtracted from ``w . x``.
    """

    def __init__(self, learning_rate=0.001, C=0.01, n_iters=500):
        """Store the hyper-parameters; no training happens here.

        Args:
            learning_rate: step size used for every update.
            C: weight of the L2 penalty on ``w``.
            n_iters: number of full passes over the training samples.
        """
        self.lr = learning_rate
        self.C = C
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from the training data.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like with one label per sample; either 1-D or a single
                column. Values <= 0 are treated as class -1, the rest as +1.

        Raises:
            ValueError: if ``y`` does not hold exactly one label per row of X.
        """
        X = np.asarray(X, dtype=float)
        n_samples, n_features = X.shape

        # Flatten first: a column vector (n, 1) would otherwise turn every
        # per-sample label into a 1-element array and make `b` an array too.
        labels = np.where(np.asarray(y).ravel() <= 0, -1, 1)
        if labels.shape[0] != n_samples:
            raise ValueError(
                "y must contain exactly one label per sample: "
                f"got {labels.shape[0]} labels for {n_samples} samples"
            )

        self.w = np.zeros(n_features)
        self.b = 0.0

        for _ in range(self.n_iters):
            for x_i, y_i in zip(X, labels):
                # True when the sample lies on the correct side of the margin.
                margin_ok = y_i * (np.dot(x_i, self.w) - self.b) >= 1

                if margin_ok:
                    # Only the regularisation term contributes to the gradient.
                    self.w -= self.lr * (2 * self.C * self.w)
                else:
                    # Hinge loss is active: add its gradient for w and b.
                    self.w -= self.lr * (2 * self.C * self.w - x_i * y_i)
                    self.b -= self.lr * y_i

    def predict(self, X):
        """Return the sign of ``w . x - b`` for every row of ``X``.

        The result contains +1 / -1, and 0 for rows that evaluate to exactly 0.
        """
        approx = np.dot(np.asarray(X, dtype=float), self.w) - self.b
        return np.sign(approx)

### Lectura y separación del dataset

In [None]:
# Read every column as text, then cast the seven feature columns to float in
# one step; any column not listed here keeps its string values.
dataset = pd.read_csv(
    "~/Documents/BM/Curso DS/gender_classification_v7.csv",
    dtype='str',
    encoding="ISO-8859-1",
).astype(
    {
        "long_hair": float,
        "forehead_width_cm": float,
        "forehead_height_cm": float,
        "nose_wide": float,
        "nose_long": float,
        "lips_thin": float,
        "distance_nose_to_lip_long": float,
    }
)

In [None]:
# Show the loaded dataset as the cell output.
dataset

In [None]:
# Drop duplicated rows; `dataset` itself is left untouched.
clean_dataset = dataset.drop_duplicates()

In [None]:
# Features: the first seven columns. Labels: the last column.
X = clean_dataset.iloc[:,:7]
y = clean_dataset.iloc[:,-1]

In [None]:
# Turn the label Series into a one-column DataFrame.
y = y.to_frame()

In [None]:
# Encode the two classes as the -1 / +1 targets used by the SVM.
y.loc[y["gender"] == 'Male', "gender"] = -1.0
y.loc[y["gender"] == 'Female', "gender"] = 1.0

In [None]:
# The encoded labels are numeric now; store them as float.
y = y.astype({'gender': float})

In [None]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# split, and therefore every accuracy reported later, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

### Entrenamiento y evaluación

In [None]:
# Custom SVM with every hyper-parameter spelled out (learning_rate is the
# class default).
model = SVM_local(
    learning_rate=0.001,
    C=0.1,
    n_iters=500,
)

In [None]:
# y_train is a one-column DataFrame; flatten it so the model receives a 1-D
# label vector instead of an (n, 1) column.
model.fit(x_train.to_numpy(), y_train.to_numpy().ravel())

In [None]:
# Predictions of the custom SVM on the held-out features.
# NOTE(review): despite its name, `y_true` stores predicted labels, not the
# ground truth (which is `y_test`).
y_true = model.predict(x_test.to_numpy())

In [None]:
# accuracy_score expects (ground truth, predictions); `y_true` holds the
# predictions here. The score is symmetric, so the value is the same.
accuracy = accuracy_score(y_test, pd.DataFrame(y_true))
print("Precisión: ", accuracy)

### Entrenamiento y evaluación de los algoritmos de scikit-learn

#### LinearSVC

In [None]:
# scikit-learn linear SVM using the same numeric C and iteration budget as the
# custom model.
# NOTE(review): scikit-learn's C may not weight the penalty the same way as
# SVM_local's C — confirm before comparing the scores.
model_skl_lin = LinearSVC(
    C=0.1,
    max_iter=500,
    fit_intercept=True,
)

In [None]:
# Flatten the one-column label frame: scikit-learn expects y with shape
# (n_samples,) and warns when given a column vector.
model_skl_lin.fit(x_train.to_numpy(), y_train.to_numpy().ravel())

In [None]:
# LinearSVC predictions on the held-out features.
# NOTE(review): the `y_true_` prefix is misleading; these are predicted labels.
y_true_skl = model_skl_lin.predict(x_test.to_numpy())

In [None]:
# Ground truth first, LinearSVC predictions second.
accuracy = accuracy_score(y_test, pd.DataFrame(y_true_skl))
print("Precisión: ", accuracy)

#### SVC Linear

In [None]:
# Kernel SVM restricted to a linear kernel, capped at 500 solver iterations.
model_skl_svc = SVC(
    kernel='linear',
    C=0.1,
    max_iter=500,
)

In [None]:
# Flatten the one-column label frame: scikit-learn expects y with shape
# (n_samples,) and warns when given a column vector.
model_skl_svc.fit(x_train.to_numpy(), y_train.to_numpy().ravel())

In [None]:
# Predict with the linear-kernel SVC fitted in this section. Using
# `model_skl_lin` here would only repeat the LinearSVC result.
y_true_skl_svc = model_skl_svc.predict(x_test.to_numpy())

In [None]:
# Ground truth first, linear-SVC predictions second.
accuracy = accuracy_score(y_test, pd.DataFrame(y_true_skl_svc))
print("Precisión: ", accuracy)

#### SVC Polinómico

In [None]:
# Kernel SVM with a polynomial kernel, capped at 500 solver iterations.
model_skl_svc_p = SVC(
    kernel='poly',
    C=0.1,
    max_iter=500,
)

In [None]:
# Flatten the one-column label frame: scikit-learn expects y with shape
# (n_samples,) and warns when given a column vector.
model_skl_svc_p.fit(x_train.to_numpy(), y_train.to_numpy().ravel())

In [None]:
# Polynomial-kernel SVC predictions on the held-out features.
# NOTE(review): the `y_true_` prefix is misleading; these are predicted labels.
y_true_skl_svc_p = model_skl_svc_p.predict(x_test.to_numpy())

In [None]:
# Ground truth first, polynomial-SVC predictions second.
accuracy = accuracy_score(y_test, pd.DataFrame(y_true_skl_svc_p))
print("Precisión: ", accuracy)

https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html