# Regresión Logística

In [14]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.linear_model import LogisticRegression


In [15]:
class OurLogisticRegression:
    def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept
       

    def __add_intercept(self, X):
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def __sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def __loss(self, h, y):
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, X, y):
        if self.fit_intercept:
            X = self.__add_intercept(X)

        # weights initialization
        self.w = np.zeros(X.shape[1])

        for i in range(self.num_iter):
            z = np.dot(X, self.w)
            h = self.__sigmoid(z)
            gradient = np.dot(X.T, (h-y)) / y.size
            #print(gradient.shape, self.w.shape, z.shape,h.shape)
            #gradient = (h - y) / y.size
            self.w -= self.lr * gradient

    def predict_prob(self, X):
        if self.fit_intercept:
            X = self.__add_intercept(X)

        return self.__sigmoid(np.dot(X, self.w))

    def predict(self, X, threshold):
        return self.predict_prob(X) >= threshold
    def score(self, y_pred,y_test):
        return float(sum(y_pred == y_test)) / float(len(y_test))


In [16]:
def metrics(X_test, y_test, y_pred):
    score = float(sum(y_pred == y_test)) / float(len(y_test))
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', labels=np.unique(y_pred))
    recall = recall_score(y_test, y_pred, average='weighted', labels=np.unique(y_pred))
    
    metrics = [score, accuracy, precision, recall]
    
    return metrics
    

In [17]:
def compare(our_metrics, sklearn_metrics):
    
    res = pd.DataFrame([[our_metrics[0], sklearn_metrics[0]],
                  [our_metrics[1], sklearn_metrics[1]], 
                  [our_metrics[2], sklearn_metrics[2]],
                  [our_metrics[3], sklearn_metrics[3]]],
             ['Score','Accuracy','Precision','Recall'],    
             ['Our Implementation', 'Sklearn\'s Implementation'])
    return res


In [18]:
#Regresion Logistica usando la libreria Sklearn
sklearn_metrics = []

def SkLogisticRegr():
    logisticRegr = LogisticRegression(random_state = 16, max_iter = 1000)
    logisticRegr.fit(X_train, y_train)
    logisticRegr.predict(X_test)
    
    global sklearn_metrics
    sklearn_metrics = metrics(X_test, y_test, y_pred)

### Red Wine Quality

In [19]:
df = pd.read_csv("winequality-red.csv")
df.head()

#X = np.array([[1,2],[1,3],[1,4],[1,5]])
#y = np.array([[0],[0],[1],[1]])
#ourRegression=LogisticRegression(alpha=0.01,iterations=10000)

#w, J_history = ourRegression.gradient_descent(X, y)

#print("W encontrado por gradiente descendente: ")
#print(w)
    


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11,34,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25,67,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15,54,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17,60,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11,34,0.9978,3.51,0.56,9.4,5


In [20]:
#Feature Engineering
#Todas las caracteristicas que ofrece el set de datos se conseideraron importantes 
#para determinar si el vino es bueno o malo

features=['fixed acidity','volatile acidity','citric acid','residual sugar','chlorides','free sulfur dioxide','total sulfur dioxide','density','pH','sulphates','alcohol']
y = df.quality
Outcome = []

#Se toma la calidad como referencia para el resultado, tal que si es mayor a 5 es un
#buen vino y de lo contrario es un mal vino

for i in y:
    if(y[i] <= 5):
        Outcome.append(0)
    else:
        Outcome.append(1)
        
df['Outcome'] = Outcome
X=df[features]
y=df.Outcome

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.50,random_state=100)

In [21]:
model = OurLogisticRegression(lr=0.03, num_iter=1000)

# fit the model to the training data
model.fit(X_train, y_train)



In [22]:
# predict probabilities for test set
probs = model.predict_prob(X_test)

# predict classes for test set
y_pred = model.predict(X_test, 0.5)

#Metricas para el modelo de la clase OurLogisticRegression
our_metrics = metrics(X_test, y_test, y_pred)

In [10]:
#Usando la libreria Hyperopt para la optimizacion del modelo
# remove missing values
#df = df.dropna()

# build the model
model = OurLogisticRegression(lr=0.03, num_iter=100)
model.fit(X_train, y_train)
probs = model.predict_prob(X_test)
y_pred = model.predict(X_test, 0.5)

# print out the score accuracy
print("Accuracy:", model.score(y_test, y_pred))

def objective(args):
    model = OurLogisticRegression(lr=0.03, num_iter=100)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test, 0.5)
    accuracy = accuracy_score(y_test, y_pred)
    return {'loss': -accuracy, 'status': STATUS_OK}

search_space = hp.randint('n_estimators', 0, 1000)
algorithm = tpe.suggest
best_params = fmin(
  fn=objective,
  space=search_space,
  algo=algorithm,
  max_evals=200)


print(best_params)

In [23]:
SkLogisticRegr()
compare(our_metrics, sklearn_metrics)

Unnamed: 0,Our Implementation,Sklearn's Implementation
Score,0.86375,0.86375
Accuracy,0.86375,0.86375
Precision,0.86375,0.86375
Recall,1.0,1.0


### Notas del curso de Arquitectura de Computadores 1

In [24]:
df = pd.read_csv("grades.csv")
df.head()

Unnamed: 0,Proyecto1,Proyecto2,Examen1,Taller1,Tarea1,Final,Resultado
0,100.0,100.0,53.3,80.0,76.0,80.0,1
1,0.0,0.0,12.6,40.0,0.0,5.0,0
2,100.0,45.0,49.6,100.0,100.0,80.0,1
3,0.0,0.0,0.0,0.0,0.0,0.0,0
4,100.0,100.0,61.6,100.0,66.7,80.0,1


In [25]:
#Feature Engineering
#Se seleccionaron las caracteristicas solicitadas en el enunciado

features=['Proyecto1','Proyecto2','Examen1','Tarea1']

X=df[features]
y=df['Resultado']

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.50,random_state=100)

In [26]:
model = OurLogisticRegression(lr=0.03, num_iter=1000)
model.fit(X_train, y_train)



In [27]:
probs = model.predict_prob(X_test)
y_pred = model.predict(X_test, 0.5)
our_metrics = metrics(X_test, y_test, y_pred)

In [28]:
SkLogisticRegr()
compare(our_metrics, sklearn_metrics)

Unnamed: 0,Our Implementation,Sklearn's Implementation
Score,0.849057,0.849057
Accuracy,0.849057,0.849057
Precision,0.856165,0.856165
Recall,0.849057,0.849057
