In [48]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK
from sklearn.metrics import accuracy_score


In [49]:
class OurLogisticRegression:
    def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept
       

    def __add_intercept(self, X):
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def __sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def __loss(self, h, y):
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, X, y):
        if self.fit_intercept:
            X = self.__add_intercept(X)

        # weights initialization
        self.w = np.zeros(X.shape[1])

        for i in range(self.num_iter):
            z = np.dot(X, self.w)
            h = self.__sigmoid(z)
            gradient = np.dot(X.T, (h-y)) / y.size
            #print(gradient.shape, self.w.shape, z.shape,h.shape)
            #gradient = (h - y) / y.size
            self.w -= self.lr * gradient

    def predict_prob(self, X):
        if self.fit_intercept:
            X = self.__add_intercept(X)

        return self.__sigmoid(np.dot(X, self.w))

    def predict(self, X, threshold):
        return self.predict_prob(X) >= threshold
    def score(self, y_pred,y_test):
        return float(sum(y_pred == y_test)) / float(len(y_test))


In [50]:
df = pd.read_csv("winequality-red.csv")
df.head()

#X = np.array([[1,2],[1,3],[1,4],[1,5]])
#y = np.array([[0],[0],[1],[1]])
#ourRegression=LogisticRegression(alpha=0.01,iterations=10000)

#w, J_history = ourRegression.gradient_descent(X, y)

#print("W encontrado por gradiente descendente: ")
#print(w)
    


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11,34,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25,67,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15,54,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17,60,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11,34,0.9978,3.51,0.56,9.4,5


In [106]:
#Feature Engineering
#Todas las caracteristicas que ofrece el set de datos se conseideraron importantes 
#para determinar si el vino es bueno o malo

features=['fixed acidity','volatile acidity','citric acid','residual sugar','chlorides','free sulfur dioxide','total sulfur dioxide','density','pH','sulphates','alcohol']
y = df.quality
Outcome = []

for i in y:
    if(y[i] <= 5):
        Outcome.append(0)
    else:
        Outcome.append(1)
        
df['Outcome'] = Outcome
X=df[features]
y=df.Outcome

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.50,random_state=100)

In [135]:
model = OurLogisticRegression(lr=0.03, num_iter=1000)

# fit the model to the training data
model.fit(X_train, y_train)



In [136]:
# predict probabilities for test set
probs = model.predict_prob(X_test)

# predict classes for test set
y_pred = model.predict(X_test, 0.5)

In [137]:
#print(y_pred)
#print(y_test)

print("Precisión: ",model.score(y_pred,y_test))


Precisión:  0.86375


In [144]:
#Usando la libreria Hyperopt para la optimizacion del modelo
# remove missing values
#df = df.dropna()

# build the model
model = OurLogisticRegression(lr=0.03, num_iter=100)
model.fit(X_train, y_train)
probs = model.predict_prob(X_test)
y_pred = model.predict(X_test, 0.5)

# print out the score accuracy
print("Accuracy:", model.score(y_test, y_pred))

def objective(args):
    model = OurLogisticRegression(lr=0.03, num_iter=100)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test, 0.5)
    accuracy = accuracy_score(y_test, y_pred)
    return {'loss': -accuracy, 'status': STATUS_OK}

search_space = hp.randint('n_estimators', 0, 1000)
algorithm = tpe.suggest
best_params = fmin(
  fn=objective,
  space=search_space,
  algo=algorithm,
  max_evals=200)


print(best_params)

Accuracy: 0.31375


  'criterion':hp.choice('criterion'['gini','entropy']),
  'criterion':hp.choice('criterion'['gini','entropy']),
  'criterion':hp.choice('criterion'['gini','entropy']),


TypeError: string indices must be integers