# Logistic Regression parameters

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets, metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler

# Load the Beijing dataset from CSV; row 0 of the file supplies the column names.
df_beijing = pd.read_csv(
    'data/Beijing.csv',
    sep=",",
    header=0,
)

#z-score standardization function (subtract the mean, divide by the standard deviation)
def feature_normalize(df):
    """Return *df* standardized with its own mean and standard deviation.

    The result is ``(df - df.mean()) / df.std()``; for a pandas DataFrame
    those statistics are computed per column. *df* itself is not modified.
    A zero standard deviation is not special-cased.
    """
    centered = df - df.mean()
    spread = df.std()
    return centered / spread

#variable initialization
# Positional columns 1..13 are the features; positional column 15 is the target.
X_beijing = df_beijing.iloc[:, 1:14]
y_beijing = df_beijing.iloc[:, 15]
X_beijing = feature_normalize(X_beijing)

# `.values` replaces `as_matrix()`, which was deprecated in pandas 0.23 and
# removed in pandas 1.0; it returns the same NumPy array on old and new pandas.
X = X_beijing.values
y = y_beijing.values
# Flatten to 1-D without hard-coding the number of rows in the CSV.
y = y.reshape(-1)
print(X.shape)
# NOTE(review): X_beijing was already standardized by feature_normalize above,
# so this second standardization looks redundant - confirm before removing.
scaler = StandardScaler()
X = scaler.fit_transform(X)


#class implementing a custom grid search scored with K-fold cross-validation
class CustomGridCV(object):
    """Exhaustive grid search scored with stratified K-fold predictions.

    Every combination of the values in ``griddata`` is applied to ``model``
    via ``set_params``. For each combination the model is refit on every
    training fold, the held-out predictions are gathered into one vector and
    scored once with ``metric(y, y_pred)``. The combination with the highest
    score is kept, so ``metric`` must be "higher is better".

    Parameters
    ----------
    X, y : arrays indexable with integer index arrays (features, labels).
    model : estimator exposing ``set_params``, ``fit`` and ``predict``.
        It is mutated in place; after ``GridSearch`` it holds the last
        parameter combination tried, not the best one.
    metric : callable ``metric(y_true, y_pred)`` returning a comparable score.
    griddata : mapping of parameter name -> iterable of candidate values.
    cv : number of stratified folds used by ``KFoldScore`` (default 5).
    """

    def __init__(self, X, y, model, metric, griddata, cv=5):
        self.X = X
        self.y = y
        self.model = model
        self.metric = metric
        self.params = self.gridpoints(griddata)
        self.cv = cv
        self.bestScore = None
        self.bestParams = None

    def gridpoints(self, data):
        """Expand ``data`` into a list of dicts, one per grid point.

        The first key varies fastest and the last key slowest. An empty
        mapping yields ``[{}]``, i.e. a single point with no overrides.
        """
        combos = [{}]
        for name in data:
            extended = []
            for value in data[name]:
                for partial in combos:
                    point = dict(partial)
                    point[name] = value
                    extended.append(point)
            combos = extended
        return combos

    def GridSearch(self):
        """Score every grid point, print each result and remember the best.

        Ties keep the earliest grid point because the comparison is strict.
        """
        for param in self.params:
            self.model.set_params(**param)
            score = self.KFoldScore()
            if self.bestScore is None or self.bestScore < score:
                self.bestScore = score
                self.bestParams = param
            print("Score: {0:.5f}, Params: {1}".format(score,param))

    def KFoldScore(self):
        """Return ``metric(y, y_pred)`` over out-of-fold predictions.

        Uses ``self.cv`` shuffled stratified folds with a fixed seed so that
        every grid point is evaluated on identical splits.
        """
        kf = StratifiedKFold(n_splits=self.cv, shuffle=True, random_state=2)
        y_pred = np.zeros(len(self.y))

        for train_index, test_index in kf.split(self.X, self.y):
            self.model.fit(self.X[train_index], self.y[train_index])
            y_pred[test_index] = self.model.predict(self.X[test_index])

        return self.metric(self.y, y_pred)

    def Best(self):
        """Return ``(bestScore, bestParams)``; both are None before a search."""
        return self.bestScore, self.bestParams
    

(19062, 13)


In [14]:
model = LogisticRegression()
# 999 evenly spaced candidates for C, from 0.01 to 9.99 in steps of 0.01.
# linspace fixes the number of points explicitly; arange with a float step
# derives its length from a rounded division.
griddata = {"C": np.linspace(0.01, 9.99, 999)}

# Grid Search for the best parameters
GCV = CustomGridCV(X, y, model, accuracy_score, griddata)
GCV.GridSearch()

print("Best Params:")
print(GCV.Best())

Score: 0.47482, Params: {'C': 0.01}
Score: 0.47597, Params: {'C': 0.02}
Score: 0.47639, Params: {'C': 0.029999999999999999}
Score: 0.47739, Params: {'C': 0.040000000000000001}
Score: 0.47755, Params: {'C': 0.050000000000000003}
Score: 0.47807, Params: {'C': 0.060000000000000005}
Score: 0.47828, Params: {'C': 0.069999999999999993}
Score: 0.47818, Params: {'C': 0.080000000000000002}
Score: 0.47870, Params: {'C': 0.089999999999999997}
Score: 0.47886, Params: {'C': 0.099999999999999992}
Score: 0.47886, Params: {'C': 0.11}
Score: 0.47923, Params: {'C': 0.12}
Score: 0.47938, Params: {'C': 0.13}
Score: 0.47975, Params: {'C': 0.14000000000000001}
Score: 0.47975, Params: {'C': 0.15000000000000002}
Score: 0.47975, Params: {'C': 0.16}
Score: 0.48007, Params: {'C': 0.17000000000000001}
Score: 0.48027, Params: {'C': 0.18000000000000002}
Score: 0.48038, Params: {'C': 0.19}
Score: 0.48075, Params: {'C': 0.20000000000000001}
Score: 0.48069, Params: {'C': 0.21000000000000002}
Score: 0.48054, Params: {'

Score: 0.48022, Params: {'C': 1.79}
Score: 0.48022, Params: {'C': 1.8}
Score: 0.48017, Params: {'C': 1.8100000000000001}
Score: 0.48017, Params: {'C': 1.8200000000000001}
Score: 0.48017, Params: {'C': 1.8300000000000001}
Score: 0.48017, Params: {'C': 1.8400000000000001}
Score: 0.48017, Params: {'C': 1.8500000000000001}
Score: 0.48017, Params: {'C': 1.8600000000000001}
Score: 0.48017, Params: {'C': 1.8700000000000001}
Score: 0.48017, Params: {'C': 1.8800000000000001}
Score: 0.48017, Params: {'C': 1.8900000000000001}
Score: 0.48022, Params: {'C': 1.9000000000000001}
Score: 0.48022, Params: {'C': 1.9100000000000001}
Score: 0.48022, Params: {'C': 1.9200000000000002}
Score: 0.48022, Params: {'C': 1.9299999999999999}
Score: 0.48022, Params: {'C': 1.9399999999999999}
Score: 0.48022, Params: {'C': 1.95}
Score: 0.48022, Params: {'C': 1.96}
Score: 0.48022, Params: {'C': 1.97}
Score: 0.48022, Params: {'C': 1.98}
Score: 0.48022, Params: {'C': 1.99}
Score: 0.48017, Params: {'C': 2.0}
Score: 0.48012

Score: 0.47996, Params: {'C': 3.54}
Score: 0.47996, Params: {'C': 3.5499999999999998}
Score: 0.47996, Params: {'C': 3.5600000000000001}
Score: 0.47996, Params: {'C': 3.5699999999999998}
Score: 0.47996, Params: {'C': 3.5800000000000001}
Score: 0.47996, Params: {'C': 3.5899999999999999}
Score: 0.47996, Params: {'C': 3.5999999999999996}
Score: 0.47996, Params: {'C': 3.6099999999999999}
Score: 0.48001, Params: {'C': 3.6199999999999997}
Score: 0.48001, Params: {'C': 3.6299999999999999}
Score: 0.48001, Params: {'C': 3.6399999999999997}
Score: 0.48001, Params: {'C': 3.6499999999999999}
Score: 0.48001, Params: {'C': 3.6599999999999997}
Score: 0.48001, Params: {'C': 3.6699999999999999}
Score: 0.48001, Params: {'C': 3.6799999999999997}
Score: 0.48001, Params: {'C': 3.6899999999999999}
Score: 0.48001, Params: {'C': 3.6999999999999997}
Score: 0.48001, Params: {'C': 3.71}
Score: 0.48001, Params: {'C': 3.7199999999999998}
Score: 0.48001, Params: {'C': 3.73}
Score: 0.48001, Params: {'C': 3.7399999999

Score: 0.48012, Params: {'C': 5.25}
Score: 0.48012, Params: {'C': 5.2599999999999998}
Score: 0.48012, Params: {'C': 5.2699999999999996}
Score: 0.48012, Params: {'C': 5.2800000000000002}
Score: 0.48012, Params: {'C': 5.29}
Score: 0.48012, Params: {'C': 5.2999999999999998}
Score: 0.48012, Params: {'C': 5.3099999999999996}
Score: 0.48012, Params: {'C': 5.3200000000000003}
Score: 0.48012, Params: {'C': 5.3300000000000001}
Score: 0.48012, Params: {'C': 5.3399999999999999}
Score: 0.48012, Params: {'C': 5.3499999999999996}
Score: 0.48012, Params: {'C': 5.3600000000000003}
Score: 0.48017, Params: {'C': 5.3700000000000001}
Score: 0.48017, Params: {'C': 5.3799999999999999}
Score: 0.48017, Params: {'C': 5.3899999999999997}
Score: 0.48017, Params: {'C': 5.3999999999999995}
Score: 0.48017, Params: {'C': 5.4100000000000001}
Score: 0.48017, Params: {'C': 5.4199999999999999}
Score: 0.48017, Params: {'C': 5.4299999999999997}
Score: 0.48017, Params: {'C': 5.4399999999999995}
Score: 0.48017, Params: {'C'

Score: 0.48017, Params: {'C': 6.9500000000000002}
Score: 0.48017, Params: {'C': 6.96}
Score: 0.48017, Params: {'C': 6.9699999999999998}
Score: 0.48017, Params: {'C': 6.9799999999999995}
Score: 0.48017, Params: {'C': 6.9900000000000002}
Score: 0.48017, Params: {'C': 7.0}
Score: 0.48017, Params: {'C': 7.0099999999999998}
Score: 0.48017, Params: {'C': 7.0199999999999996}
Score: 0.48017, Params: {'C': 7.0300000000000002}
Score: 0.48017, Params: {'C': 7.04}
Score: 0.48017, Params: {'C': 7.0499999999999998}
Score: 0.48017, Params: {'C': 7.0599999999999996}
Score: 0.48017, Params: {'C': 7.0700000000000003}
Score: 0.48017, Params: {'C': 7.0800000000000001}
Score: 0.48017, Params: {'C': 7.0899999999999999}
Score: 0.48017, Params: {'C': 7.0999999999999996}
Score: 0.48017, Params: {'C': 7.1100000000000003}
Score: 0.48017, Params: {'C': 7.1200000000000001}
Score: 0.48017, Params: {'C': 7.1299999999999999}
Score: 0.48017, Params: {'C': 7.1399999999999997}
Score: 0.48017, Params: {'C': 7.15000000000

Score: 0.48007, Params: {'C': 8.6400000000000006}
Score: 0.48007, Params: {'C': 8.6500000000000004}
Score: 0.48007, Params: {'C': 8.6600000000000001}
Score: 0.48007, Params: {'C': 8.6699999999999999}
Score: 0.48001, Params: {'C': 8.6799999999999997}
Score: 0.48001, Params: {'C': 8.6899999999999995}
Score: 0.48001, Params: {'C': 8.6999999999999993}
Score: 0.48001, Params: {'C': 8.7100000000000009}
Score: 0.48001, Params: {'C': 8.7200000000000006}
Score: 0.48001, Params: {'C': 8.7300000000000004}
Score: 0.48001, Params: {'C': 8.7400000000000002}
Score: 0.48001, Params: {'C': 8.75}
Score: 0.48001, Params: {'C': 8.7599999999999998}
Score: 0.48001, Params: {'C': 8.7699999999999996}
Score: 0.48001, Params: {'C': 8.7799999999999994}
Score: 0.48001, Params: {'C': 8.7899999999999991}
Score: 0.48001, Params: {'C': 8.8000000000000007}
Score: 0.48001, Params: {'C': 8.8100000000000005}
Score: 0.48001, Params: {'C': 8.8200000000000003}
Score: 0.48001, Params: {'C': 8.8300000000000001}
Score: 0.48001

In [None]:
# Hold out a stratified test set so that X_test / y_test exist and the model
# is evaluated on rows it was not fitted on.
# NOTE(review): test_size=0.2 and random_state=2 are reviewer choices - confirm.
# NOTE(review): X was standardized on all rows before this split - confirm
# whether the scaler should instead be fitted on the training rows only.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=2)
model = LogisticRegression(C=0.44)
trained_model = model.fit(X_train, y_train)
predictions = trained_model.predict(X_test)
print('Test accuracy is \n',accuracy_score(y_test, predictions))
print(metrics.classification_report(y_test, predictions))