# Logistic Regression Code Appendix

Resources: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

Python Code:

``` Python
# Import dependency
from sklearn.linear_model import LogisticRegression

# Create the logistic regression object (default hyperparameters)
log = LogisticRegression()

# Train the logistic regression model.
# X (feature matrix) and y (target labels) are placeholders: they are not
# defined in this snippet and must exist before it is run.
clf = log.fit(X, y)

# Predict the target class for each row of X (for a binary target: p > 0.5)
clf.predict(X)

# Predict the per-class probabilities, here for the same data used in training
clf.predict_proba(X)

# Score the fitted model on (X, y); for classifiers this is the mean accuracy
clf.score(X, y)
```

In [122]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

In [123]:
# Read the fake pizza dataset directly from its public URL and preview it
PIZZA_URL = 'https://jaredlander.com/data/Fake%20Pizza%20Data.csv'
pizza = pd.read_csv(PIZZA_URL)
pizza.head()

Unnamed: 0,Rating,CostPerSlice,HeatSource,BrickOven,Neighborhood
0,0.03,1.75,Gas,False,LittleItaly
1,4.89,2.75,Coal,True,SoHo
2,4.73,4.0,Wood,True,LittleItaly
3,0.13,1.75,Gas,False,LittleItaly
4,2.45,2.25,Wood,True,Chinatown


In [124]:
# Class balance of the target column: how many rows fall under each BrickOven value
pizza['BrickOven'].value_counts()

False    132
True      68
Name: BrickOven, dtype: int64

In [125]:
# Discard every row that has a missing value in any column
# (axis=0 / how='any' are the pandas defaults, spelled out for the reader)
pizza = pizza.dropna(axis=0, how='any')

In [126]:
# Sanity check: count the remaining missing values per column
pizza.isna().sum()

Rating          0
CostPerSlice    0
HeatSource      0
BrickOven       0
Neighborhood    0
dtype: int64

In [127]:
# (rows, columns) of the frame after dropping rows with missing values
pizza.shape

(200, 5)

In [128]:
# Coerce Rating to a numeric dtype; assign() builds a new frame with the
# converted column in the same position instead of writing into the old one
pizza = pizza.assign(Rating=pd.to_numeric(pizza['Rating']))

In [129]:
# Re-check (rows, columns) after the numeric conversion of Rating
pizza.shape

(200, 5)

In [131]:
# Turn the BrickOven flag into integer class codes
# (the previews in this notebook show False -> 0 and True -> 1)
enc = LabelEncoder()
brick_oven_codes = enc.fit_transform(pizza['BrickOven'])
pizza['BrickOven'] = brick_oven_codes

In [132]:
# Preview the first rows to confirm BrickOven now holds integer codes
pizza.head()

Unnamed: 0,Rating,CostPerSlice,HeatSource,BrickOven,Neighborhood
0,0.03,1.75,Gas,0,LittleItaly
1,4.89,2.75,Coal,1,SoHo
2,4.73,4.0,Wood,1,LittleItaly
3,0.13,1.75,Gas,0,LittleItaly
4,2.45,2.25,Wood,1,Chinatown


In [133]:
# Unfitted logistic regression estimator with scikit-learn's default
# hyperparameters; it is trained in a later cell via log.fit(...)
log = LogisticRegression()

In [142]:
from sklearn.model_selection import train_test_split, cross_validate

# Build the feature matrix and target here so this cell runs on a fresh kernel:
# X and y were otherwise only assigned in a later cell, so Restart & Run All
# failed with a NameError at this point.
X = pizza[['Rating']]   # 2-D frame holding the single feature column
y = pizza['BrickOven']  # 1-D label vector, the shape scikit-learn estimators expect

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 27)

In [152]:
# Standardise the feature(s) with StandardScaler
from sklearn.preprocessing import StandardScaler

# Learn the scaling parameters from the training split only, then apply that same
# transform to the test split so both end up in the same feature space.
# The scaled arrays get their own names: the raw X_train / X_test stay intact
# (re-running this cell is harmless) and remain compatible with the models in
# this notebook, which are fitted on unscaled data.
ss = StandardScaler()
X_train_scaled = ss.fit_transform(X_train)
X_test_scaled = ss.transform(X_test)

In [192]:
# Prepare data for model
# The target is selected as a 1-D Series. Passing the one-column DataFrame
# pizza[['BrickOven']] makes scikit-learn emit a DataConversionWarning
# ("A column-vector y was passed when a 1d array was expected").
y = pizza['BrickOven']
X = pizza[['Rating']]

# L1-penalised logistic regression; 'liblinear' is a solver that supports penalty='l1'.
# C is left at its default (1.0) -- author's note: C = 1 needs to be used if no standard scaler.
logreg = LogisticRegression(random_state=42, penalty = 'l1', solver = 'liblinear')
logreg.fit(X, y)
# In-sample predictions: the model is asked about the same rows it was trained on.
logreg.predict(X)

  return f(**kwargs)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

In [193]:
# fit() returns the estimator itself, so clf and log are the same fitted model.
# y is flattened to 1-D at the call site: a one-column DataFrame target triggers
# scikit-learn's DataConversionWarning. to_numpy().ravel() works whether y is a
# DataFrame or already a Series.
# NOTE(review): this trains on the full (X, y), which includes the rows that
# train_test_split held out as X_test, so metrics computed on X_test are
# optimistic -- consider fitting on the training split instead.
clf = log.fit(X, y.to_numpy().ravel())

  return f(**kwargs)


In [194]:
# Train the model


In [195]:
# Create the logistic regression object

In [199]:
# Predict the class-membership probabilities (not the class labels) for the
# held-out test rows: one row per sample, one column per class in clf.classes_ order
clf.predict_proba(X_test)

array([[0.64624339, 0.35375661],
       [0.71063463, 0.28936537],
       [0.63066736, 0.36933264],
       [0.63118543, 0.36881457],
       [0.68467955, 0.31532045],
       [0.63428745, 0.36571255],
       [0.63170319, 0.36829681],
       [0.63118543, 0.36881457],
       [0.6792107 , 0.3207893 ],
       [0.63066736, 0.36933264],
       [0.69357609, 0.30642391],
       [0.655009  , 0.344991  ],
       [0.63273781, 0.36726219],
       [0.6519863 , 0.3480137 ],
       [0.65030171, 0.34969829],
       [0.7107871 , 0.2892129 ],
       [0.71017692, 0.28982308],
       [0.64793696, 0.35206304],
       [0.63857735, 0.36142265],
       [0.67042328, 0.32957672],
       [0.63325466, 0.36674534],
       [0.69310305, 0.30689695],
       [0.66433226, 0.33566774],
       [0.6304946 , 0.3695054 ],
       [0.63583431, 0.36416569],
       [0.63222065, 0.36777935],
       [0.63394332, 0.36605668],
       [0.63840617, 0.36159383],
       [0.66201308, 0.33798692],
       [0.64488589, 0.35511411],
       [0.

In [202]:
from sklearn.metrics import log_loss

# Cross-entropy (log loss) between the true test labels and the predicted
# class probabilities; lower is better
test_probabilities = clf.predict_proba(X_test)
log_loss(y_test, test_probabilities)

0.6111840419734875

In [198]:
# Predict the probability of the target
