# Building Logistic Regression Model

In [1]:
#import the packages
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the iris dataset that ships with scikit-learn
iris_data = load_iris()

# Measurement matrix and the class label of every sample
features, target = iris_data.data, iris_data.target

# Names of the measurement columns and of the classes
# (target_names is later indexed by the values in `target`)
feature_names, target_names = iris_data.feature_names, iris_data.target_names

In [3]:
# Assemble the working frame: measurements under short snake_case column
# names, plus the class label ("class") and its readable name ("flower").
short_column_names = {
    "sepal length (cm)": "sepal_length",
    "sepal width (cm)": "sepal_width",
    "petal length (cm)": "petal_length",
    "petal width (cm)": "petal_width",
}
df_iris = pd.DataFrame(features, columns=feature_names).rename(columns=short_column_names)
df_iris["class"] = target
# Translate each class label into its name by indexing target_names with it
df_iris["flower"] = df_iris["class"].map(lambda class_id: target_names[class_id])

In [4]:
# Preview the first rows of the assembled frame (measurements, class, flower)
df_iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class,flower
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


In [5]:
# Container for the fitted models; it is filled with one entry per flower name
models = dict()

# Distinct flower names, in order of first appearance in df_iris
unique_targets = (
    df_iris["flower"]
    .unique()
    .tolist()
)

### Let's build Logistic Regression models using the one-against-all (one-vs-rest) approach

In [6]:
# Fit one binary classifier per flower: the target is True for rows of that
# flower and False for every other row. Each fitted model is stored in
# `models` under the flower's name.
for flower_name in unique_targets:
    is_this_flower = df_iris["flower"] == flower_name
    # fit() returns the fitted estimator itself, so it can be stored directly
    models[flower_name] = LogisticRegression().fit(
        df_iris[["sepal_length", "sepal_width", "petal_length", "petal_width"]],
        is_this_flower,
    )

In [7]:
# Show the dictionary of fitted models, keyed by flower name
models

{'setosa': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
           penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
           verbose=0, warm_start=False),
 'versicolor': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
           penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
           verbose=0, warm_start=False),
 'virginica': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
           penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
           verbose=0, warm_start=False)}

In [8]:
# Start from a frame with no rows and one column per flower name;
# the per-class probabilities are written into these columns afterwards
testing_probs = pd.DataFrame(
    data=None,
    columns=unique_targets,
)

In [9]:
# Score the same rows the models were fitted on. For every flower, keep
# column 1 of predict_proba; the models were fitted on a boolean target, and
# scikit-learn orders probability columns by the sorted class labels, so
# column 1 is the probability of the True ("is this flower") outcome.
for flower_name in unique_targets:
    class_probs = models[flower_name].predict_proba(
        df_iris[["sepal_length", "sepal_width", "petal_length", "petal_width"]]
    )
    testing_probs[flower_name] = class_probs[:, 1]

In [10]:
# Preview the per-flower probabilities for the first rows
testing_probs.head()

Unnamed: 0,setosa,versicolor,virginica
0,0.984074,0.134585,1.2e-05
1,0.964775,0.2416,3.7e-05
2,0.976924,0.167258,3e-05
3,0.957121,0.20242,6.7e-05
4,0.985634,0.112769,1.2e-05


In [12]:
#pick, for each row, the class (column label) with the highest probability
predicted_value = testing_probs.idxmax(axis = 1)

### Let's predict the value for one set of inputs

In [15]:
# One set of measurements, in the same column order the models were fitted
# with: sepal_length, sepal_width, petal_length, petal_width
l = [5.1, 3.5, 1.4, 0.2]

# Shape the single sample as a one-row, two-dimensional array for predict_proba
a = np.array(l).reshape(1, -1)

# Empty frame with one column per flower name to receive the probabilities
testing_prob = pd.DataFrame(
    data=None,
    columns=unique_targets,
)

In [16]:
# Ask every per-flower model for the probability that the single sample `a`
# belongs to its flower (column 1 of predict_proba) and record it under the
# flower's name.
for flower_name in unique_targets:
    sample_probs = models[flower_name].predict_proba(a)
    testing_prob[flower_name] = sample_probs[:, 1]

In [17]:
# Name of the column (flower) holding the largest probability for the row labelled 0
testing_prob.idxmax(axis = 1)[0]

'setosa'

## Save the models and the required Python objects for use in the API

In [18]:
import pickle 

In [19]:
# Serialize the dictionary of fitted models to "models.obj".
# The `with` block closes the file as soon as the dump finishes, which
# guarantees the pickled bytes are flushed to disk; a handle that is merely
# left open may leave the file incomplete.
with open("models.obj", "wb") as filehandler:
    pickle.dump(models, filehandler)

In [23]:
# Serialize the list of flower names to "uniquetargets.obj".
# Writing inside `with` closes (and therefore flushes) the file before any
# later code tries to read it back.
with open("uniquetargets.obj", "wb") as filehandler:
    pickle.dump(unique_targets, filehandler)

In [24]:
# Read the pickled object back from "uniquetargets.obj" to check the round trip.
# Pickle data is binary, so the file must be opened with "rb"; text mode
# makes pickle.load fail on Python 3 and can corrupt the bytes on Windows.
# NOTE: pickle.load can execute arbitrary code, so only load files you trust.
with open("uniquetargets.obj", "rb") as fl:
    object_file = pickle.load(fl)