In [1]:
# Notes on Logistic regression and SVM
### Logistic Regression ###

# First draft
#  1) Load the Iris dataset
#  2) Set Test and Training data
#  3) Train the logistic regression model with Train dataset
#  4) Test the precision using Test dataset

# Steps as per standard processes
#  1) Load dataset
#  2) Explore the dataset
#  3) 4 Step Modelling pattern
#    a) Import the model
#    b) Make an instance of the model
#    c) Train the model using train dataset
#    d) Predict using test dataset
#  4) Check the performance
#    a) Score method in sklearn
#    b) Confusion matrix

### SVM ###

# First draft
# 1) Load the iris dataset
# 2) Split into test and train
# 3) Train the model
# 4) Test the performance of the model
# 5) Tweak the model by changing the parameters (regularization, gamma, etc.)
# 6) Observe the changes and ponder on the reasons for the change

# Steps followed
# 1) 4-step process
# 2) The difference in SVM was the part where we could set the kernel type - linear or non-linear

In [2]:
# Load the Iris dataset that ships with scikit-learn.
# The split cell below reads `iris.data` (features) and `iris.target` (labels).

from sklearn.datasets import load_iris
iris = load_iris()

In [3]:
# Split the dataset into train and test sets.
# NOTE: test_size=0.60 holds out 60% of the rows for testing (the reports further
# down show a total support of 90), so only 40% is used for training.
# random_state=0 is passed so the split is the same on every run.

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.60, random_state=0)

In [4]:
# Import the model class and create an instance (logistic regression).
# No arguments are passed, so every hyperparameter is left at the library default.
# NOTE(review): the repr printed after fitting shows solver='warn' and
# multi_class='warn' -- presumably this scikit-learn version is signalling that
# those defaults are going to change; consider setting them explicitly. TODO confirm.

from sklearn.linear_model import LogisticRegression
logisticRegr = LogisticRegression()

# TODO: look at the attributes of logisticRegr (getattr)
# TODO: look at the Keras wrapper for scikit-learn
# TODO: check out the WinML repo / source code

In [5]:
# Train the model on the training split.
# fit() is the last expression of the cell, so the notebook echoes the
# estimator's repr (with all of its hyperparameters) as the cell output.

logisticRegr.fit(x_train,y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [6]:
# Predict a class label for every row of the held-out test split and print
# the resulting array of labels.

predictions = logisticRegr.predict(x_test)
print(predictions)

[2 1 0 2 0 2 0 1 1 1 2 1 1 1 2 0 2 2 0 0 2 2 0 0 2 0 0 1 1 0 2 2 0 2 2 2 0
 2 2 1 2 0 2 0 0 1 2 2 2 2 1 2 2 1 2 2 2 2 1 2 2 0 2 1 1 1 2 2 0 0 2 1 0 0
 1 0 2 1 0 1 2 1 0 2 2 2 2 0 0 2]


In [7]:
# Check the performance of the fitted logistic regression model on the
# held-out test split using the estimator's built-in score() method.
score = logisticRegr.score(x_test,y_test)
print(score)

0.8777777777777778


In [8]:
### SVM ###

## Support vector classifier (SVC) with a linear kernel ##

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC

# fit() hands back the estimator itself, so building and training are chained.
svclassifier = SVC(kernel='linear').fit(x_train, y_train)
prediction = svclassifier.predict(x_test)

# Overall score on the held-out test split.
# Note: this rebinds `score`, replacing the logistic regression value.
score = svclassifier.score(x_test, y_test)
print(score)

# Evaluation: per-class report first, then the confusion matrix.
print(classification_report(y_test, prediction))
print(confusion_matrix(y_test, prediction))

0.9666666666666667
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        26
           1       0.94      0.97      0.96        33
           2       0.97      0.94      0.95        31

   micro avg       0.97      0.97      0.97        90
   macro avg       0.97      0.97      0.97        90
weighted avg       0.97      0.97      0.97        90

[[26  0  0]
 [ 0 32  1]
 [ 0  2 29]]


In [9]:
## Testing WinML tools ##
from winmltools import convert_sklearn
from onnxmltools.convert.common.data_types import FloatTensorType

# Convert the fitted logistic regression model to ONNX and print the result.
#
# The declared input tensor has to be as wide as the rows the model was trained
# on. A hard-coded width of 2 does not match this model: the converted graph
# carries 12 coefficients for 3 class labels, i.e. 4 features per row. Taking the
# width from x_train keeps the declaration in step with the training data.
#
# NOTE: despite the 'LinearSVR' / linear_svr_onnx naming, the object being
# converted is logisticRegr. The names are kept unchanged so that anything
# relying on them keeps working.
linear_svr_onnx = convert_sklearn(logisticRegr, name='LinearSVR',
                                  input_features=[('input', FloatTensorType([1, x_train.shape[1]]))])
print(linear_svr_onnx)

# Open questions #
# What does it mean that the "name" parameter is used by mlgen for class names and variables?
# How to change the tensor type?
# Functionality of convert_sklearn - check source code
# Functionality of save_model

simple model: <class 'sklearn.linear_model.logistic.LogisticRegression'> 
ir_version: 3
producer_name: "OnnxMLTools"
producer_version: "1.2.2.0129"
domain: "onnxml"
model_version: 0
doc_string: ""
graph {
  node {
    input: "input"
    output: "label"
    output: "probability_tensor"
    name: "LinearClassifier"
    op_type: "LinearClassifier"
    attribute {
      name: "classlabels_ints"
      ints: 0
      ints: 1
      ints: 2
      type: INTS
    }
    attribute {
      name: "coefficients"
      floats: 0.37919744849205017
      floats: 1.1664749383926392
      floats: -1.8756707906723022
      floats: -0.7896668910980225
      floats: 0.3149051368236542
      floats: -1.332205891609192
      floats: 0.395672470331192
      floats: -0.7511692047119141
      floats: -1.2906813621520996
      floats: -0.7040302753448486
      floats: 1.745176076889038
      floats: 1.491852045059204
      type: FLOATS
    }
    attribute {
      name: "intercepts"
      floats: 0.23969756066799164

In [10]:
### SVM using non-linear classifier ###

## Gaussian ##
## Kernel SVM ##
## Sigmoid ##