In [1]:
!pip install joblib sklearn-porter
!pip install --upgrade scikit-learn==0.22

Collecting sklearn-porter
  Using cached sklearn_porter-0.7.4-py3-none-any.whl (144 kB)
Installing collected packages: sklearn-porter
Successfully installed sklearn-porter-0.7.4
Collecting scikit-learn==0.22
  Downloading scikit_learn-0.22-cp37-cp37m-win_amd64.whl (6.2 MB)
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 0.22.1
    Uninstalling scikit-learn-0.22.1:
      Successfully uninstalled scikit-learn-0.22.1
Successfully installed scikit-learn-0.22


In [2]:
import numpy as np # numerical analysis
import pandas as pd # main library for data analysis
import matplotlib.pyplot as plt # main library for data plotting
import joblib, pickle
from sklearn_porter import Porter

from IPython.display import display, Markdown # to display the headers

from sklearn.model_selection import train_test_split # Import train_test_split function
# Import svm model
from sklearn.svm import SVC
# Import scikit-learn metrics module for accuracy calculation
from sklearn.metrics import *

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides warnings that may matter
# (e.g. numeric warnings from the metric divisions further down) — consider narrowing it.
warnings.filterwarnings('ignore')

# The string literal below is never assigned or executed: it is disabled code that,
# if run, would change pandas' display options (rows, columns, width, column width).
"""pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)"""

%matplotlib inline

def printf(*args, sep=" ", end="\n"):
    """Render the given values as Markdown in the notebook output.

    Works like ``print``: each argument is converted with ``str``, the pieces
    are joined with ``sep``, ``end`` is appended, and the resulting text is
    displayed as a Markdown object.
    """
    text = sep.join(map(str, args)) + end
    display(Markdown(text))



In [3]:
# Load only the three feature columns and the `freeze` label column.
# `usecols` makes read_csv parse just these columns instead of the whole file;
# the trailing [...] selection keeps the column order fixed, because `usecols`
# does not guarantee the order of the list it is given.
feature_cols = ["freezeX", "freezeY", "freezeZ"]
df = pd.read_csv("data.csv", usecols=feature_cols + ["freeze"])[feature_cols + ["freeze"]]
X = df[feature_cols]
y = df["freeze"]
df

Unnamed: 0,freezeX,freezeY,freezeZ,freeze
0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0
...,...,...,...,...
1140830,0.0,0.0,0.0,0.0
1140831,0.0,0.0,0.0,0.0
1140832,0.0,0.0,0.0,0.0
1140833,0.0,0.0,0.0,0.0


In [4]:
# Split dataset into training set and test set (30% of the rows go to the test set).
# A fixed random_state makes the shuffle, and therefore every metric computed from
# this split, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
X_train

Unnamed: 0,freezeX,freezeY,freezeZ
1010701,0.272521,0.954653,0.179521
277137,0.000000,0.000000,0.000000
344057,0.000000,0.000000,0.000000
990705,1.127672,0.483101,0.604273
655178,0.000000,0.000000,0.000000
...,...,...,...
469488,1.440898,1.017569,0.808631
1040050,0.230493,0.396379,0.515903
1075126,0.000000,0.000000,0.000000
189307,0.000000,0.000000,0.000000


In [None]:
def plot2d(model, X, y, ax=None, plot_support=True, ylabel="", xlabel="", title="", grid_size=30):
    """Plot the decision function for a 2D SVC.

    Parameters
    ----------
    model : fitted estimator
        Must expose ``decision_function`` and, when ``plot_support`` is true,
        ``support_vectors_``.
    X : array-like
        Samples to scatter; column 0 goes on the x-axis and column 1 on the
        y-axis. Converted with ``np.asarray``, so a DataFrame is accepted too.
    y : array-like
        Per-sample values used to colour the scattered points.
    ax : matplotlib axes, optional
        Axes to draw on; the current axes (``plt.gca()``) are used when omitted.
    plot_support : bool
        Whether to circle the model's support vectors.
    ylabel, xlabel, title : str
        Axis labels and plot title.
    grid_size : int
        Number of grid points per axis used to evaluate the decision function.

    The figure is shown with ``plt.show()`` before returning.
    """
    if ax is None:
        ax = plt.gca()

    points = np.asarray(X)
    ax.scatter(points[:, 0], points[:, 1], c=y, s=5, cmap='winter')
    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)
    ax.set_title(title)

    # Remember the limits chosen for the data so the contour/support-vector
    # layers drawn below cannot rescale the view.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # create grid to evaluate model (separate names: do not shadow the X / y inputs)
    grid_x = np.linspace(xlim[0], xlim[1], grid_size)
    grid_y = np.linspace(ylim[0], ylim[1], grid_size)
    mesh_y, mesh_x = np.meshgrid(grid_y, grid_x)
    grid_points = np.vstack([mesh_x.ravel(), mesh_y.ravel()]).T
    decision = model.decision_function(grid_points).reshape(mesh_x.shape)

    # plot decision boundary (level 0) and margins (levels -1 and 1)
    ax.contour(mesh_x, mesh_y, decision, colors='k',
               levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])

    # plot support vectors as hollow circles; an explicit edge colour is needed
    # because with facecolors='none' the default edge colour follows the (absent)
    # face colour and the markers would not be visible.
    if plot_support:
        ax.scatter(model.support_vectors_[:, 0],
                   model.support_vectors_[:, 1],
                   s=300, linewidth=1, edgecolors='k', facecolors='none')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    plt.show()


# Feature columns used for the 2D model (x-axis: freezeY, y-axis: freezeZ).
data = ["freezeY", "freezeZ"]
# Create a svm Classifier
clf = SVC(kernel="linear")
# Train the model using the training set only, so the test partition produced by
# train_test_split stays unseen and the metrics below are not training-set scores.
clf.fit(X_train[data], y_train)

# Plots: the training samples together with the learned boundary, margins and support vectors
plot2d(clf, X_train[data].to_numpy(), y_train, xlabel="freeze(Y)", ylabel="freeze(Z)", title="Graph of the freeze(Y) and freeze(Z) Linear Kernel Decision Function")

# Predict the response for test dataset
y_pred = clf.predict(X_test[data])

# Model Accuracy: how often is the classifier correct?
# Unpacking four values assumes a binary problem (2x2 confusion matrix).
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
p = tp/(tp+fp)             # precision
r = sen = tp/(tp+fn)       # recall, also reported as sensitivity
sp = tn/(tn+fp)            # specificity
f1 = 2*tp/(2*tp+fn+fp)     # F1 score
a = (tp+tn)/(tp+tn+fp+fn)  # accuracy

print(classification_report(y_test, y_pred))

print("\n\ntp: ", tp, ", fp: ", fp, ", fn: ", fn, ", tn: ", tn)
print("Accuracy:", a, end="\n\n")

print("Sensitivity:", sen)
print("Specificity:", sp, end="\n\n")

print("Precision:", p)
print("Recall:", r)
print("F1:", f1)
print("\n\n\n")

In [None]:
# Persist the fitted classifier `clf` to disk as a pickle file.
with open('model.sav', 'wb+') as file:
    pickle.dump(clf, file)

In [None]:
# load the model from disk
# NOTE(security): pickle.load can execute arbitrary code while deserialising —
# only load 'model.sav' files that this notebook (or another trusted source) wrote.
with open('model.sav', 'rb') as file:
    model = pickle.load(file)

# Plots (best effort: a plotting failure must not stop the evaluation below,
# but it is reported instead of being silently discarded)
try:
    plot2d(model, X_test[data].to_numpy(), y_test, xlabel="freeze(Y)", ylabel="freeze(Z)", title="Graph of the freeze(Y) and freeze(Z) Linear Kernel Decision Function")
except Exception as exc:
    print("Plot skipped:", repr(exc))

# Predict the response for test dataset (the test partition from train_test_split)
y_pred = model.predict(X_test[data])

# Model Accuracy: how often is the classifier correct?
# Unpacking four values assumes a binary problem (2x2 confusion matrix).
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
p = tp/(tp+fp)             # precision
r = sen = tp/(tp+fn)       # recall, also reported as sensitivity
sp = tn/(tn+fp)            # specificity
f1 = 2*tp/(2*tp+fn+fp)     # F1 score
a = (tp+tn)/(tp+tn+fp+fn)  # accuracy

print(classification_report(y_test, y_pred))

print("\n\ntp: ", tp, ", fp: ", fp, ", fn: ", fn, ", tn: ", tn)
print("Accuracy:", a, end="\n\n")

print("Sensitivity:", sen)
print("Specificity:", sp, end="\n\n")

print("Precision:", p)
print("Recall:", r)
print("F1:", f1)
print("\n\n\n")

In [None]:
# Export:
# Wrap the model loaded from 'model.sav' in an sklearn-porter Porter targeting Java,
# then print the exported result.
porter = Porter(model, language='java')
# NOTE(review): embed_data=True presumably inlines the model parameters into the
# generated source instead of a separate data file — confirm against sklearn-porter docs.
output = porter.export(embed_data=True)
print(output)