In [68]:
import turicreate as tc
import numpy as np

In [69]:
tc.__version__

'6.4.1'

In [70]:
from pymongo import MongoClient

In [71]:
client = MongoClient(serverSelectionTimeoutMS=50)
db = client.turidatabase

In [72]:
db.labeledinstances.find({'dsid': 4})

<pymongo.cursor.Cursor at 0x7fc862c8d910>

In [73]:
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, serverselectiontimeoutms=50), 'turidatabase')

In [74]:
def get_dataset_data(dsid):
    features = []
    labels = []
    
    for a in db.labeledinstances.find({"dsid": dsid}):
        features.append([float(val) for val in a['feature']])
        labels.append(a['label'])
        
    data = {'target': labels, 'sequence':np.array(features)}
    
    return data


def get_dataset_sframe(dsid):
    data = get_dataset_data(dsid)
    return tc.SFrame(data=data)


In [75]:
dsid_4_sframe = get_dataset_sframe(4)

## Create a defualt model

In [105]:
moel = tc.classifier.create(dsid_4_sframe, target='target', verbose=False)

In [78]:
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, serverselectiontimeoutms=50), 'turidatabase')

In [79]:
db.trainedmodels.update({"dsid": 1}, {"$set": {"ds_id":1, "ACC:": 0.99, "path": 'path13'}}, upsert=True)

  """Entry point for launching an IPython kernel.


{'n': 1,
 'upserted': ObjectId('618c9dfe14e5094753fd1367'),
 'nModified': 0,
 'ok': 1.0,
 'updatedExisting': False}

In [101]:
doc = db.trainedmodels.find_one({"dsid":4})
doc["count"]

1866

In [102]:
doc["acc_mlp"]

0.9941050375133976

In [103]:
doc

{'_id': ObjectId('618f0adf14e5094753ff8d61'),
 'dsid': 4,
 'acc_mlp': 0.9941050375133976,
 'acc_turi': 0.9914255091103966,
 'count': 1866,
 'path_mlp': '../models/mlp_model_dsid4',
 'path_turi': '../models/mlp_model_dsid4'}

In [100]:
doc["acc_mlp"]

KeyError: 'acc_mlp'

## Try a MLP from SKLearn

In [106]:
data = get_dataset_data(4)
X = data['sequence']

encode_rotation = {'x90':0,
                  'xNeg90':1,
                  'x180':2,
                  'xNeg180':3,
                  'y90':4,
                  'yNeg90':5,
                  'y180':6,
                  'yNeg180':7,
                  'z90':8,
                  'zNeg90':9,
                  'z180':10,
                  'zNeg180':11}

y = np.array([ encode_rotation[s] for s in data['target']])

In [108]:
from sklearn import __version__ as sklearn_version
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

In [109]:

def get_mlp_1(x, y, ephocs=150):
    clf = MLPClassifier(hidden_layer_sizes=(50, 25), 
                        activation='relu',
                        solver='adam',
                        alpha=1e-4, # L2 penalty
                        batch_size='auto', # min of 200, num_samples
                        learning_rate='constant', 
                        #learning_rate_init=0.2, # only SGD
                        #power_t=0.5,    # only SGD
                        max_iter=ephocs, 
                        shuffle=True, 
                        random_state=1, 
                        tol=1e-9, # for stopping
                        verbose=False, 
                        warm_start=False, 
                        momentum=0.9, # only SGD
                        #nesterovs_momentum=True, # only SGD
                        early_stopping=False, 
                        validation_fraction=0.1, # only if early_stop is true
                        beta_1=0.9, # adam decay rate of moment
                        beta_2=0.999, # adam decay rate of moment
                        epsilon=1e-08) # adam numerical stabilizer
    clf.fit(x, y)
    return clf

In [110]:

%time clf = get_mlp_1(x_train, y_train)

CPU times: user 9.56 s, sys: 1.17 s, total: 10.7 s
Wall time: 1.44 s




In [111]:
yhat = clf.predict(x_train)
print('Validation Acc:',accuracy_score(yhat,y_train))
yhat = clf.predict(x_test)
print('Testing Acc:',accuracy_score(yhat,y_test))

Validation Acc: 0.9986595174262735
Testing Acc: 0.9010695187165776


### Try on KFolds

In [112]:

from sklearn.model_selection import StratifiedKFold
data = get_dataset_data(4)
X = data['sequence']
y = np.array([ encode_rotation[s] for s in data['target']])

In [113]:

skf = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)

accs = []
for train, test in skf.split(X,y):
    x_train, x_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    
    clf = get_mlp_1(x_train, y_train, ephocs=500)
    yhat = clf.predict(x_test)
    accs.append(accuracy_score(yhat, y_test))
    
for a in accs:
    print("Acc:", a)



Acc: 0.8502673796791443
Acc: 0.8609625668449198
Acc: 0.8983957219251337
Acc: 0.8609625668449198
Acc: 0.93048128342246
Acc: 0.8235294117647058
Acc: 0.8978494623655914
Acc: 0.8817204301075269
Acc: 0.9193548387096774
Acc: 0.8924731182795699




In [115]:
np.mean(accs)

0.8815996779943648

In [116]:
## Attempt 2

In [162]:

def get_mlp_2(x, y, ephocs=150):
    clf = MLPClassifier(hidden_layer_sizes=(100, 50), 
                        activation='relu',
                        solver='sgd', 
                        alpha=1e-4, # L2 penalty
                        batch_size='auto', # min of 200, num_samples
                        learning_rate='constant', 
                        learning_rate_init=0.5, # only SGD
                        power_t=0.2,    # only SGD
                        max_iter=ephocs, 
                        shuffle=True, 
                        random_state=1, 
                        tol=1e-9, # for stopping
                        verbose=False, 
                        warm_start=False, 
                        momentum=0.9, # only SGD
                        #nesterovs_momentum=True, # only SGD
                        early_stopping=False, 
                        validation_fraction=0.1, # only if early_stop is true
#                        beta_1=0.9, # adam decay rate of moment
#                        beta_2=0.999, # adam decay rate of moment
                        epsilon=1e-08) # adam numerical stabilizer
    clf.fit(x, y)
    return clf

In [163]:
%%time
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
clf = get_mlp_2(x_train, y_train)
yhat = clf.predict(x_train)
print('valid acc:', accuracy_score(yhat, y_train))
yhat = clf.predict(x_test)
print('test Acc:',accuracy_score(yhat,y_test))

valid acc: 0.9792225201072386
test Acc: 0.8903743315508021
CPU times: user 3.08 s, sys: 341 ms, total: 3.42 s
Wall time: 456 ms


In [164]:
%%time
skf = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)

accs = []
for train, test in skf.split(X,y):
    x_train, x_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    
    clf = get_mlp_2(x_train, y_train)
    yhat = clf.predict(x_test)
    accs.append(accuracy_score(yhat, y_test))
    
for a in accs:
    print("Acc:", a)
print("Mean:",np.mean(accs))



Acc: 0.8770053475935828
Acc: 0.9037433155080213
Acc: 0.8877005347593583
Acc: 0.8716577540106952
Acc: 0.8609625668449198
Acc: 0.8128342245989305
Acc: 0.8548387096774194
Acc: 0.8225806451612904
Acc: 0.8817204301075269
Acc: 0.9032258064516129
Mean: 0.8676269334713357
CPU times: user 50.5 s, sys: 5.4 s, total: 55.9 s
Wall time: 7.25 s


## Try Model 1 with a PCA 

In [127]:
%%time
from sklearn.decomposition import PCA

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

pca = PCA(n_components=150)
pca.fit(x_train)
x_pca = pca.transform(x_train)

clf = get_mlp_2(x_pca, y_train)

x_test_pca = pca.transform(x_test)

yhat = clf.predict(x_pca)
print('Validation Acc:', accuracy_score(yhat, y_train) )
yhat = clf.predict(x_test_pca)
print('Test Acc:',accuracy_score(yhat,y_test))

Validation Acc: 1.0
Test Acc: 0.8636363636363636
CPU times: user 12.4 s, sys: 1.43 s, total: 13.8 s
Wall time: 1.82 s




In [138]:
%%time
skf = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)

accs = []
for train, test in skf.split(X,y):
    x_train, x_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    
    pca = PCA(n_components=150)
    pca.fit(x_train)
    x_train_pca = pca.transform(x_train)
    
    clf = get_mlp_2(x_train_pca, y_train, ephocs=150)
    
    x_test_pca = pca.transform(x_test)
    
    yhat = clf.predict(x_test_pca)
    
    accs.append(accuracy_score(yhat, y_test))
    
for a in accs:
    print("Acc:", a)
print("Mean:",np.mean(accs))



Acc: 0.8663101604278075
Acc: 0.8770053475935828
Acc: 0.9144385026737968
Acc: 0.8556149732620321
Acc: 0.9144385026737968
Acc: 0.8716577540106952
Acc: 0.8924731182795699
Acc: 0.8655913978494624
Acc: 0.8817204301075269
Acc: 0.8924731182795699
Mean: 0.8831723305157839
CPU times: user 2min 18s, sys: 16.7 s, total: 2min 35s
Wall time: 20.6 s




## Train a model to save with CoreML

In [165]:
import coremltools



In [169]:
%%time
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
clf = get_mlp_2(x_train, y_train)
yhat = clf.predict(x_train)
print('valid acc:', accuracy_score(yhat, y_train))
yhat = clf.predict(x_test)
print('test Acc:',accuracy_score(yhat,y_test))

valid acc: 1.0
test Acc: 0.8983957219251337
CPU times: user 14.3 s, sys: 1.64 s, total: 16 s
Wall time: 2.12 s




In [None]:
coreml_model = coremltools.converters.sklearn.convert(clf,
                                                     [ i for i in range(0,300)],
                                                     "rotation")

In [181]:
from sklearn.linear_model import LinearRegression

In [182]:
model = LinearRegression()

In [183]:
model.fit(X, y)

LinearRegression()

In [188]:
yhat = [round(y_i) for y_i in model.predict(X)]
print('valid acc:', accuracy_score(yhat, y))

valid acc: 0.1345123258306538


In [186]:
yhat

array([ 4.73912917, -1.08712252, -0.959817  , ...,  6.68452897,
        5.10036997, 10.00287016])

In [187]:
round(4.7)

5

In [189]:
coreml_model = coremltools.converters.sklearn.convert(model)

ImportError: cannot import name 'Imputer' from 'sklearn.preprocessing' (/Users/nicholaslarsen/opt/anaconda3/envs/mlenv2020/lib/python3.7/site-packages/sklearn/preprocessing/__init__.py)