In [1]:
import joblib
from pathlib import Path
import kachery_p2p as kp
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from utils import parse_sf_results

In [2]:
# Relative directory that the joblib.dump cells further down write the fitted
# classifiers into. exist_ok=True makes this cell safe to re-run.
models_path = Path('models')
models_path.mkdir(exist_ok=True)

In [3]:
# Load the SpikeForest results JSON (spikeforest-full-gt-qm.json) through
# kachery-p2p, addressed by its sha1 content hash.
# The cells below iterate sf_data and read entry['studyName'] from each item.
khash = 'sha1://b3444629251cafda919af535f0e9837279151c6e/spikeforest-full-gt-qm.json?manifest=cf73c99d06c11e328e635e14dc24b8db7372db3d'
sf_data = kp.load_json(khash)

In [17]:
# Unique study names present in the loaded results.
available_studies = {record['studyName'] for record in sf_data}

In [18]:
# Exclude every study whose name contains 'paired', 'neuronexus' or
# 'neurocube', plus the study named exactly 'mea_c30'.
studies_to_exclude = {
    record['studyName']
    for record in sf_data
    if any(keyword in record['studyName']
           for keyword in ('paired', 'neuronexus', 'neurocube'))
} | {'mea_c30'}

In [19]:
# Studies that remain after removing the excluded ones.
# sorted() rather than list(): iterating a set of strings yields an order that
# can change between kernel restarts (string hash randomization), so sorting
# keeps this list reproducible under Restart & Run All.
filtered_studies = sorted(available_studies - studies_to_exclude)

In [5]:
# Get metrics-fp dataset, excluding every study in studies_to_exclude
# (paired, neuronexus, neurocube and mea_c30 -- not only the paired ones).
# NOTE(review): train_test_split=True presumably makes parse_sf_results return
# the 'X_train' / 'y_train' / 'X_test' / 'y_test' entries read by the cells
# below -- confirm in utils, including whether the split is seeded.
dataset = parse_sf_results(sf_data=sf_data, exclude_study_names=studies_to_exclude, train_test_split=True)

In [9]:
# Random Forest baseline (currently disabled):
# rfc_model = RandomForestClassifier()
# rfc_model.fit(dataset['X_train'], dataset['y_train'])

In [10]:
# Random Forest evaluation (currently disabled):
# y_test_preds = rfc_model.predict(dataset['X_test'])
# rfc_f1 = f1_score(dataset['y_test'], y_test_preds)
#
# print(f'Random Forest F1-Score is {rfc_f1}')

In [None]:
# Standardize the features, then fit a linear-kernel SVC on the training split.
# fit() returns the fitted pipeline itself, so it can be chained onto construction.
svc_model = make_pipeline(StandardScaler(), SVC(kernel='linear')).fit(
    dataset['X_train'],
    dataset['y_train'],
)
svc_model

In [None]:
# Score the fitted SVC pipeline on dataset['X_test'] / dataset['y_test'].
y_test_preds = svc_model.predict(dataset['X_test'])
svc_f1 = f1_score(y_true=dataset['y_test'], y_pred=y_test_preds)

print(f'Linear SVC F1-Score is {svc_f1}')

In [None]:
# Persist the fitted SVC pipeline under models_path.
# joblib.dump(rfc_model, models_path / 'rfc_general_clf.joblib')
joblib.dump(value=svc_model, filename=models_path.joinpath('svc_general_clf.joblib'))


In [None]:
# Fit a k-nearest-neighbours classifier (library default settings) on the
# training split. fit() returns the estimator itself, so it can be chained.
knn_model = KNeighborsClassifier().fit(
    dataset['X_train'],
    dataset['y_train'],
)
knn_model

In [None]:
# Score the fitted KNN classifier on dataset['X_test'] / dataset['y_test'].
y_test_preds = knn_model.predict(dataset['X_test'])
knn_f1 = f1_score(y_true=dataset['y_test'], y_pred=y_test_preds)
print(f'KNN F1-Score is {knn_f1}')

In [None]:
# Persist the fitted KNN classifier under models_path.
joblib.dump(value=knn_model, filename=models_path.joinpath('knn_general_clf.joblib'))



