In [1]:
# python imports
import os
import numpy as np
import pandas as pd
import scipy.io
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import argparse
import logging


In [2]:
# Work from the parent of the directory the notebook was started in
# (presumably the repository root, so that the packages below are importable
# and the relative "data/..." paths resolve -- TODO confirm).
# The guard keeps this cell idempotent: os.getcwd() changes after the first
# os.chdir(), so recomputing the parent on a re-run would climb one more
# directory level every time the cell is executed.
if "path_to_classifier" not in globals():
    path_to_classifier = os.path.dirname(os.getcwd())
os.chdir(path_to_classifier)

# ts_class packages, imported from the downloaded GitHub repository
from activity_classifier.main import run_model
from activity_classifier.config import TSF_MODEL, RISE_MODEL, OBS, PREDICTION, OUTPUT_PATH
from activity_classifier.prepare_data import prepare_data
from activity_classifier.retrain_models import retrain_tsf, retrain_rise
from math import floor
from copy import deepcopy
from reproducibility.repeated_training import run_many_classifiers, run_many_classifiers_shuffle

# Make sure the path is right after all the imports are done
# (restores the working directory in case an import changed it).
os.chdir(path_to_classifier)

In [3]:
# Setting up experiment details and file paths
sampling_rate = 3.65 # frames per second
duration = 300 # seconds

# Main folder where the outputs should go
output_folder_path = "reproducibility"
# Folder holding the input CSV files
data_folder_path = "data"

# Training sets passed to the repeated-training helpers. Built from
# data_folder_path (same way the loading cell builds its paths) so the folder
# name is defined in exactly one place.
training_file_path = os.path.join(data_folder_path, "training_data.csv")
training_file_path_shuf = os.path.join(data_folder_path, "training_data_shuffled.csv")

# The three lists below are parallel: entry i of each describes the same experiment.
data_files = ['training_data.csv','test_data.csv'] # dF/F files, one per animal ("experiment"), with each ROI as a row and each time point as a column.
experiments = ['training_data','test_data']
has_labels = [True,False] # if experiment has ground truth lidocaine data and those labels are in the csv file

# zip() silently truncates to the shortest list, so a forgotten entry would
# drop an experiment without any error. Fail loudly instead.
if not (len(experiments) == len(data_files) == len(has_labels)):
    raise ValueError(
        "experiments, data_files and has_labels must have the same length, got "
        f"{len(experiments)}, {len(data_files)} and {len(has_labels)}"
    )

In [4]:
# Load every experiment's CSV into plain NumPy arrays.
#   dFoverFs[experiment]    -> all columns except the last one
#   predictions[experiment] -> the last column, stored only for labelled experiments
dFoverFs = {}
predictions = {}
for experiment, data_file, labelled in zip(experiments, data_files, has_labels):
    # Parse each file once and slice both pieces from the same frame
    # (previously labelled files were read from disk twice).
    table = pd.read_csv(os.path.join(data_folder_path, data_file), header=0)

    # NOTE(review): the last column is dropped even when `labelled` is False.
    # If an unlabelled file has no label column, this discards its final
    # time point -- confirm against the layout of the unlabelled CSVs.
    dFoverFs[experiment] = table.iloc[:, :-1].values
    if labelled:
        predictions[experiment] = table.iloc[:, -1].values

In [5]:
# Every repeat gets a number so it can be tracked and so repeats can be trained in batches.
# Predictions from each repeat are saved in a pickle file named:
#   'SA_classifier_predictions' + str(loop) + '.pickle'
#
# Warning: this version of the model does not support parallelising, as it relies on
# intermediate files for interpolation. The repeats should therefore be run sequentially
# with the current version - I plan to change this in future releases.

start_loop, end_loop = 0, 5

# Repeated training on the original training set
run_many_classifiers(
    experiments,
    dFoverFs,
    duration,
    sampling_rate,
    training_file_path,
    output_folder_path,
    start_loop,
    end_loop,
)

# Same procedure, using the shuffled training file
run_many_classifiers_shuffle(
    experiments,
    dFoverFs,
    duration,
    sampling_rate,
    training_file_path_shuf,
    output_folder_path,
    start_loop,
    end_loop,
)

RESULTS RISE: {'ACCURACY': 0.9724781375215699, 'PRECISION': 0.9564671037497124, 'RECALL': 0.9078422859933741}
VARIABILITY RESULTS RISE: {'ACCURACY': 0.006116601824199095, 'PRECISION': 0.02974193649273478, 'RECALL': 0.04082910903418303}
MIN RESULTS RISE: {'ACCURACY': 0.9655172413793104, 'PRECISION': 0.9259259259259259, 'RECALL': 0.8392857142857143}
MAX RESULTS RISE: {'ACCURACY': 0.9808429118773946, 'PRECISION': 1.0, 'RECALL': 0.9433962264150944}
training_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


test_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


RESULTS RISE: {'ACCURACY': 0.9732531952853088, 'PRECISION': 0.9572201853623035, 'RECALL': 0.9149352396325762}
VARIABILITY RESULTS RISE: {'ACCURACY': 0.017566260865561373, 'PRECISION': 0.030040350139314644, 'RECALL': 0.054520305819801745}
MIN RESULTS RISE: {'ACCURACY': 0.9427480916030534, 'PRECISION': 0.9074074074074074, 'RECALL': 0.8305084745762712}
MAX RESULTS RISE: {'ACCURACY': 0.9923664122137404, 'PRECISION': 0.9838709677419355, 'RECALL': 0.9761904761904762}
training_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


test_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


RESULTS RISE: {'ACCURACY': 0.9732473457927524, 'PRECISION': 0.9604472823612203, 'RECALL': 0.9102079567748369}
VARIABILITY RESULTS RISE: {'ACCURACY': 0.009642022194003117, 'PRECISION': 0.013231319529511995, 'RECALL': 0.04093451284604104}
MIN RESULTS RISE: {'ACCURACY': 0.9541984732824428, 'PRECISION': 0.9411764705882353, 'RECALL': 0.8305084745762712}
MAX RESULTS RISE: {'ACCURACY': 0.9809160305343512, 'PRECISION': 0.9827586206896551, 'RECALL': 0.9411764705882353}
training_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


test_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


RESULTS RISE: {'ACCURACY': 0.9717060045041093, 'PRECISION': 0.9560862450068516, 'RECALL': 0.9037244568425787}
VARIABILITY RESULTS RISE: {'ACCURACY': 0.01421765885224032, 'PRECISION': 0.012066514268270156, 'RECALL': 0.063017789930724}
MIN RESULTS RISE: {'ACCURACY': 0.950381679389313, 'PRECISION': 0.9333333333333333, 'RECALL': 0.8076923076923077}
MAX RESULTS RISE: {'ACCURACY': 0.9885496183206107, 'PRECISION': 0.9661016949152542, 'RECALL': 0.9827586206896551}
training_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


test_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


RESULTS RISE: {'ACCURACY': 0.9717060045041093, 'PRECISION': 0.9501382097211352, 'RECALL': 0.9115553361599649}
VARIABILITY RESULTS RISE: {'ACCURACY': 0.009276091839835219, 'PRECISION': 0.03077741643009311, 'RECALL': 0.03645630039607492}
MIN RESULTS RISE: {'ACCURACY': 0.9540229885057471, 'PRECISION': 0.9074074074074074, 'RECALL': 0.8518518518518519}
MAX RESULTS RISE: {'ACCURACY': 0.9808429118773946, 'PRECISION': 0.9827586206896551, 'RECALL': 0.9607843137254902}
training_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


test_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


RESULTS RISE: {'ACCURACY': 0.7523237109180778, 'PRECISION': 0.1640464798359535, 'RECALL': 0.04428133994243684}
VARIABILITY RESULTS RISE: {'ACCURACY': 0.023549361795715817, 'PRECISION': 0.14333220548960846, 'RECALL': 0.039687780670179626}
MIN RESULTS RISE: {'ACCURACY': 0.7175572519083969, 'PRECISION': 0.0, 'RECALL': 0.0}
MAX RESULTS RISE: {'ACCURACY': 0.789272030651341, 'PRECISION': 0.42857142857142855, 'RECALL': 0.11320754716981132}
training_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


test_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


RESULTS RISE: {'ACCURACY': 0.7522944634552953, 'PRECISION': 0.1401315789473684, 'RECALL': 0.040936570168376096}
VARIABILITY RESULTS RISE: {'ACCURACY': 0.012150684437079264, 'PRECISION': 0.10305244341684881, 'RECALL': 0.0320301596968368}
MIN RESULTS RISE: {'ACCURACY': 0.732824427480916, 'PRECISION': 0.0, 'RECALL': 0.0}
MAX RESULTS RISE: {'ACCURACY': 0.7633587786259542, 'PRECISION': 0.2631578947368421, 'RECALL': 0.09433962264150944}
training_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


test_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


RESULTS RISE: {'ACCURACY': 0.7477494077388787, 'PRECISION': 0.1728254459678608, 'RECALL': 0.05167150716185305}
VARIABILITY RESULTS RISE: {'ACCURACY': 0.029924247647811045, 'PRECISION': 0.08782664116546428, 'RECALL': 0.018364590910849108}
MIN RESULTS RISE: {'ACCURACY': 0.6946564885496184, 'PRECISION': 0.07142857142857142, 'RECALL': 0.018867924528301886}
MAX RESULTS RISE: {'ACCURACY': 0.7777777777777778, 'PRECISION': 0.3333333333333333, 'RECALL': 0.07272727272727272}
training_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


test_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


RESULTS RISE: {'ACCURACY': 0.7553566728086338, 'PRECISION': 0.2111609907120743, 'RECALL': 0.061150072915519646}
VARIABILITY RESULTS RISE: {'ACCURACY': 0.01881398917477953, 'PRECISION': 0.07519719146301969, 'RECALL': 0.01989849068671152}
MIN RESULTS RISE: {'ACCURACY': 0.7366412213740458, 'PRECISION': 0.11764705882352941, 'RECALL': 0.043478260869565216}
MAX RESULTS RISE: {'ACCURACY': 0.7786259541984732, 'PRECISION': 0.3, 'RECALL': 0.08620689655172414}
training_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


test_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


RESULTS RISE: {'ACCURACY': 0.7523149366792431, 'PRECISION': 0.17409436109126514, 'RECALL': 0.0515026179069798}
VARIABILITY RESULTS RISE: {'ACCURACY': 0.012229047264023806, 'PRECISION': 0.04820667079625562, 'RECALL': 0.017998303166904327}
MIN RESULTS RISE: {'ACCURACY': 0.7366412213740458, 'PRECISION': 0.10526315789473684, 'RECALL': 0.03508771929824561}
MAX RESULTS RISE: {'ACCURACY': 0.7701149425287356, 'PRECISION': 0.23529411764705882, 'RECALL': 0.0784313725490196}
training_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]


test_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[OBS] = [pd.Series(interpolate_data(row, seconds, end_frame_rate)) for row in np.array(data)]
