In [1]:
import os
import pickle
from sklearn.decomposition import PCA
from datetime import datetime

In [2]:
# Root of the data tree, relative to the notebook's working directory.
data_dir = '../data/'
# Raw pickled inputs are listed and loaded from here (see open_file).
input_data_dir = f'{data_dir}data_raw_pickled/'
# Fitted PCA models and their transformed outputs are written here (see pickle_model).
output_data_dir = f'{data_dir}pca_models_pickled/'

In [3]:
# File names (not full paths) of every raw pickle in input_data_dir whose name
# ends with '_CellRespZ.pickle'.
# - endswith() replaces the former substring test, so stray files that merely
#   contain the suffix (e.g. '*.pickle.bak') are not picked up and unpickled.
# - sorted() makes the processing order reproducible; os.listdir() returns
#   entries in arbitrary order.
timeseries_files = sorted(
    name for name in os.listdir(input_data_dir)
    if name.endswith('_CellRespZ.pickle')
)

In [4]:
def open_file(file_path):
    """Load and return the object pickled in one file under ``input_data_dir``.

    Parameters
    ----------
    file_path : str
        File name relative to ``input_data_dir``.

    Returns
    -------
    object
        Whatever object was stored in the pickle file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # os.path.join works whether or not input_data_dir ends with a separator,
    # unlike plain string concatenation.
    full_path = os.path.join(input_data_dir, file_path)
    with open(full_path, "rb") as input_file:
        # NOTE(security): pickle.load can execute arbitrary code embedded in the
        # file; only use this on files from a trusted source.
        return pickle.load(input_file)

In [5]:
def create_model(n_components, data):
    """Fit a PCA estimator on ``data`` and report timing and explained variance.

    Parameters
    ----------
    n_components : int
        Number of principal components passed to ``PCA``.
    data : array-like
        Data handed to ``PCA.fit``.

    Returns
    -------
    PCA
        The fitted estimator (constructed with ``random_state=10``).
    """
    estimator = PCA(random_state=10, n_components=n_components)

    # Wall-clock timestamps are printed before and after the fit as a progress log.
    print('Beginning fit: ', datetime.now().time())
    estimator.fit(data)
    print('Fitted: ', datetime.now().time())

    # Total fraction of variance captured by the retained components.
    explained = estimator.explained_variance_ratio_.sum()
    print('Variance Explained: ', explained)
    return estimator

In [6]:
def pickle_model(model, data_label, n_components):
    """Pickle ``model`` into ``output_data_dir``.

    The file is named ``<data_label>_PCA_model_num_comp_<n_components>.pickle``.

    Parameters
    ----------
    model : object
        Any picklable object (a fitted model or a transformed array).
    data_label : str
        Prefix identifying the dataset the object was derived from.
    n_components : int or str
        Component count, or any suffix; it is converted with ``str()``.

    Raises
    ------
    OSError
        If the output directory cannot be created or the file cannot be written.
    """
    file_name = f'{data_label}_PCA_model_num_comp_{n_components}.pickle'
    # os.path.join works whether or not output_data_dir ends with a separator.
    target_path = os.path.join(output_data_dir, file_name)

    # Create the output directory on first use so a fresh checkout does not
    # fail with FileNotFoundError after an expensive fit.
    parent_dir = os.path.dirname(target_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(target_path, 'wb') as pickle_file:
        pickle.dump(model, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
def create_transformed_model(model, data, n_components, data_label):
    """Transform ``data`` with a fitted model and pickle the result.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``transform``.
    data : array-like
        Data passed to ``model.transform``.
    n_components : int
        Component count; used only to build the output file suffix.
    data_label : str
        Dataset label forwarded to ``pickle_model``.

    Returns
    -------
    None
    """
    projected = model.transform(data)
    print('Transformed Model:', projected.shape)

    # pickle_model receives '<n_components>_transformed' in place of the plain
    # count, so the projection is saved beside the model under its own name.
    suffix = str(n_components) + '_transformed'
    pickle_model(projected, data_label, suffix)
In [8]:
def create_pickled_models(n_components):
    """Fit, pickle and transform a PCA model for every file in ``timeseries_files``.

    For each file the requested component count is capped at
    ``min(data.shape)``, since PCA cannot extract more components than the
    smaller dimension of its input. The cap is applied per file: a small
    dataset never reduces the count used for the files processed after it.

    Parameters
    ----------
    n_components : int
        Requested (maximum) number of principal components.

    Returns
    -------
    None
    """
    for file_name in timeseries_files:
        # Label is everything in the file name before '_CellRespZ'.
        data_label = file_name.split('_CellRespZ')[0]
        print('Beginning fit for', str(data_label))
        data = open_file(file_name)  # assumes the loaded object exposes .shape — TODO confirm

        # Keep the cap in a separate local. Rebinding `n_components` here would
        # leak one file's reduced count into every later iteration.
        file_n_components = min(min(data.shape), n_components)

        model = create_model(file_n_components, data)
        pickle_model(model, data_label, file_n_components)
        create_transformed_model(model, data, file_n_components, data_label)

In [9]:
# Component counts to sweep. Each value is an upper bound:
# create_pickled_models limits it by min(data.shape) of the loaded data.
n_component_array = [10, 100]

for n_components in n_component_array:
    create_pickled_models(n_components=n_components)

Beginning fit for subject_10
Beginning fit:  18:57:20.904807
Fitted:  18:57:47.322654
Variance Explained:  0.2277375963375185
Transformed Model: (4426, 10)
Beginning fit for subject_10_stimulus_0
Beginning fit:  18:57:54.656683
Fitted:  18:57:55.605206
Variance Explained:  0.28340653
Transformed Model: (204, 10)
Beginning fit for subject_10_stimulus_10
Beginning fit:  18:57:56.037857
Fitted:  18:57:56.761782
Variance Explained:  0.48495638
Transformed Model: (220, 10)
Beginning fit for subject_10_stimulus_11
Beginning fit:  18:57:57.199384
Fitted:  18:57:57.947874
Variance Explained:  0.39229643
Transformed Model: (220, 10)
Beginning fit for subject_10_stimulus_12
Beginning fit:  18:57:58.438680
Fitted:  18:57:59.946189
Variance Explained:  0.31091976
Transformed Model: (220, 10)
Beginning fit for subject_10_stimulus_13
Beginning fit:  18:58:00.528738
Fitted:  18:58:01.479766
Variance Explained:  0.30149522
Transformed Model: (202, 10)
Beginning fit for subject_10_stimulus_14
Beginning

Fitted:  19:00:34.516537
Variance Explained:  0.5319177
Transformed Model: (30, 10)
Beginning fit for subject_15
Beginning fit:  19:00:38.200276
Fitted:  19:00:51.713218
Variance Explained:  0.25467824050607457
Transformed Model: (4880, 10)
Beginning fit for subject_15_stimulus_0
Beginning fit:  19:00:55.636148
Fitted:  19:00:56.232735
Variance Explained:  0.25388137
Transformed Model: (330, 10)
Beginning fit for subject_15_stimulus_10
Beginning fit:  19:00:56.594196
Fitted:  19:00:57.018324
Variance Explained:  0.5363014
Transformed Model: (240, 10)
Beginning fit for subject_15_stimulus_11
Beginning fit:  19:00:57.307091
Fitted:  19:00:57.751273
Variance Explained:  0.2690565
Transformed Model: (240, 10)
Beginning fit for subject_15_stimulus_12
Beginning fit:  19:00:58.040041
Fitted:  19:00:58.482722
Variance Explained:  0.24152443
Transformed Model: (240, 10)
Beginning fit for subject_15_stimulus_14
Beginning fit:  19:00:58.650666
Fitted:  19:00:58.816607
Variance Explained:  0.57656

Transformed Model: (720, 10)
Beginning fit for subject_4_stimulus_3
Beginning fit:  19:04:51.199955
Fitted:  19:04:53.570758
Variance Explained:  0.13217765
Transformed Model: (720, 10)
Beginning fit for subject_5
Beginning fit:  19:04:58.799998
Fitted:  19:05:18.872556
Variance Explained:  0.1786630171992071
Transformed Model: (2880, 10)
Beginning fit for subject_5_stimulus_0
Beginning fit:  19:05:24.842147
Fitted:  19:05:26.952259
Variance Explained:  0.22013062
Transformed Model: (720, 10)
Beginning fit for subject_5_stimulus_1
Beginning fit:  19:05:29.091448
Fitted:  19:05:31.248684
Variance Explained:  0.16767932
Transformed Model: (720, 10)
Beginning fit for subject_5_stimulus_2
Beginning fit:  19:05:32.652667
Fitted:  19:05:34.437414
Variance Explained:  0.17475393
Transformed Model: (720, 10)
Beginning fit for subject_5_stimulus_3
Beginning fit:  19:05:36.491877
Fitted:  19:05:38.538333
Variance Explained:  0.2103066
Transformed Model: (720, 10)
Beginning fit for subject_6
Begi

Beginning fit:  19:10:14.881724
Fitted:  19:10:21.260185
Variance Explained:  0.34350383
Transformed Model: (2090, 25)
Beginning fit for subject_13_stimulus_4
Beginning fit:  19:10:23.379721
Fitted:  19:10:27.916285
Variance Explained:  0.25559977
Transformed Model: (1200, 25)
Beginning fit for subject_13_stimulus_9
Beginning fit:  19:10:28.781585
Fitted:  19:10:29.205713
Variance Explained:  0.6284918
Transformed Model: (50, 25)
Beginning fit for subject_14
Beginning fit:  19:10:33.428932
Fitted:  19:10:50.742471
Variance Explained:  0.35559396782534736
Transformed Model: (3890, 25)
Beginning fit for subject_14_stimulus_0
Beginning fit:  19:10:53.959286
Fitted:  19:10:54.664661
Variance Explained:  0.6200943
Transformed Model: (175, 25)
Beginning fit for subject_14_stimulus_10
Beginning fit:  19:10:54.863190
Fitted:  19:10:54.994538
Variance Explained:  0.93899465
Transformed Model: (30, 25)
Beginning fit for subject_14_stimulus_11
Beginning fit:  19:10:55.111851
Fitted:  19:10:55.351

Beginning fit:  19:13:56.243420
Fitted:  19:13:58.959157
Variance Explained:  0.32190582
Transformed Model: (720, 25)
Beginning fit for subject_2
Beginning fit:  19:14:03.873290
Fitted:  19:14:21.706212
Variance Explained:  0.22231098493718127
Transformed Model: (3520, 25)
Beginning fit for subject_2_stimulus_0
Beginning fit:  19:14:26.617067
Fitted:  19:14:30.179047
Variance Explained:  0.24937408
Transformed Model: (880, 25)
Beginning fit for subject_2_stimulus_1
Beginning fit:  19:14:31.729542
Fitted:  19:14:34.560575
Variance Explained:  0.2446628
Transformed Model: (880, 25)
Beginning fit for subject_2_stimulus_2
Beginning fit:  19:14:36.529313
Fitted:  19:14:39.926344
Variance Explained:  0.22196332
Transformed Model: (880, 25)
Beginning fit for subject_2_stimulus_3
Beginning fit:  19:14:41.366052
Fitted:  19:14:44.647277
Variance Explained:  0.29993802
Transformed Model: (880, 25)
Beginning fit for subject_3
Beginning fit:  19:14:48.487468
Fitted:  19:15:01.713209
Variance Expla