# Create PCA Models:
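This notebook fits one PCA model per raw data file, pickles each fitted model, and saves a summary table of the models; components and loadings can later be extracted from the saved models.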

In [1]:
from sklearn.decomposition import PCA
import pandas as pd
from util_functions import load_file, pickle_file

### Notebook Parameters

In [2]:
# Directory prefixes; both end with a slash because paths are built by
# plain string concatenation.
input_data_dir = 'data_raw_pickled/'
output_data_dir = 'pca_models_pickled/'

# Table describing the raw pickled data files. The main loop iterates over its
# rows and reads the `rel_path`, `subject` and `stimulus` fields.
data_raw_pickled_df = load_file('data_meta_pickled/data_raw_pickled_df.pickle')

# Requested number of principal components for each model.
n_components = 10

# Empty summary table; the main loop adds one row per fitted PCA model.
model_df = pd.DataFrame(
    columns=[
        'subject',
        'stimulus',
        'model_path',
        'orig_data_path',
        'n_components',
        'sum_explained_variance',
    ],
)

### Notebook Functions

In [3]:
def create_model(n_components, data):
    """Fit a PCA model to `data` and report its total explained variance ratio.

    Args:
        n_components: Forwarded to ``PCA`` as its ``n_components`` argument.
        data: The data handed to ``PCA.fit``.

    Returns:
        A ``(model, total_ratio)`` tuple: the fitted ``PCA`` instance and the
        sum of its ``explained_variance_ratio_`` entries.
    """
    # The seed is fixed so that solver paths which use randomness give
    # repeatable results between runs.
    fitted = PCA(n_components=n_components, random_state=10).fit(data)
    total_ratio = fitted.explained_variance_ratio_.sum()
    return fitted, total_ratio

### Main Code Execution

In [4]:
# Fit, pickle and register one PCA model per row of the raw-data table.
for row in data_raw_pickled_df.itertuples():
    data_file = load_file(row.rel_path)

    # PCA cannot extract more components than the smaller dimension of the
    # data, so cap the requested count for this file. The cap is kept in a
    # per-file variable: overwriting the notebook-level `n_components` here
    # would let one small file permanently lower the component count used for
    # every file processed after it.
    n_components_used = min(min(data_file.shape), n_components)
    model, sum_explained_variance = create_model(n_components_used, data_file)

    # Rows without a stimulus get no stimulus part in their key / file name.
    stimulus_string = '_stimulus_' + row.stimulus if row.stimulus is not None else ''
    model_key = row.subject + stimulus_string
    save_path = (
        output_data_dir
        + model_key
        + '_PCA_model_num_comp_'
        + str(n_components_used)
        + '.pickle'
    )
    pickle_file(save_path, model)

    # Record where the model was saved and how it was built; the stored
    # component count is the one actually used for this file.
    model_df.loc[model_key] = [
        row.subject,
        row.stimulus,
        save_path,
        row.rel_path,
        n_components_used,
        sum_explained_variance,
    ]

# Persist the summary table of all fitted models.
pickle_file('data_meta_pickled/pca_models_pickled_df.pickle', model_df)