# Notebook to capture results of ICA/NMF/PCA factor clustering experiments

In [None]:
import matplotlib.pyplot as plt
import sys
%matplotlib inline

In [None]:
# Make the project's '../Src' directory importable.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if '../Src' not in sys.path:
    sys.path.append('../Src')

In [None]:
import importlib
import factor_clustering
# Re-execute the factor_clustering module so that edits made to its source
# since the first import are picked up without restarting the kernel.
importlib.reload(factor_clustering)
# Import the class only after the reload, so the name binds to the freshly
# reloaded definition. Note that factorizer_wrappers is not reloaded here.
from factor_clustering import FactorClustering
from factorizer_wrappers import NMF_Factorizer, ICA_Factorizer, PCA_Factorizer

## Perform multiple bootstrap repeat calculations of ICA, NMF and PCA and look at clustering

In [None]:
# Known dataset names, keyed by a small integer so the active dataset can be
# switched by editing the single index used below.
possible_datasets = {
    1: 'Mini_Expression',
    2: 'AOCS_Protein_Expression',
    3: 'TCGA_OV_VST',
}

# Build the clustering helper for dataset 3 with 50 bootstrap repeats.
fc = FactorClustering(
    possible_datasets[3],
    n_repeats=50,
    method='bootstrap',
)
print("Starting analysis for", fc.basename)
print("Method:", fc.method)

# Load the expression data into the helper before any compute/plot step.
fc.read_expression_matrix()

# The three factorizer classes imported from factorizer_wrappers, in one list.
all_factos = [
    NMF_Factorizer,
    ICA_Factorizer,
    PCA_Factorizer,
]

In [None]:
# Bounds handed to every compute/plot call in this notebook
# (presumably the range of component counts to scan - confirm against
# FactorClustering).
start_nc = 2
end_nc = 14

In [None]:
# Toggle for the computation step; set to False to skip this cell's work.
# Beware - this could take hours (for the full size dataset)!
RUN_FACTOR_REPEATS = True

if RUN_FACTOR_REPEATS:
    # force=False is passed explicitly; presumably this reuses previously
    # cached results rather than recomputing - confirm in FactorClustering.
    fc.compute_and_cache_multiple_factor_repeats(
        start_nc,
        end_nc,
        force=False,
    )

In [None]:
# Toggle for the combined-factor scatter plots; set to False to skip.
PLOT_COMBINED_SCATTER = True

if PLOT_COMBINED_SCATTER:
    fc.plot_multiple_combined_factors_scatter(start_nc, end_nc)

In [None]:
# Toggle for the silhouette-score plot; set to False to skip.
PLOT_SILHOUETTE = True

if PLOT_SILHOUETTE:
    fc.plot_silhouette_scores(start_nc, end_nc, show=True)
    

In [None]:
# Toggle for the NMF-only scatter plots; set to False to skip.
PLOT_NMF_SCATTER = True

if PLOT_NMF_SCATTER:
    fc.plot_multiple_single_factors_scatter(
        NMF_Factorizer,
        start_nc,
        end_nc,
    )

In [None]:
# Toggle for the ICA-only scatter plots; set to False to skip.
PLOT_ICA_SCATTER = True

if PLOT_ICA_SCATTER:
    fc.plot_multiple_single_factors_scatter(
        ICA_Factorizer,
        start_nc,
        end_nc,
    )

In [None]:
# Toggle for the PCA-only scatter plots; set to False to skip.
PLOT_PCA_SCATTER = True

if PLOT_PCA_SCATTER:
    fc.plot_multiple_single_factors_scatter(
        PCA_Factorizer,
        start_nc,
        end_nc,
    )

In [None]:
# Disabled by default; set to True to run the cluster-statistics investigation.
RUN_CLUSTER_STATISTICS = False

if RUN_CLUSTER_STATISTICS:
    fc.investigate_multiple_cluster_statistics(start_nc, end_nc)