In [None]:
from sklearn.decomposition import PCA
from sklearn.decomposition import NMF
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_is_fitted
import matplotlib.pyplot as plt
import numpy as np

In [None]:

class BundleReducer(BaseEstimator):
    """Reduce a feature matrix with NMF or PCA and inspect the reconstruction.

    Parameters
    ----------
    reduction_type : str
        Decomposition to use: ``"nmf"`` or ``"pca"`` (case-insensitive).
    ndimensions : int
        Number of components passed to the decomposition as ``n_components``.

    Attributes
    ----------
    data_imp_ : ndarray
        Input data after imputation with a default ``SimpleImputer``.
    clf_ : NMF or PCA
        The fitted decomposition object.
    model_ : ndarray
        Reduced representation returned by ``clf_.fit_transform``.
    components_ : ndarray
        Copy of the reference ``clf_.components_``.
    recon_ : ndarray
        Reconstruction of ``data_imp_``; set by :meth:`reconstruct`.
    """

    def __init__(self,
                 reduction_type,
                 ndimensions):
        # Parameters are stored untouched (scikit-learn convention); they are
        # validated in ``fit``.
        self.reduction_type = reduction_type  # case-insensitive
        self.ndimensions = ndimensions

    def print_ndimensions(self):
        """Print the configured number of dimensions."""
        # The original referenced the bare name ``ndimensions`` (NameError).
        print("The number of dimensions is", self.ndimensions)

    def impute(self, X):
        """Fill missing values in ``X`` and store the result in ``data_imp_``.

        Uses ``SimpleImputer`` with its default settings.
        """
        imputer = SimpleImputer()
        self.data_imp_ = imputer.fit_transform(X)

    def fit(self, X, y=None):
        """Impute ``X`` and fit the configured decomposition to it.

        Parameters
        ----------
        X : array-like
            Data to reduce. It is imputed here, so calling :meth:`impute`
            beforehand is allowed but no longer required.
        y : ignored
            Present only for scikit-learn API compatibility.

        Returns
        -------
        self : BundleReducer

        Raises
        ------
        ValueError
            If ``reduction_type`` is neither ``"nmf"`` nor ``"pca"``.
        """
        kind = str(self.reduction_type).lower()

        if kind == "nmf":
            clf = NMF(n_components=self.ndimensions, init='random', random_state=0)
        elif kind == "pca":
            clf = PCA(n_components=self.ndimensions)
        else:
            # Previously this fell through and failed later with an opaque
            # AttributeError (or silently reused a stale ``clf_`` on refit).
            raise ValueError(
                "reduction_type must be 'nmf' or 'pca' (case-insensitive), "
                "got {!r}".format(self.reduction_type)
            )

        # Always fit on the data that was actually passed in, rather than on
        # whatever an earlier ``impute`` call happened to leave behind.
        self.impute(X)

        self.clf_ = clf
        self.model_ = clf.fit_transform(self.data_imp_)
        self.components_ = clf.components_

        return self

    def reconstruct(self):
        """Map the reduced representation back to the original feature space.

        Returns
        -------
        recon : ndarray
            The reconstruction, also stored as ``recon_``.
        """
        check_is_fitted(self, 'components_')
        self.recon_ = self.clf_.inverse_transform(self.model_)
        # Callers assign the result of this method, so return it.
        return self.recon_

    def plot_comparison(self):
        """Plot the column-wise mean of the reconstruction and of the imputed data.

        Requires :meth:`reconstruct` to have been called.
        """
        check_is_fitted(self, 'recon_',
                        msg="Call 'reconstruct' before using this method.")
        fig, ax = plt.subplots()
        recon_mean = np.mean(self.recon_, axis=0)
        data_mean = np.mean(self.data_imp_, axis=0)
        ax.plot(recon_mean, label="reconstruction (mean over rows)")
        ax.plot(data_mean, label="imputed data (mean over rows)")
        ax.set_xlabel("feature index")
        ax.set_ylabel("mean value")
        ax.legend()

    def reconstruction_error(self):
        """Return the root-mean-square reconstruction error of every row.

        Requires :meth:`reconstruct` to have been called.

        Returns
        -------
        loss : ndarray of shape (n_rows,)
            ``sqrt(mean((recon_ - data_imp_) ** 2))`` computed per row.
        """
        check_is_fitted(self, 'recon_',
                        msg="Call 'reconstruct' before using this method.")
        residual = self.recon_ - self.data_imp_
        return np.sqrt(np.mean(residual ** 2, axis=1))
        

In [None]:
# Dataset-loading helpers from afqinsight, aliased as `ad` for the cells below.
import afqinsight.datasets as ad
# Display the file path the module was imported from.
ad.__file__

In [None]:
# Load the dataset from the two files below (relative paths, so they are
# resolved against the current working directory).
# NOTE(review): `unsupervised=True` presumably skips loading target variables
# and `return_bundle_means=False` presumably keeps per-node values - confirm
# against the afqinsight documentation.
data = ad.load_afq_data(
    fn_nodes="combined_tract_profiles.csv",
    fn_subjects="participant_data.tsv",
    unsupervised=True,
    return_bundle_means=False,
)

In [None]:
# Split the feature matrix into per-metric blocks.
# Assumes each metric occupies 1800 consecutive columns of data.X - TODO
# confirm against the dataset's feature names.
# Python slice stops are exclusive, so the second block starts at 1800; the
# previous start of 1801 skipped column 1800 and produced only 1799 columns.
dki_fa = data.X[:, 0:1800]
dki_md = data.X[:, 1800:3600]

## NMF with 15 dimensions (dki_fa)

In [None]:
# NMF reducer with 15 components; the type string is lower-cased inside
# `fit`, so "NMF" and "nmf" are equivalent.
br1 = BundleReducer("NMF", 15)

In [None]:
# Fill missing values in dki_fa; the imputed matrix is stored on br1.data_imp_.
br1.impute(dki_fa)


In [None]:
# Fit the decomposition; sets br1.clf_, br1.model_ and br1.components_.
br1.fit(dki_fa)

In [None]:
# Map the reduced representation back to feature space; the reconstruction is
# stored on br1.recon_, which the following cells read.
rec_dki_fa = br1.reconstruct()

In [None]:
# Overlay row 1 of the imputed data and its reconstruction. Lines are labelled
# so they can be told apart; the trailing semicolon suppresses the repr output.
fig, ax = plt.subplots()
ax.plot(br1.data_imp_[1, :], label="imputed data")
ax.plot(br1.recon_[1, :], label="reconstruction")
ax.set_xlabel("feature index")
ax.set_ylabel("value")
ax.set_title("Row 1: imputed data vs. reconstruction")
ax.legend();

In [None]:
# Root-mean-square error between row 1 of the imputed data and its reconstruction.
# NOTE(review): the trailing "2" presumably records the ndimensions of an
# earlier run; br1 above is built with 15 - confirm.
np.sqrt(np.mean((br1.data_imp_[1, :]-br1.recon_[1, :])**2)) # 2

In [None]:
# Same row-1 RMSE expression as the previous cell.
# NOTE(review): the trailing "5" presumably records the ndimensions of an
# earlier run; br1 above is built with 15 - confirm.
np.sqrt(np.mean((br1.data_imp_[1, :]-br1.recon_[1, :])**2)) # 5

In [None]:
# Same row-1 RMSE expression again.
# NOTE(review): the trailing "15" presumably refers to ndimensions=15, which
# matches how br1 is constructed above - confirm.
np.sqrt(np.mean((br1.data_imp_[1, :]-br1.recon_[1, :])**2)) # 15

In [None]:
# Inspect the fitted components (taken from the underlying decomposition's
# `components_` attribute during `fit`).
br1.components_

In [None]:
# Plot the column-wise means of the reconstruction and of the imputed data.
br1.plot_comparison()

In [None]:
# Per-row RMSE between reconstruction and imputed data (one value per row).
error_br1 = br1.reconstruction_error()
# Show the shape of the error array.
error_br1.shape

## NMF with 3 dimensions (dki_fa)

In [None]:
# NMF with 3 components on dki_fa: build the reducer, impute, then fit.
br3 = BundleReducer("nmf", 3)
br3.impute(dki_fa)
br3.fit(dki_fa)

In [None]:
# Reconstruct from the 3-component model (stored on br3.recon_); note that
# rec_dki_fa is rebound here, replacing the value from the earlier cell.
rec_dki_fa = br3.reconstruct()
# Compare column-wise means of reconstruction and imputed data.
br3.plot_comparison()

In [None]:
# Per-row RMSE for the 3-component NMF model.
error_br3 = br3.reconstruction_error()
# Average of the per-row RMSE values.
np.mean(error_br3)

## NMF with 5 dimensions (dki_fa)

In [None]:
# NMF with 5 components on dki_fa: build the reducer, impute, then fit.
br5 = BundleReducer("nmf", 5)
br5.impute(dki_fa)
br5.fit(dki_fa)

In [None]:
# Reconstruct from the 5-component model (stored on br5.recon_); rec_dki_fa
# is rebound again here.
rec_dki_fa = br5.reconstruct()
# Compare column-wise means of reconstruction and imputed data.
br5.plot_comparison()

## PCA with 2 dimensions (dki_fa)

In [None]:
# PCA with 2 components on dki_fa: build, impute, fit, then reconstruct
# (the reconstruction is stored on br2.recon_).
br2 = BundleReducer("pca", 2)
br2.impute(dki_fa)
br2.fit(dki_fa)
rec_dki_fa = br2.reconstruct()

In [None]:
# Compare column-wise means of the PCA (2 components) reconstruction and the imputed data.
br2.plot_comparison()

In [None]:
# Per-row RMSE for the 2-component PCA model.
error_br2 = br2.reconstruction_error()
# Average of the per-row RMSE values.
np.mean(error_br2)

## PCA with 3 dimensions (dki_fa)

In [None]:
# PCA with 3 components on dki_fa: build, impute, fit, then reconstruct
# (the reconstruction is stored on br4.recon_).
br4 = BundleReducer("pca", 3)
br4.impute(dki_fa)
br4.fit(dki_fa)
rec_dki_fa = br4.reconstruct()

In [None]:
# Compare column-wise means of the PCA (3 components) reconstruction and the imputed data.
br4.plot_comparison()

In [None]:
# Per-row RMSE for the 3-component PCA model.
error_br4 = br4.reconstruction_error()
# Average of the per-row RMSE values.
np.mean(error_br4)