In [None]:
from sklearn.decomposition import PCA
from sklearn.decomposition import NMF
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_is_fitted
import matplotlib.pyplot as plt
import numpy as np

In [None]:

class BundleReducer(BaseEstimator):
    """Reduce a feature matrix with NMF or PCA and inspect its reconstruction.

    Parameters
    ----------
    reduction_type : str
        ``"nmf"`` or ``"pca"`` (case-insensitive).
    ndimensions : int
        Number of components handed to the underlying decomposition.

    Attributes
    ----------
    data_imp_ : ndarray
        Imputed copy of the input data (set by ``impute`` and ``fit``).
    clf_ : NMF or PCA
        The underlying decomposition object (set by ``fit``).
    model_ : ndarray
        Output of ``clf_.fit_transform`` on the imputed data (set by ``fit``).
    components_ : ndarray
        ``clf_.components_`` (set by ``fit``).
    recon_ : ndarray
        ``clf_.inverse_transform(model_)`` (set by ``reconstruct``).
    """

    def __init__(self, reduction_type, ndimensions):
        self.reduction_type = reduction_type  # case-insensitive
        self.ndimensions = ndimensions

    def print_ndimensions(self):
        """Print the configured number of dimensions."""
        # The value lives on the instance; a bare ``ndimensions`` is undefined here.
        print("The number of dimensions is", self.ndimensions)

    def impute(self, X):
        """Fill missing values in ``X`` with a default ``SimpleImputer``.

        The imputed array is stored on ``self.data_imp_``; nothing is returned.
        """
        imputer = SimpleImputer()
        self.data_imp_ = imputer.fit_transform(X)

    def fit(self, X, y=None):
        """Impute ``X`` and fit the configured decomposition to it.

        Parameters
        ----------
        X : array-like
            Data to reduce. It is imputed here, so calling ``impute`` first is
            optional.
        y : ignored
            Present only for scikit-learn API compatibility.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``reduction_type`` is neither ``"nmf"`` nor ``"pca"``.
        """
        kind = self.reduction_type.lower()

        if kind == "nmf":
            self.clf_ = NMF(n_components=self.ndimensions, init='random', random_state=0)
        elif kind == "pca":
            self.clf_ = PCA(n_components=self.ndimensions)
        else:
            # Without this branch an unknown type fell through and failed later
            # with an unrelated AttributeError on ``clf_``.
            raise ValueError(
                "reduction_type must be 'nmf' or 'pca', got %r" % (self.reduction_type,)
            )

        # Fit on the data that was actually passed in rather than on whatever an
        # earlier impute() call happened to leave behind.
        self.impute(X)

        self.model_ = self.clf_.fit_transform(self.data_imp_)
        self.components_ = self.clf_.components_

        return self

    def reconstruct(self):
        """Map the reduced data back to feature space.

        Returns
        -------
        ndarray
            The reconstruction, which is also stored on ``self.recon_``.
        """
        check_is_fitted(self, 'components_')
        self.recon_ = self.clf_.inverse_transform(self.model_)
        return self.recon_

    def _ensure_reconstructed(self):
        """Compute ``recon_`` on demand so the inspection helpers work after ``fit``."""
        if not hasattr(self, "recon_"):
            self.reconstruct()

    def plot_comparison(self):
        """Plot the across-sample mean of the reconstruction against that of the imputed data."""
        self._ensure_reconstructed()
        fig, ax = plt.subplots()
        recon_mean = np.mean(self.recon_, axis=0)
        data_mean = np.mean(self.data_imp_, axis=0)
        ax.plot(recon_mean, label="reconstruction (mean over samples)")
        ax.plot(data_mean, label="imputed data (mean over samples)")
        ax.set_xlabel("feature index")
        ax.legend()

    def reconstruction_error(self):
        """Return the root-mean-square reconstruction error of every sample.

        Returns
        -------
        ndarray
            One RMSE value per row of ``data_imp_``.
        """
        self._ensure_reconstructed()
        residual = self.recon_ - self.data_imp_
        # Row-wise RMSE; replaces the explicit Python loop over samples.
        return np.sqrt(np.mean(residual ** 2, axis=1))
        

In [None]:
import afqinsight.datasets as ad
# Display the path the afqinsight.datasets module was loaded from (environment sanity check).
ad.__file__

In [None]:
# Load the tract-profile nodes and the subject table through afqinsight,
# in unsupervised mode and without reducing bundles to their means.
data = ad.load_afq_data(
    fn_nodes="combined_tract_profiles.csv",
    fn_subjects="participant_data.tsv",
    unsupervised=True,
    return_bundle_means=False,
)

In [None]:
# Each metric is taken as a run of 1800 consecutive columns (18 bundles x 100
# nodes, the layout diff_bundles relies on) -- TODO confirm the column order
# against the loaded feature names.
dki_fa = data.X[:, 0:1800]
# Slice stops are exclusive, so the second block starts at 1800 (not 1801);
# starting at 1801 dropped column 1800 and left only 1799 features.
dki_md = data.X[:, 1800:3600]

## NMF with 2 dimensions (dki_fa)

In [None]:
# br1 is the 2-component NMF model: the section heading says so and the later
# cells store its per-bundle error as diff_2 and label it "2" in the legend.
br1 = BundleReducer("NMF", 2)

In [None]:
# Fill missing values in dki_fa with SimpleImputer's defaults; the result is kept on br1.data_imp_.
br1.impute(dki_fa)


In [None]:
# Fit the NMF model; sets br1.model_ and br1.components_.
br1.fit(dki_fa)

In [None]:
# reconstruct() stores its result on br1.recon_; read it from there so this
# variable holds the reconstruction instead of the method's return value.
br1.reconstruct()
rec_dki_fa_1 = br1.recon_

### analysis for each individual participant

In [None]:
# Overlay the imputed profile and its reconstruction for the participant at row index 1.
plt.plot(br1.data_imp_[1, :])
plt.plot(br1.recon_[1, :])

In [None]:
# RMSE between the imputed and reconstructed profile of the participant at row index 1.
np.sqrt(np.mean((br1.data_imp_[1, :]-br1.recon_[1, :])**2)) # presumably the run with 2 components -- TODO confirm

In [None]:
# Same RMSE expression as the previous cell, kept from a re-run of br1.
np.sqrt(np.mean((br1.data_imp_[1, :]-br1.recon_[1, :])**2)) # presumably the run with 5 components -- TODO confirm

In [None]:
# Same RMSE expression as the previous cells, kept from a re-run of br1.
np.sqrt(np.mean((br1.data_imp_[1, :]-br1.recon_[1, :])**2)) # presumably the run with 15 components -- TODO confirm

In [None]:
# Show the components_ array of the fitted decomposition.
br1.components_

In [None]:
# Plot the sample-averaged reconstruction against the sample-averaged imputed data.
br1.plot_comparison()

In [None]:
# Per-sample RMSE of the br1 reconstruction; the shape shows one value per row of the data.
error_br1 = br1.reconstruction_error()
error_br1.shape

## NMF with 3 dimensions (dki_fa)

In [None]:
# Build, impute and fit the 3-component NMF reducer on dki_fa.
br3 = BundleReducer("nmf", 3)
br3.impute(dki_fa)
br3.fit(dki_fa)

In [None]:
# reconstruct() stores its result on br3.recon_; read it from there so this
# variable holds the reconstruction instead of the method's return value.
br3.reconstruct()
rec_dki_fa_3 = br3.recon_
br3.plot_comparison()

In [None]:
# Per-sample RMSE of the br3 reconstruction, summarised by its mean.
error_br3 = br3.reconstruction_error()
np.mean(error_br3)

## NMF with 5 dimensions (dki_fa)

In [None]:
# Build, impute and fit the 5-component NMF reducer on dki_fa.
br5 = BundleReducer("nmf", 5)
br5.impute(dki_fa)
br5.fit(dki_fa)

In [None]:
# reconstruct() stores its result on br5.recon_; read it from there so this
# variable holds the reconstruction instead of the method's return value.
br5.reconstruct()
rec_dki_fa_5 = br5.recon_
br5.plot_comparison()

In [None]:
# Fit one NMF reducer per component count.
# Plain lists are used because a float np.zeros array cannot hold estimator
# objects, and the loop no longer depends on an undefined index() helper.
num_dim = [6, 8, 10]
br = []
rec = []
for n_components in num_dim:
    reducer = BundleReducer("nmf", n_components)
    reducer.impute(dki_fa)
    reducer.fit(dki_fa)
    reducer.reconstruct()  # the reconstruction is stored on reducer.recon_
    br.append(reducer)
    rec.append(reducer.recon_)

# Later cells refer to these reducers by name.
br6, br8, br10 = br


In [None]:
def diff_bundles(data_imp, data_recon, sample, n_bundles=18, n_nodes=100):
    """Return the per-bundle RMSE between one sample and its reconstruction.

    The feature axis is read as ``n_bundles`` consecutive segments of
    ``n_nodes`` columns each.

    Parameters
    ----------
    data_imp : ndarray, 2-D
        Reference data, indexed as ``[sample, feature]``.
    data_recon : ndarray, 2-D
        Reconstructed data, indexed the same way as ``data_imp``.
    sample : int
        Row to evaluate.
    n_bundles : int, default 18
        Number of consecutive segments.
    n_nodes : int, default 100
        Number of columns per segment.

    Returns
    -------
    ndarray of shape (n_bundles,)
        Root-mean-square difference within each segment.
    """
    diff_bundle = np.zeros(n_bundles)
    for bundle in range(n_bundles):
        start = n_nodes * bundle
        stop = start + n_nodes
        residual = data_imp[sample, start:stop] - data_recon[sample, start:stop]
        diff_bundle[bundle] = np.sqrt(np.mean(residual ** 2))

    return diff_bundle

In [None]:
def _mean_bundle_rmse(reducer):
    """Average the per-bundle RMSE from diff_bundles over every sample of ``reducer``."""
    # Take the sample count from the data instead of hard-coding 641 rows.
    n_samples = reducer.data_imp_.shape[0]
    per_sample = np.array([
        diff_bundles(reducer.data_imp_, reducer.recon_, sample)
        for sample in range(n_samples)
    ])
    return per_sample.mean(axis=0)


# One mean per-bundle error vector per fitted reducer.
diff_2 = _mean_bundle_rmse(br1)
diff_3 = _mean_bundle_rmse(br3)
diff_5 = _mean_bundle_rmse(br5)
diff_6 = _mean_bundle_rmse(br6)
diff_8 = _mean_bundle_rmse(br8)
diff_10 = _mean_bundle_rmse(br10)

In [None]:
# Inspect the mean per-bundle RMSE vector computed from br1.
diff_2

In [None]:
# Stack the six per-model error vectors into one (6, 18) array, one row per model.
diff = np.array([diff_2, diff_3, diff_5, diff_6, diff_8, diff_10]).reshape((6, 18))

In [None]:
# Number of rows in diff, i.e. how many models are being compared.
diff.shape[0]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np

# One colour per row of `diff`; `ind` holds the component count used as each row's label.
p = sns.color_palette("rocket", n_colors=6)
ind = [2, 3, 5, 6, 8, 10]

fig, ax = plt.subplots()
for dd in range(diff.shape[0]):
    ax.plot(diff[dd], color=p[dd], label=ind[dd])

ax.set_xlabel("bundle index")
ax.set_ylabel("mean per-bundle RMSE")
# Build the legend once, after every line has been added.
ax.legend(title="n components");

In [None]:
# NOTE(review): scaled_data is only assigned in the final cell of this notebook,
# so on a fresh kernel run top-to-bottom this line raises NameError.
scaled_data.shape

In [None]:
# Take the average of the coefficients for the different components

## PCA with 2 dimensions (dki_fa)

In [None]:
# Build, impute and fit the 2-component PCA reducer on dki_fa.
br2 = BundleReducer("pca", 2)
br2.impute(dki_fa)
br2.fit(dki_fa)
# reconstruct() stores its result on br2.recon_; read it from there so this
# variable holds the reconstruction instead of the method's return value.
br2.reconstruct()
rec_dki_fa = br2.recon_

In [None]:
# Plot the sample-averaged reconstruction against the sample-averaged imputed data.
br2.plot_comparison()

In [None]:
# Per-sample RMSE of the br2 reconstruction, summarised by its mean.
error_br2 = br2.reconstruction_error()
np.mean(error_br2)

## PCA with 3 dimensions (dki_fa)

In [None]:
# Build, impute and fit the 3-component PCA reducer on dki_fa.
br4 = BundleReducer("pca", 3)
br4.impute(dki_fa)
br4.fit(dki_fa)
# reconstruct() stores its result on br4.recon_; read it from there so this
# variable holds the reconstruction instead of the method's return value.
br4.reconstruct()
rec_dki_fa = br4.recon_

In [None]:
# Plot the sample-averaged reconstruction against the sample-averaged imputed data.
br4.plot_comparison()

In [None]:
# Per-sample RMSE of the br4 reconstruction, summarised by its mean.
error_br4 = br4.reconstruction_error()
np.mean(error_br4)

In [None]:
# Overlay the imputed profile and its reconstruction for the participant at row index 1.
plt.plot(br4.data_imp_[1, :])
plt.plot(br4.recon_[1, :])

In [None]:
# RMSE over all columns for the participant at row index 1 (br4 uses 3 components).
np.sqrt(np.mean((br4.data_imp_[1, :]-br4.recon_[1, :])**2)) # 3

In [None]:
# RMSE restricted to columns 0-99 (presumably the first bundle's 100 nodes -- confirm).
np.sqrt(np.mean((br4.data_imp_[1, :100]-br4.recon_[1, :100])**2))

In [None]:
# RMSE restricted to columns 100-199 (presumably the second bundle -- confirm).
np.sqrt(np.mean((br4.data_imp_[1, 100:200]-br4.recon_[1, 100:200])**2))

In [None]:
# RMSE restricted to columns 1000-1099 (presumably the eleventh bundle -- confirm).
np.sqrt(np.mean((br4.data_imp_[1, 1000:1100]-br4.recon_[1, 1000:1100])**2))

In [None]:
# NOTE(review): this re-defines diff_bundles, which an earlier cell already
# defines; the later definition is the one in effect from here on.
def diff_bundles(data_imp, data_recon, sample, n_bundles=18, n_nodes=100):
    """Return the per-bundle RMSE between one sample and its reconstruction.

    The feature axis is read as ``n_bundles`` consecutive segments of
    ``n_nodes`` columns each.

    Parameters
    ----------
    data_imp : ndarray, 2-D
        Reference data, indexed as ``[sample, feature]``.
    data_recon : ndarray, 2-D
        Reconstructed data, indexed the same way as ``data_imp``.
    sample : int
        Row to evaluate.
    n_bundles : int, default 18
        Number of consecutive segments.
    n_nodes : int, default 100
        Number of columns per segment.

    Returns
    -------
    ndarray of shape (n_bundles,)
        Root-mean-square difference within each segment.
    """
    diff_bundle = np.zeros(n_bundles)
    for bundle in range(n_bundles):
        start = n_nodes * bundle
        stop = start + n_nodes
        residual = data_imp[sample, start:stop] - data_recon[sample, start:stop]
        diff_bundle[bundle] = np.sqrt(np.mean(residual ** 2))
    return diff_bundle

In [None]:
# Mean per-bundle RMSE of the br4 reconstruction across all samples.
# The sample count comes from the data instead of a hard-coded 641.
diff = np.array([
    diff_bundles(br4.data_imp_, br4.recon_, i)
    for i in range(br4.data_imp_.shape[0])
])
diff = diff.mean(axis=0)
plt.plot(diff)

In [None]:
# Mean per-bundle RMSE of the br1 reconstruction across all samples.
# The sample count comes from the data instead of a hard-coded 641.
diff = np.array([
    diff_bundles(br1.data_imp_, br1.recon_, i)
    for i in range(br1.data_imp_.shape[0])
])
diff = diff.mean(axis=0)
plt.plot(diff)

Use a seaborn continuous color palette, with one line per number of dimensions.
Plot the different numbers of dimensions in one graph.


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np

# Toy example of a continuous palette: the same random signal divided by 1..10,
# one colour per curve.
# The signal gets its own name so the loaded dataset in `data` is not overwritten,
# and the generator is seeded so the figure is reproducible.
rng = np.random.default_rng(0)
toy_signal = rng.standard_normal(100)

scaled_data = np.zeros((10, toy_signal.shape[0]))
for ii in range(1, 11):
    scaled_data[ii-1] = toy_signal / ii

p = sns.color_palette("rocket", n_colors=10)

fig, ax = plt.subplots()
for dd in range(scaled_data.shape[0]):
    ax.plot(scaled_data[dd], color=p[dd])