# Independent Component Analysis

In [11]:
import pandas as pd
from sklearn.decomposition import FastICA
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib widget
import common

In [12]:
data = common.loadFile("CleanedData").drop(["VISCODE", "RID"], axis=1).dropna()

In [13]:
plt.figure(figsize=[5, 5])
ax = sns.countplot(x=data['DX'])
plt.title("Data Diagnosis")
plt.ylabel("Count")
plt.xlabel("Diagnosis")
plt.xticks(ticks=range(0,3), labels=['CN', 'MCI', 'Dementia'])

for p in ax.patches:
        ax.annotate(f'\n{p.get_height()}', (p.get_x()+p.get_width()/2, p.get_height()), ha='center', 
                    va='top', color='white', size=18)
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [19]:
components = 10
# Creating input data numpy array
y =  data.loc[:,['DX']].values
X = data.drop(["DX"], axis=1)
######################################################################

# Creating ICA object
ICA = FastICA(n_components=components)
IndependentComponentValues = ICA.fit_transform(X, y)

def getColumnNames() -> list:
    res = []
    for i in range(1, len(IndependentComponentValues.T)+1):
        res.append('IC'+str(i))
    return res

# Creating the dataframe
ReducedData = pd.DataFrame(data=IndependentComponentValues, columns=getColumnNames())

# Rebuild dataset
finalDf = pd.concat([ReducedData, data[['DX']]], axis = 1)
common.saveFile(finalDf, "ICAData")


print('####### Final Independent Components ######')
print(finalDf.head(10))

####### Final Independent Components ######
        IC1       IC2       IC3       IC4       IC5       IC6       IC7  \
0  0.003896  0.009509  0.068373  0.001015  0.014387 -0.032329 -0.008541   
1  0.011935  0.032215  0.030978 -0.002612 -0.016612  0.008271 -0.018673   
2 -0.000419  0.079719  0.035922 -0.003865 -0.009403 -0.009437  0.015394   
3  0.017481 -0.008359 -0.011015  0.033456 -0.012221 -0.049776 -0.008250   
4  0.035295 -0.007454 -0.038462  0.037519 -0.028620 -0.005766 -0.008397   
5  0.018846  0.035706 -0.037079  0.031748 -0.023810 -0.026166  0.003620   
6 -0.010779 -0.012786  0.040223  0.030553 -0.015396 -0.001233 -0.014011   
7 -0.012468 -0.013219  0.047051  0.030473 -0.006953 -0.009467  0.002130   
8 -0.017845 -0.001989  0.029065  0.033506 -0.006397  0.013559  0.000429   
9 -0.015653 -0.005276 -0.019383  0.014782 -0.003637  0.003767  0.002169   

        IC8       IC9      IC10 DX  
0 -0.045205 -0.053294  0.004299  1  
1 -0.031332 -0.042118  0.014171  1  
2 -0.017890 -0.0156

In [26]:
showX = '2'
showY = '4'

fig = plt.figure(figsize=[18, 10])
ax = fig.add_subplot(1,1,1) 
ax.set_xlabel('Independent Component ' + showX, fontsize = 15)
ax.set_ylabel('Independent Component ' + showY, fontsize = 15)
ax.set_title('2 first ICA components', fontsize = 20)


targets = [0, 1, 2]
colors = ['r', 'y', 'b']
for target, color in zip(targets, colors):
    indicesToKeep = finalDf['DX'] == target
    ax.scatter(finalDf.loc[indicesToKeep, 'IC' + showX]
               , finalDf.loc[indicesToKeep, 'IC' + showY]
               , c = color
               , s = 50)
    
ax.legend(['CN', 'MCI', 'Dementia'])
ax.grid()
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [27]:
fig = plt.figure(figsize=[18, 10])
#ax = fig.add_subplot(1,1,1) 
ax = plt.axes(projection='3d')

ax.set_xlabel('IC1', fontsize=12)
ax.set_ylabel('IC2', fontsize=12)
ax.set_zlabel('IC3', fontsize=12)
ax.set_title('3 Component ICA', fontsize = 20)

targets = [0, 1, 2]
colors = ['r', 'y', 'b']
for target, color in zip(targets, colors):
    indicesToKeep = finalDf['DX'] == target
    ax.scatter(finalDf.loc[indicesToKeep, 'IC1']
               , finalDf.loc[indicesToKeep, 'IC2']
               , finalDf.loc[indicesToKeep, 'IC3']
               , c = color
               , s = 50)
    
ax.legend(['CN', 'MCI', 'Dementia'])
ax.grid()
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Linear Regression

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [8]:
# Use only one feature
X = finalDf.drop("DX", axis=1).to_numpy().astype('int')
y = finalDf.loc[:,['DX']].to_numpy().astype('int').flatten()

# Split the data into training/testing sets
X_train = X[:-20]
X_test = X[-20:]

# Split the targets into training/testing sets
y_train = y[:-20]
y_test = y[-20:]

# Create linear regression object
regr = LinearRegression()

# Train the model using the training sets
regr.fit(X_train, y_train)

# Make predictions using the testing set
y_pred = regr.predict(X_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))

Coefficients: 
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Mean squared error: 0.59
Coefficient of determination: -2.71


In [9]:
finalDf

Unnamed: 0,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,DX
0,-0.001517,-0.000184,0.005764,0.013507,0.031072,-0.046192,-0.068473,0.008327,0.053314,-0.009562,1
1,0.001678,-0.009921,0.013914,-0.017165,-0.009604,-0.031579,-0.031112,0.018391,0.042081,-0.032254,1
2,0.003084,-0.027655,0.002117,-0.010510,0.008409,-0.020276,-0.036198,-0.015679,0.015777,-0.079999,1
3,-0.033155,0.007712,0.017913,-0.012490,0.049501,-0.015673,0.011084,0.008163,0.008893,0.008308,1
4,-0.038341,-0.011120,0.037698,-0.029080,0.004639,-0.035694,0.038300,0.007884,0.010811,0.007391,1
...,...,...,...,...,...,...,...,...,...,...,...
1705,-0.020375,-0.038129,0.000536,0.029265,-0.012442,-0.002525,0.024200,0.002461,-0.001154,-0.009646,0
1706,-0.024556,0.033183,-0.030971,0.025985,-0.030162,-0.039693,0.014070,-0.029007,-0.025561,0.013891,0
1707,-0.033495,-0.006445,-0.007457,0.012655,0.054461,0.009355,0.044781,0.008689,-0.008491,0.018904,1
1708,-0.034899,0.015579,-0.013494,0.012844,0.046167,-0.009916,0.041496,0.011457,-0.002428,0.017595,0


##### 