# Independent Component Analysis

In [1]:
import pandas as pd
from sklearn.decomposition import FastICA
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib widget
import common

In [2]:
data = common.loadFile("CleanedData").drop(["VISCODE", "RID"], axis=1).dropna()

In [3]:
plt.figure(figsize=[5, 5])
ax = sns.countplot(x=data['DX'])
plt.title("Data Diagnosis")
plt.ylabel("Count")
plt.xlabel("Diagnosis")
plt.xticks(ticks=range(0,3), labels=['CN', 'MCI', 'Dementia'])

for p in ax.patches:
        ax.annotate(f'\n{p.get_height()}', (p.get_x()+p.get_width()/2, p.get_height()), ha='center', 
                    va='top', color='white', size=18)
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [4]:
components = 10
# Creating input data numpy array
y =  data.loc[:,['DX']].values
X = data.drop(["DX"], axis=1)
######################################################################

# Creating ICA object
ICA = FastICA(n_components=components,  random_state=0) # Without this randomstate the results will always be diferent.
IndependentComponentValues = ICA.fit_transform(X, y)

def getColumnNames() -> list:
    res = []
    for i in range(1, len(IndependentComponentValues.T)+1):
        res.append('IC'+str(i))
    return res

# Creating the dataframe
ReducedData = pd.DataFrame(data=IndependentComponentValues, columns=getColumnNames())

# Rebuild dataset
finalDf = pd.concat([ReducedData, data[['DX']]], axis = 1)
common.saveFile(finalDf, "ICAData")


print('####### Final Independent Components ######')
print(finalDf.head(10))

####### Final Independent Components ######
        IC1       IC2       IC3       IC4       IC5       IC6       IC7  \
0 -0.004563  0.053338 -0.013619 -0.001153 -0.068614 -0.031754  0.009565   
1 -0.012612  0.042117  0.017003  0.002385 -0.030882  0.008735  0.032264   
2 -0.000506  0.015700  0.009852  0.003665 -0.036022 -0.009071  0.079807   
3 -0.017587  0.008918  0.012300 -0.033458  0.010938 -0.049676 -0.008352   
4 -0.036075  0.010869  0.028527 -0.037763  0.038536 -0.005521 -0.007436   
5 -0.019023 -0.008061  0.023614 -0.031780  0.037117 -0.026246  0.035718   
6  0.011592 -0.011906  0.015478 -0.030452 -0.039946 -0.001162 -0.012807   
7  0.012926 -0.012467  0.007168 -0.030418 -0.046938 -0.009342 -0.013213   
8  0.017973 -0.017900  0.006453 -0.033524 -0.028918  0.013656 -0.001974   
9  0.015123 -0.012234  0.003524 -0.014875  0.019377  0.003846 -0.005250   

        IC8       IC9      IC10 DX  
0 -0.045461 -0.008527 -0.003025  1  
1 -0.031358 -0.018602 -0.012969  1  
2 -0.018471  0.0154

In [5]:
showX = '2'
showY = '4'

fig = plt.figure(figsize=[18, 10])
ax = fig.add_subplot(1,1,1) 
ax.set_xlabel('Independent Component ' + showX, fontsize = 15)
ax.set_ylabel('Independent Component ' + showY, fontsize = 15)
ax.set_title('2 first ICA components', fontsize = 20)


targets = [0, 1, 2]
colors = ['r', 'y', 'b']
for target, color in zip(targets, colors):
    indicesToKeep = finalDf['DX'] == target
    ax.scatter(finalDf.loc[indicesToKeep, 'IC' + showX]
               , finalDf.loc[indicesToKeep, 'IC' + showY]
               , c = color
               , s = 50)
    
ax.legend(['CN', 'MCI', 'Dementia'])
ax.grid()
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [6]:
fig = plt.figure(figsize=[18, 10])
#ax = fig.add_subplot(1,1,1) 
ax = plt.axes(projection='3d')

ax.set_xlabel('IC1', fontsize=12)
ax.set_ylabel('IC2', fontsize=12)
ax.set_zlabel('IC3', fontsize=12)
ax.set_title('3 Component ICA', fontsize = 20)

targets = [0, 1, 2]
colors = ['r', 'y', 'b']
for target, color in zip(targets, colors):
    indicesToKeep = finalDf['DX'] == target
    ax.scatter(finalDf.loc[indicesToKeep, 'IC1']
               , finalDf.loc[indicesToKeep, 'IC2']
               , finalDf.loc[indicesToKeep, 'IC3']
               , c = color
               , s = 50)
    
ax.legend(['CN', 'MCI', 'Dementia'])
ax.grid()
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Linear Regression

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [8]:
# Use only one feature
X = finalDf.drop("DX", axis=1).to_numpy().astype('int')
y = finalDf.loc[:,['DX']].to_numpy().astype('int').flatten()

# Split the data into training/testing sets
X_train = X[:-20]
X_test = X[-20:]

# Split the targets into training/testing sets
y_train = y[:-20]
y_test = y[-20:]

# Create linear regression object
regr = LinearRegression()

# Train the model using the training sets
regr.fit(X_train, y_train)

# Make predictions using the testing set
y_pred = regr.predict(X_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))

Coefficients: 
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Mean squared error: 0.59
Coefficient of determination: -2.71


In [9]:
finalDf

Unnamed: 0,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,DX
0,-0.004563,0.053338,-0.013619,-0.001153,-0.068614,-0.031754,0.009565,-0.045461,-0.008527,-0.003025,1
1,-0.012612,0.042117,0.017003,0.002385,-0.030882,0.008735,0.032264,-0.031358,-0.018602,-0.012969,1
2,-0.000506,0.015700,0.009852,0.003665,-0.036022,-0.009071,0.079807,-0.018471,0.015439,-0.029835,1
3,-0.017587,0.008918,0.012300,-0.033458,0.010938,-0.049676,-0.008352,-0.015894,-0.008206,0.005704,1
4,-0.036075,0.010869,0.028527,-0.037763,0.038536,-0.005521,-0.007436,-0.036291,-0.008239,-0.015538,1
...,...,...,...,...,...,...,...,...,...,...,...
1705,0.000996,-0.001272,-0.030139,-0.019873,0.024213,0.012539,0.009418,-0.000670,-0.002843,-0.037766,0
1706,0.030562,-0.025447,-0.025239,-0.024493,0.013733,0.029970,-0.013629,-0.041659,0.029084,0.032206,0
1707,0.007662,-0.008601,-0.013246,-0.034106,0.044446,-0.053993,-0.019101,0.010674,-0.008716,-0.005121,1
1708,0.013372,-0.002461,-0.012936,-0.035526,0.041069,-0.045988,-0.017614,-0.009831,-0.011393,0.015922,0


##### 