# Independent Component Analysis

In [1]:
import pandas as pd
from sklearn.decomposition import FastICA
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib widget
import common

In [2]:
# Load the "CleanedData" file, remove the VISCODE and RID columns, then discard
# every row that still contains a missing value.
# NOTE(review): presumably common.loadFile returns a pandas DataFrame — confirm.
data = (
    common.loadFile("CleanedData")
    .drop(columns=["VISCODE", "RID"])
    .dropna()
)

In [3]:
# Bar chart showing how many rows carry each DX value.
plt.close()
ax = sns.countplot(x=data['DX'])
ax.set_title("Data Diagnosis")
ax.set_xlabel("Diagnosis")
ax.set_ylabel("Count")
# Show class names instead of the raw DX codes on the x axis.
# NOTE(review): assumes the bars are ordered CN, MCI, Dementia — confirm.
plt.xticks(ticks=range(0, 3), labels=['CN', 'MCI', 'Dementia'])

# Write each bar's height in white, centred horizontally; the leading newline
# together with va='top' places the text just beneath the bar's top edge.
for bar in ax.patches:
    barCentre = bar.get_x() + bar.get_width() / 2
    ax.annotate(f'\n{bar.get_height()}', (barCentre, bar.get_height()),
                ha='center', va='top', color='white', size=18)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [19]:
components = 10
# FastICA starts from a random un-mixing matrix; without a fixed seed every
# run of this cell yields different component values (and different ordering
# and signs), so saved results and plots cannot be reproduced.
randomSeed = 0

# Split the table into the diagnosis label (y) and the feature columns (X)
y = data.loc[:, ['DX']].values
X = data.drop(["DX"], axis=1)
######################################################################

# Creating ICA object
ICA = FastICA(n_components=components, random_state=randomSeed)
# FastICA is unsupervised: scikit-learn ignores any y passed to fit_transform,
# so only the features are supplied.
IndependentComponentValues = ICA.fit_transform(X)

def getColumnNames() -> list:
    """Build the labels 'IC1', 'IC2', ... for the independent components.

    Reads the module-level ``IndependentComponentValues`` and produces one
    label per entry of its transpose (i.e. one per component column).

    Returns:
        list: the labels in column order, numbered from 1.
    """
    columnCount = len(IndependentComponentValues.T)
    return ['IC' + str(number) for number in range(1, columnCount + 1)]

#Creating the dataframe
ReducedData = pd.DataFrame(data=IndependentComponentValues, columns=getColumnNames())

# Rebuild dataset.
# ReducedData gets a fresh 0..n-1 index, while `data` still carries whatever
# row labels survived dropna(). pd.concat(axis=1) aligns on index labels, so
# the DX column is renumbered positionally first; otherwise any gap in the
# labels would pair components with the wrong diagnosis and add NaN rows.
finalDf = pd.concat([ReducedData, data[['DX']].reset_index(drop=True)], axis=1)
common.saveFile(finalDf, "ICAData")


print('####### Final Independent Components ######')
print(finalDf.head(10))

####### Final Independent Components ######
        IC1       IC2       IC3       IC4       IC5       IC6       IC7  \
0  0.002148  0.001514 -0.009197 -0.052422  0.051852  0.059541 -0.042424   
1 -0.010909 -0.002618 -0.018115 -0.035791  0.042050 -0.001904 -0.032099   
2 -0.012513 -0.003721  0.014995 -0.035659  0.015933  0.013026 -0.018348   
3 -0.028446  0.033129 -0.010626  0.026555  0.008829  0.037045 -0.014265   
4 -0.027854  0.037024 -0.008984  0.035220  0.011645 -0.014427 -0.037835   
5 -0.030595  0.031219  0.001841  0.039846 -0.007060 -0.000828  0.003003   
6 -0.014394  0.030236 -0.013691 -0.037733 -0.012467  0.009709  0.006991   
7 -0.010989  0.030377  0.002167 -0.040593 -0.013198  0.022753  0.000814   
8 -0.002634  0.033484  0.001377 -0.032645 -0.018123 -0.004568 -0.006281   
9 -0.003641  0.014776  0.002196  0.015580 -0.011769 -0.011224 -0.024799   

        IC8       IC9      IC10 DX  
0 -0.002724 -0.011224  0.001498  1  
1 -0.014993 -0.030588  0.009929  1  
2 -0.001855 -0.0799

In [5]:
# Numbers (as strings) of the two independent components to plot.
showX = '1'
showY = '3'

plt.close()
fig, ax = plt.subplots(figsize=(8, 8))
ax.set_title('2 Component ICA', fontsize=20)
ax.set_xlabel(f'IC{showX}', fontsize=15)
ax.set_ylabel(f'IC{showY}', fontsize=15)

# One scatter layer per DX code (0, 1, 2), each in its own colour. The layers
# are drawn in that order so they line up with the legend entries below.
for target, color in zip([0, 1, 2], ['r', 'y', 'b']):
    classRows = finalDf[finalDf['DX'] == target]
    ax.scatter(classRows[f'IC{showX}'], classRows[f'IC{showY}'], c=color, s=50)

ax.legend(['CN', 'MCI', 'Dementia'])
ax.grid()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [6]:
# 3-D scatter of the first three independent components, coloured by DX code.
plt.close()
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')

ax.set_title('3 Component ICA', fontsize=20)
ax.set_xlabel('IC1', fontsize=12)
ax.set_ylabel('IC2', fontsize=12)
ax.set_zlabel('IC3', fontsize=12)

# One scatter layer per DX code (0, 1, 2); the drawing order matches the
# order of the legend entries below.
for target, color in zip([0, 1, 2], ['r', 'y', 'b']):
    classRows = finalDf.loc[finalDf['DX'] == target, ['IC1', 'IC2', 'IC3']]
    ax.scatter(classRows['IC1'], classRows['IC2'], classRows['IC3'],
               c=color, s=50)

ax.legend(['CN', 'MCI', 'Dementia'])
ax.grid()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Linear Regression

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [8]:
# Use only one feature
X = finalDf.drop("DX", axis=1).to_numpy().astype('int')
y = finalDf.loc[:,['DX']].to_numpy().astype('int').flatten()

# Split the data into training/testing sets
X_train = X[:-20]
X_test = X[-20:]

# Split the targets into training/testing sets
y_train = y[:-20]
y_test = y[-20:]

# Create linear regression object
regr = LinearRegression()

# Train the model using the training sets
regr.fit(X_train, y_train)

# Make predictions using the testing set
y_pred = regr.predict(X_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))

Coefficients: 
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Mean squared error: 0.59
Coefficient of determination: -2.71


In [9]:
# Show the frame of independent components plus the DX column as the cell output.
finalDf

Unnamed: 0,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,DX
0,0.014553,-0.004429,0.045026,0.068422,-0.001004,0.032437,0.008602,0.009490,0.003641,0.053274,1
1,-0.016564,-0.014172,0.031353,0.031022,0.002688,-0.008123,0.018743,0.032198,0.011876,0.042112,1
2,-0.009327,-0.030703,0.017692,0.035978,0.003913,0.009512,-0.015304,0.079704,-0.000230,0.015684,1
3,-0.012137,0.004872,0.015934,-0.010955,-0.033552,0.049813,0.008256,-0.008399,0.017282,0.008932,1
4,-0.028609,-0.017147,0.036707,-0.038376,-0.037493,0.005912,0.008467,-0.007504,0.035162,0.010905,1
...,...,...,...,...,...,...,...,...,...,...,...
1705,0.030035,-0.037635,-0.000146,-0.024527,-0.019696,-0.012824,0.002960,0.009348,-0.001585,-0.001329,0
1706,0.024780,0.031625,0.042309,-0.013724,-0.024483,-0.029811,-0.029117,-0.013519,-0.030866,-0.025409,0
1707,0.013080,-0.004640,-0.011471,-0.044700,-0.034343,0.053530,0.008704,-0.019151,-0.007682,-0.008621,1
1708,0.012699,0.015948,0.009531,-0.041213,-0.035769,0.045767,0.011366,-0.017616,-0.013474,-0.002456,0


##### 