# Independent Component Analysis

In [1]:
import pandas as pd
from sklearn.decomposition import FastICA
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib widget
import common

In [2]:
data = common.loadFile("CleanedData").drop(["VISCODE", "RID"], axis=1).dropna()

In [3]:
plt.close()
ax = sns.countplot(x=data['DX'])
plt.title("Data Diagnosis")
plt.ylabel("Count")
plt.xlabel("Diagnosis")
plt.xticks(ticks=range(0,3), labels=['CN', 'MCI', 'Dementia'])

for p in ax.patches:
        ax.annotate(f'\n{p.get_height()}', (p.get_x()+p.get_width()/2, p.get_height()), ha='center', 
                    va='top', color='white', size=18)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [4]:
components = 10
#Creating input data numpy array
y =  data.loc[:,['DX']].values
X = data.drop(["DX"], axis=1)
######################################################################

# Creating ICA object
ICA = FastICA(n_components=components)
IndependentComponentValues=ICA.fit_transform(X, y)

def getColumnNames() -> list:
    res = []
    for i in range(1, len(IndependentComponentValues.T)+1):
        res.append('IC'+str(i))
    return res

#Creating the dataframe
ReducedData=pd.DataFrame(data=IndependentComponentValues, columns=getColumnNames())

# Rebuild dataset
finalDf = pd.concat([ReducedData, data[['DX']]], axis = 1)
common.saveFile(finalDf, "ICAData")


print('####### Final Independent Components ######')
print(finalDf.head(10))

####### Final Independent Components ######
        IC1       IC2       IC3       IC4       IC5       IC6       IC7  \
0  0.053283 -0.045100 -0.014432 -0.009505  0.032330 -0.004179 -0.068448   
1  0.042112 -0.031356  0.016629 -0.032211 -0.008220 -0.013932 -0.031016   
2  0.015685 -0.017823  0.009414 -0.079723  0.009446 -0.030546 -0.035997   
3  0.008932 -0.015926  0.012164  0.008393  0.049791  0.005051  0.010948   
4  0.010901 -0.036644  0.028624  0.007494  0.005848 -0.016803  0.038380   
5 -0.008018  0.003679  0.023799 -0.035675  0.026185 -0.008807  0.037028   
6 -0.011871  0.006029  0.015381  0.012779  0.001171  0.027000 -0.040150   
7 -0.012454 -0.000656  0.006933  0.013211  0.009392  0.015240 -0.047019   
8 -0.017882 -0.006138  0.006410  0.001979 -0.013611  0.003205 -0.029028   
9 -0.012230 -0.023948  0.003637  0.005278 -0.003713 -0.011624  0.019362   

        IC8       IC9      IC10 DX  
0  0.001031 -0.008591  0.003790  1  
1 -0.002633 -0.018719  0.012025  1  
2 -0.003866  0.0153

In [5]:
showX = '1'
showY = '3'

plt.close()
fig = plt.figure(figsize = (8,8))
ax = fig.add_subplot(1,1,1) 
ax.set_xlabel('IC' + showX, fontsize = 15)
ax.set_ylabel('IC' + showY, fontsize = 15)
ax.set_title('2 Component ICA', fontsize = 20)


targets = [0, 1, 2]
colors = ['r', 'y', 'b']
for target, color in zip(targets, colors):
    indicesToKeep = finalDf['DX'] == target
    ax.scatter(finalDf.loc[indicesToKeep, 'IC' + showX]
               , finalDf.loc[indicesToKeep, 'IC' + showY]
               , c = color
               , s = 50)
    
ax.legend(['CN', 'MCI', 'Dementia'])
ax.grid()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [6]:
plt.close()
fig = plt.figure()
#ax = fig.add_subplot(1,1,1) 
ax = plt.axes(projection='3d')

ax.set_xlabel('IC1', fontsize=12)
ax.set_ylabel('IC2', fontsize=12)
ax.set_zlabel('IC3', fontsize=12)
ax.set_title('3 Component ICA', fontsize = 20)

targets = [0, 1, 2]
colors = ['r', 'y', 'b']
for target, color in zip(targets, colors):
    indicesToKeep = finalDf['DX'] == target
    ax.scatter(finalDf.loc[indicesToKeep, 'IC1']
               , finalDf.loc[indicesToKeep, 'IC2']
               , finalDf.loc[indicesToKeep, 'IC3']
               , c = color
               , s = 50)
    
ax.legend(['CN', 'MCI', 'Dementia'])
ax.grid()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Linear Regression

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [8]:
# Use only one feature
X = finalDf.drop("DX", axis=1).to_numpy().astype('int')
y = finalDf.loc[:,['DX']].to_numpy().astype('int').flatten()

# Split the data into training/testing sets
X_train = X[:-20]
X_test = X[-20:]

# Split the targets into training/testing sets
y_train = y[:-20]
y_test = y[-20:]

# Create linear regression object
regr = LinearRegression()

# Train the model using the training sets
regr.fit(X_train, y_train)

# Make predictions using the testing set
y_pred = regr.predict(X_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))

Coefficients: 
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Mean squared error: 0.59
Coefficient of determination: -2.71


In [9]:
finalDf

Unnamed: 0,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,DX
0,0.053283,-0.045100,-0.014432,-0.009505,0.032330,-0.004179,-0.068448,0.001031,-0.008591,0.003790,1
1,0.042112,-0.031356,0.016629,-0.032211,-0.008220,-0.013932,-0.031016,-0.002633,-0.018719,0.012025,1
2,0.015685,-0.017823,0.009414,-0.079723,0.009446,-0.030546,-0.035997,-0.003866,0.015326,-0.000054,1
3,0.008932,-0.015926,0.012164,0.008393,0.049791,0.005051,0.010948,0.033536,-0.008249,0.017323,1
4,0.010901,-0.036644,0.028624,0.007494,0.005848,-0.016803,0.038380,0.037542,-0.008430,0.035346,1
...,...,...,...,...,...,...,...,...,...,...,...
1705,-0.001316,0.000008,-0.030019,-0.009362,-0.012779,-0.037680,0.024482,0.019730,-0.002940,-0.001419,0
1706,-0.025415,-0.042207,-0.024854,0.013536,-0.029832,0.031649,0.013754,0.024475,0.029103,-0.030895,0
1707,-0.008613,0.011347,-0.013086,0.019145,0.053604,-0.004749,0.044670,0.034299,-0.008710,-0.007665,1
1708,-0.002452,-0.009574,-0.012727,0.017617,0.045803,0.015906,0.041206,0.035720,-0.011376,-0.013486,0


##### 