# Independent Component Analysis

In [1]:
import pandas as pd
from sklearn.decomposition import FastICA
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib widget
import common

In [2]:
data = common.loadFile("CleanedData").drop(["VISCODE", "RID"], axis=1).dropna()

In [3]:
plt.close()
ax = sns.countplot(x=data['DX'])
plt.title("Data Diagnosis")
plt.ylabel("Count")
plt.xlabel("Diagnosis")
plt.xticks(ticks=range(0,3), labels=['CN', 'MCI', 'Dementia'])

for p in ax.patches:
        ax.annotate(f'\n{p.get_height()}', (p.get_x()+p.get_width()/2, p.get_height()), ha='center', 
                    va='top', color='white', size=18)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [4]:
components = 10
# Creating input data numpy array
y =  data.loc[:,['DX']].values
X = data.drop(["DX"], axis=1)
######################################################################

# Creating ICA object
ICA = FastICA(n_components=components)
IndependentComponentValues = ICA.fit_transform(X, y)

def getColumnNames() -> list:
    res = []
    for i in range(1, len(IndependentComponentValues.T)+1):
        res.append('IC'+str(i))
    return res

# Creating the dataframe
ReducedData = pd.DataFrame(data=IndependentComponentValues, columns=getColumnNames())

# Rebuild dataset
finalDf = pd.concat([ReducedData, data[['DX']]], axis = 1)
common.saveFile(finalDf, "ICAData")


print('####### Final Independent Components ######')
print(finalDf.head(10))

####### Final Independent Components ######
        IC1       IC2       IC3       IC4       IC5       IC6       IC7  \
0  0.005642  0.009503 -0.008446 -0.001468 -0.013136 -0.068641  0.053346   
1  0.013706  0.032177 -0.018498  0.001793  0.017239 -0.030951  0.042105   
2  0.001845  0.079966  0.015538  0.003171  0.010482 -0.036154  0.015887   
3  0.017872 -0.008296 -0.008167 -0.033228  0.012526  0.011012  0.008893   
4  0.037392 -0.007431 -0.007942 -0.038242  0.028775  0.038477  0.010771   
5  0.019329  0.035857  0.003721 -0.031665  0.023796  0.037123 -0.007998   
6 -0.012682 -0.012890 -0.014317 -0.030162  0.014942 -0.039722 -0.012038   
7 -0.013530 -0.013207  0.001927 -0.030250  0.006938 -0.046782 -0.012547   
8 -0.018094 -0.001965  0.000359 -0.033645  0.006230 -0.028806 -0.017976   
9 -0.014244 -0.005193  0.002380 -0.015177  0.003701  0.019383 -0.012242   

        IC8       IC9      IC10 DX  
0 -0.031004 -0.046061  0.000641  1  
1  0.009612 -0.031560  0.010549  1  
2 -0.008369 -0.0199

In [5]:
showX = '1'
showY = '3'

plt.close()
fig = plt.figure(figsize = (8,8))
ax = fig.add_subplot(1,1,1) 
ax.set_xlabel('IC' + showX, fontsize = 15)
ax.set_ylabel('IC' + showY, fontsize = 15)
ax.set_title('2 Component ICA', fontsize = 20)


targets = [0, 1, 2]
colors = ['r', 'y', 'b']
for target, color in zip(targets, colors):
    indicesToKeep = finalDf['DX'] == target
    ax.scatter(finalDf.loc[indicesToKeep, 'IC' + showX]
               , finalDf.loc[indicesToKeep, 'IC' + showY]
               , c = color
               , s = 50)
    
ax.legend(['CN', 'MCI', 'Dementia'])
ax.grid()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [6]:
plt.close()
fig = plt.figure()
#ax = fig.add_subplot(1,1,1) 
ax = plt.axes(projection='3d')

ax.set_xlabel('IC1', fontsize=12)
ax.set_ylabel('IC2', fontsize=12)
ax.set_zlabel('IC3', fontsize=12)
ax.set_title('3 Component ICA', fontsize = 20)

targets = [0, 1, 2]
colors = ['r', 'y', 'b']
for target, color in zip(targets, colors):
    indicesToKeep = finalDf['DX'] == target
    ax.scatter(finalDf.loc[indicesToKeep, 'IC1']
               , finalDf.loc[indicesToKeep, 'IC2']
               , finalDf.loc[indicesToKeep, 'IC3']
               , c = color
               , s = 50)
    
ax.legend(['CN', 'MCI', 'Dementia'])
ax.grid()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Linear Regression

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [8]:
# Use only one feature
X = finalDf.drop("DX", axis=1).to_numpy().astype('int')
y = finalDf.loc[:,['DX']].to_numpy().astype('int').flatten()

# Split the data into training/testing sets
X_train = X[:-20]
X_test = X[-20:]

# Split the targets into training/testing sets
y_train = y[:-20]
y_test = y[-20:]

# Create linear regression object
regr = LinearRegression()

# Train the model using the training sets
regr.fit(X_train, y_train)

# Make predictions using the testing set
y_pred = regr.predict(X_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))

Coefficients: 
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Mean squared error: 0.59
Coefficient of determination: -2.71


In [9]:
finalDf

Unnamed: 0,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,DX
0,0.005642,0.009503,-0.008446,-0.001468,-0.013136,-0.068641,0.053346,-0.031004,-0.046061,0.000641,1
1,0.013706,0.032177,-0.018498,0.001793,0.017239,-0.030951,0.042105,0.009612,-0.031560,0.010549,1
2,0.001845,0.079966,0.015538,0.003171,0.010482,-0.036154,0.015887,-0.008369,-0.019957,0.028086,1
3,0.017872,-0.008296,-0.008167,-0.033228,0.012526,0.011012,0.008893,-0.049510,-0.015743,-0.007338,1
4,0.037392,-0.007431,-0.007942,-0.038242,0.028775,0.038477,0.010771,-0.004822,-0.035827,0.012095,1
...,...,...,...,...,...,...,...,...,...,...,...
1705,0.000214,0.009564,-0.002572,-0.020273,-0.029656,0.024043,-0.001199,0.012300,-0.002140,0.038065,0
1706,-0.030825,-0.013841,0.029061,-0.024503,-0.025977,0.013937,-0.025584,0.030050,-0.040104,-0.032985,0
1707,-0.007533,-0.018907,-0.008678,-0.033616,-0.012872,0.044503,-0.008536,-0.054541,0.009591,0.006144,1
1708,-0.013471,-0.017587,-0.011427,-0.035014,-0.012942,0.041237,-0.002472,-0.046259,-0.009946,-0.015681,0


##### 