In [1]:
# import pandas as pd
import numpy as np

In [2]:
filePath = './gastroenterology_dataset/data.txt'

data = np.genfromtxt(filePath, delimiter=',', skip_header=1)

In [3]:
data

array([[3.0000e+00, 3.0000e+00, 3.0000e+00, ..., 2.0000e+00, 2.0000e+00,
        2.0000e+00],
       [1.0000e+00, 2.0000e+00, 2.0000e+00, ..., 1.0000e+00, 1.0000e+00,
        2.0000e+00],
       [1.3812e+02, 1.2799e+02, 8.0415e+01, ..., 1.5781e+02, 9.3569e+01,
        9.5543e+01],
       ...,
       [1.1377e-02, 1.1377e-02, 2.6310e-03, ..., 2.1000e-04, 1.2000e-05,
        1.2000e-05],
       [1.1198e-02, 1.1198e-02, 2.6100e-03, ..., 2.0600e-04, 1.1000e-05,
        1.1000e-05],
       [1.1131e-02, 1.1131e-02, 2.5310e-03, ..., 1.9400e-04, 1.0000e-05,
        1.0000e-05]])

In [4]:
# transpose data to get features in columns and samples in rows
data_transpose = np.transpose(data)
data_list = data_transpose.tolist()

# now let's seperate data into "White Light Frame (WL)" and "NBI Frame (NBI)"
# 1 for WL and 2 for NBI
data_WL, data_NBI = [],[]
for i in range(len(data_list)):
    if data_list[i][1] == 1:
        data_WL.append(data_list[i])
    elif data_list[i][1] == 2:
        data_NBI.append(data_list[i])

# checking if the separation was done correctely
print(False in [row[1]==1 for row in data_WL])  # should be False
print(False in [row[1]==2 for row in data_NBI]) # should be False
print((len(data_WL)+len(data_NBI))==len(data_list)) # should be True

False
False
True


In [5]:
# separating features and targets out of data_WL and data_NBI
fea_WL = [row[2:] for row in data_WL]
class_WL = [row[0] for row in data_WL]

fea_NBI = [row[2:] for row in data_NBI]
class_NBI = [row[0] for row in data_NBI]

---

In [225]:
from sklearn.metrics.pairwise import cosine_similarity

# X -> features, y -> label
X = np.array(fea_WL+fea_NBI)
y = np.array(class_WL+class_NBI)

similarity_matrix = cosine_similarity(X)

In [7]:
similarity_matrix

array([[1.        , 0.93607026, 0.97028607, ..., 0.99687239, 0.98485682,
        0.91366254],
       [0.93607026, 1.        , 0.98597573, ..., 0.95746692, 0.9496139 ,
        0.98196557],
       [0.97028607, 0.98597573, 1.        , ..., 0.98576319, 0.96913295,
        0.96665076],
       ...,
       [0.99687239, 0.95746692, 0.98576319, ..., 1.        , 0.98455307,
        0.93653564],
       [0.98485682, 0.9496139 , 0.96913295, ..., 0.98455307, 1.        ,
        0.93032114],
       [0.91366254, 0.98196557, 0.96665076, ..., 0.93653564, 0.93032114,
        1.        ]])

In [69]:
adj_matrix = 1/((1/similarity_matrix)-1)
np.fill_diagonal(adj_matrix, 0)
adj_matrix[adj_matrix <= np.mean(adj_matrix[adj_matrix != np.min(adj_matrix)])] = 0

  adj_matrix = 1/((1/similarity_matrix)-1)


In [75]:
np.count_nonzero(adj_matrix)

1454

In [76]:
print(np.max(adj_matrix))
print(np.min(adj_matrix[adj_matrix != np.min(adj_matrix)]))

2879.9687071143035
82.38514782992013


In [39]:
import matplotlib.pyplot as plt
%matplotlib qt

In [None]:
# import networkx as nx

# # create nx graph from sim matrix
# G = nx.to_networkx_graph(similarity_matrix)

# nx.draw(G,with_labels=True)

In [239]:
from sklearn.decomposition import PCA
pca = PCA(n_components=4)
X_pca = pca.fit_transform(X)

In [268]:
from sklearn.decomposition import KernelPCA

kernel_pca = KernelPCA(
    n_components=4, kernel="linear", gamma=10, fit_inverse_transform=True, alpha=0.1
)

X_kpca = kernel_pca.fit_transform(X)

In [269]:
np.shape(X_kpca)

(152, 4)

In [249]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)
X_lda = lda.fit_transform(X, y)

304


In [274]:
import networkx as nx

from sklearn import preprocessing
from sklearn.metrics.pairwise import euclidean_distances
X_normalized = preprocessing.normalize(X_kpca, norm='l2')
euclidean_dist = euclidean_distances(X_normalized)
squared_euclidean = np.square(euclidean_dist)

# adj_matrix = 10**(1/similarity_matrix)
adj_matrix = squared_euclidean
np.fill_diagonal(adj_matrix, 0)
print('mean = ', np.mean(adj_matrix[adj_matrix != np.min(adj_matrix)]))
print('max = ', np.max(adj_matrix))
print('min = ', np.min(adj_matrix[adj_matrix != np.min(adj_matrix)]))
adj_matrix[adj_matrix >= -1.85+np.mean(adj_matrix[adj_matrix != np.min(adj_matrix)])] = 0
print(np.count_nonzero(adj_matrix))


G = nx.from_numpy_matrix(adj_matrix, create_using=nx.MultiGraph())

mean =  1.9239266259901187
max =  3.9999399648203853
min =  1.4407206584854393e-05
4986


In [None]:
# G.remove_nodes_from(list(nx.isolates(G)))

color_map = []
for i in range(len(G)):
    if y[i] == 1:
        color_map.append('blue')
    elif y[i] == 2: 
        color_map.append('green')
    elif y[i] == 3: 
        color_map.append('red')

nx.draw_networkx(G, pos=nx.spring_layout(G), node_size=20, node_color=color_map, linewidths=0.5)