In [None]:
# Attach Google Drive to the Colab runtime; the dataset and export paths used
# further down the notebook all live below this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import cv2
import os
import glob
import pandas as pd
import numpy as np
from google.colab.patches import cv2_imshow

In [None]:
# Module-level accumulators shared by all graph-building functions below.
# They only ever grow while images are processed; re-running this cell
# resets them to an empty dataset.
edges = []            # every edge, stored as a [node-id, node-id] pair
attrs = []            # one attribute value per node
graph_indicator = []  # for each node, the id of the graph it belongs to
node_labels = []      # one [label, activity-name] entry per node
graph_labels = []     # one [label, activity-name] entry per graph

# Running counters: the ids handed to the next graph / node that is created.
graph_id = 1
node_id = 1

# Numeric class label -> class name.
activity_map = {
    1: 'bacterial',
    2: 'normal',
    3: 'virus',
}


def normalize(arr):
    """Standardise a sequence of numbers to zero mean and unit variance.

    Parameters
    ----------
    arr : array-like of numbers
        Values to standardise.

    Returns
    -------
    numpy.ndarray
        Float array holding ``(arr - mean) / std``. An empty input yields an
        empty array. An input whose standard deviation is 0 (all values equal,
        which includes a single value) yields zeros instead of the NaN that a
        0/0 division would produce.
    """
    values = np.asarray(arr, dtype=float)
    if values.size == 0:
        # np.mean / np.std of an empty array emit RuntimeWarnings and give NaN.
        return values
    centred = values - values.mean()
    spread = values.std()
    if spread == 0:
        # Every value equals the mean; dividing would turn each entry into NaN.
        return np.zeros_like(centred)
    return centred / spread

def generate_graphs(filename, node_label, activity_map, threshold=128, show=False):
    """Turn one image into a pixel graph and append it to the global dataset.

    Every pixel whose first channel is at least ``threshold`` becomes a node;
    node ids are handed out in row-major order (left to right, top to bottom).
    Two nodes are connected when their pixels touch horizontally, vertically or
    diagonally. Each connection is stored once per direction, so a pair of
    neighbouring nodes contributes two entries to ``edges``.

    Side effects on module-level state:
      * ``node_id`` advances by one per node created;
      * ``graph_indicator`` and ``node_labels`` gain one entry per node;
      * ``attrs`` gains the normalised first-channel value of each node;
      * ``edges`` gains one ``[source, target]`` pair per directed edge;
      * ``graph_id`` advances by one, but only if the image produced nodes.

    Parameters
    ----------
    filename : str
        Path of the image to read with ``cv2.imread``.
    node_label : int
        Class label given to every node; must be a key of ``activity_map``.
    activity_map : dict
        Maps numeric labels to class names.
    threshold : int, optional
        Minimum first-channel value for a pixel to become a node (default 128).
    show : bool, optional
        When true, display a copy of the image with every connected node pixel
        highlighted (uses ``cv2_imshow``). Off by default because drawing is
        costly and does not affect the generated graph.

    Raises
    ------
    IOError
        If ``cv2.imread`` cannot read ``filename``.
    """
    global node_id, edges, attrs, graph_id, node_labels, graph_indicator
    print(" ... Reading image: "+filename+" ...")
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        raise IOError("Could not read image: " + filename)
    dim1, dim2, _ = img.shape

    print("Image type: " + activity_map[node_label] + "\nPixel matrix is of: " + str(dim1) + "x" + str(dim2))

    # --- nodes: one per pixel that passes the threshold ---------------------
    first_channel = img[:, :, 0]
    is_node = first_channel >= threshold
    cnt = int(np.count_nonzero(is_node))

    # nodes[i, j] holds the node id of pixel (i, j), or -1 if it is not a node.
    nodes = np.full((dim1, dim2), -1)
    # Boolean-mask assignment visits pixels in row-major order, so ids increase
    # left-to-right, top-to-bottom.
    nodes[is_node] = np.arange(node_id, node_id + cnt)
    node_id += cnt
    graph_indicator.extend([graph_id] * cnt)
    node_labels.extend([node_label, activity_map[node_label]] for _ in range(cnt))

    # --- edges: link every node to its neighbouring nodes -------------------
    overlay = img.copy() if show else None
    edge = 0
    # argwhere also yields row-major order; tolist() gives plain Python ints.
    for i, j in np.argwhere(is_node).tolist():
        source = int(nodes[i, j])
        # Clamp the 3x3 window so it never leaves the image.
        for i1 in range(max(0, i - 1), min(i + 2, dim1)):
            for j1 in range(max(0, j - 1), min(j + 2, dim2)):
                if (i1 == i and j1 == j) or nodes[i1, j1] == -1:
                    continue
                if overlay is not None:
                    # OpenCV points are (x, y) = (column, row).
                    overlay = cv2.circle(overlay, (j, i), radius=0, color=(0, 225, 255), thickness=-1)
                    overlay = cv2.circle(overlay, (j1, i1), radius=0, color=(0, 225, 255), thickness=-1)
                edges.append([source, int(nodes[i1, j1])])
                edge += 1

    if overlay is not None:
        cv2_imshow(overlay)

    # Attributes are normalised per image; skip images without nodes so that
    # normalize is never asked for the mean of an empty array.
    if cnt:
        attrs.extend(normalize(first_channel[is_node]))

    print("For given image nodes formed: " + str(cnt)+" edges formed: " + str(edge))
    if cnt != 0:
        # An image without nodes does not consume a graph id.
        graph_id += 1

def generate_graph_with_labels(dirname, label, activity_map):
    """Build one graph per ``.jpg`` image in ``dirname`` and record its label.

    Each image is handed to ``generate_graphs``. For every image that actually
    produced a graph, ``[label, activity_map[label]]`` is appended to the
    global ``graph_labels`` list, so that entry k of ``graph_labels`` always
    describes graph id k.

    Parameters
    ----------
    dirname : str
        Directory that is searched (non-recursively) for ``*.jpg`` files.
    label : int
        Class label for all images in the directory; a key of ``activity_map``.
    activity_map : dict
        Maps numeric labels to class names.
    """
    print("\n... Reading Directory: " + dirname+" ...\n")
    global graph_labels
    # glob returns matches in arbitrary, filesystem-dependent order; sorting
    # makes the assignment of graph ids reproducible between runs.
    for filename in sorted(glob.glob(dirname + '/*.jpg')):
        graph_id_before = graph_id
        generate_graphs(filename, label, activity_map)
        # generate_graphs only advances graph_id when the image yielded at
        # least one node. An image without nodes occupies no graph id, so it
        # must not receive a graph label either; otherwise every later label
        # would be shifted relative to graph_indicator.
        if graph_id != graph_id_before:
            graph_labels.append([label, activity_map[label]])

def process_graphs(Normal_dir, Bac_dir, Vir_dir, activity_map):
    """Build the graphs for all three image classes and print a summary.

    The directories are processed in label order: ``Bac_dir`` with label 1,
    ``Normal_dir`` with label 2, then ``Vir_dir`` with label 3. Note that this
    differs from the parameter order, so callers should pass the directories
    by keyword to avoid mixing up the classes.

    Parameters
    ----------
    Normal_dir, Bac_dir, Vir_dir : str
        Directories holding the images of the respective class.
    activity_map : dict
        Maps the numeric labels 1, 2 and 3 to class names.
    """
    for dirname, label in ((Bac_dir, 1), (Normal_dir, 2), (Vir_dir, 3)):
        generate_graph_with_labels(dirname, label, activity_map)

    print("Processing done")
    # The two counts are separated explicitly; they used to run together.
    print("Total nodes formed: " + str(len(node_labels)) + ", Total graphs formed: " + str(len(graph_labels)))


In [None]:
# Dataset root on the mounted Drive.
root = '/content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/'

# One sub-folder per class inside the "test" split.
split_dir = os.path.join(root, 'CXR_Reformat_Preprocess', 'test')
Normal_dir, Bac_dir, Vir_dir = (
    os.path.join(split_dir, class_folder)
    for class_folder in ('normal', 'bacteria', 'virus')
)

In [None]:
# Pass the directories by keyword. process_graphs declares its parameters as
# (Normal_dir, Bac_dir, Vir_dir); handing over Bac_dir first positionally makes
# the "normal" images get the bacterial label and the bacterial images the
# normal label.
process_graphs(
    Normal_dir=Normal_dir,
    Bac_dir=Bac_dir,
    Vir_dir=Vir_dir,
    activity_map=activity_map,
)


... Reading Directory: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal ...

 ... Reading image: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal/img-1.jpg ...
Image type: bacterial
Pixel matrix is of: 512x512
For given image nodes formed: 1180 edges formed: 6256
 ... Reading image: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal/img-2.jpg ...
Image type: bacterial
Pixel matrix is of: 512x512




For given image nodes formed: 1 edges formed: 0
 ... Reading image: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal/img-3.jpg ...
Image type: bacterial
Pixel matrix is of: 512x512
For given image nodes formed: 324 edges formed: 1500
 ... Reading image: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal/img-4.jpg ...
Image type: bacterial
Pixel matrix is of: 512x512
For given image nodes formed: 141 edges formed: 718
 ... Reading image: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal/img-5.jpg ...
Image type: bacterial
Pixel matrix is of: 512x512
For given image nodes formed: 552 edges formed: 2602
 ... Reading image: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal/img-6.jpg ...
Image type: bacterial
Pixel matrix is of: 512x512
For given image nodes formed: 285 edges formed

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims, where=where)
  subok=False)
  ret = ret.dtype.type(ret / rcount)


For given image nodes formed: 0 edges formed: 0
 ... Reading image: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal/img-86.jpg ...
Image type: bacterial
Pixel matrix is of: 512x512
For given image nodes formed: 492 edges formed: 2070
 ... Reading image: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal/img-87.jpg ...
Image type: bacterial
Pixel matrix is of: 512x512
For given image nodes formed: 481 edges formed: 1916
 ... Reading image: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal/img-88.jpg ...
Image type: bacterial
Pixel matrix is of: 512x512
For given image nodes formed: 384 edges formed: 1668
 ... Reading image: /content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_Preprocess/test/normal/img-89.jpg ...
Image type: bacterial
Pixel matrix is of: 512x512
For given image nodes formed: 728 edges f

In [None]:
# Sizes of the raw accumulators filled by process_graphs.
print(len(node_labels))
print(len(graph_labels))
print(len(edges))
print(len(attrs))

# Each node contributes exactly one label, one attribute and one graph id.
assert len(node_labels) == len(attrs) == len(graph_indicator), \
    "per-node lists are out of sync"
# Row k of the graph-label table is meant to describe graph id k, so the
# number of labels should equal the number of distinct graph ids in use.
n_graph_ids = len(set(graph_indicator))
if n_graph_ids != len(graph_labels):
    print("WARNING: " + str(len(graph_labels)) + " graph labels but "
          + str(n_graph_ids) + " graph ids in graph_indicator")

# The frames are built straight from the Python lists. Going through np.array
# first would fail on an empty edge list and would turn the mixed
# [int, str] label rows into all-string arrays.
df_A = pd.DataFrame(edges, columns=["node-1", "node-2"])
print("Shape of edge dataframe: " + str(df_A.shape))
print("\n--summary of dataframe--\n", df_A.head())

df_node_label = pd.DataFrame(node_labels, columns=["label", "activity-name"])
print("shape of node-label dataframe: " + str(df_node_label.shape))
print("\n--summary of dataframe--\n", df_node_label.head())

df_graph_label = pd.DataFrame(graph_labels, columns=["label", "activity-name"])
print("shape of graph-label dataframe: " + str(df_graph_label.shape))
print("\n--summary of dataframe--\n", df_graph_label.head())

df_node_attr = pd.DataFrame(attrs, columns=["gray-val"])
print("shape of node-attribute dataframe: " + str(df_node_attr.shape))
print("\n--summary of dataframe--\n", df_node_attr.head())

df_graph_indicator = pd.DataFrame(graph_indicator, columns=["graph-id"])
print("shape of graph-indicator dataframe: " + str(df_graph_indicator.shape))
print("\n--summary of dataframe--\n", df_graph_indicator.head())

# Only the numeric label is exported; the readable class name was kept in the
# frames above purely for inspection.
df_node_label = df_node_label.drop(columns=["activity-name"])
print(df_node_label.head())

df_graph_label = df_graph_label.drop(columns=["activity-name"])
print(df_graph_label.head())

244500
624
1107444
244500
Shape of edge dataframe: (1107444, 2)

--summary of dataframe--
    node-1  node-2
0       1       2
1       1      13
2       1      14
3       1      15
4       2       1
shape of node-label dataframe: (244500, 2)

--summary of dataframe--
        label activity-name
0          1     bacterial
1          1     bacterial
2          1     bacterial
3          1     bacterial
4          1     bacterial
...      ...           ...
244495     3         virus
244496     3         virus
244497     3         virus
244498     3         virus
244499     3         virus

[244500 rows x 2 columns]
shape of node-label dataframe: (624, 2)

--summary of dataframe--
   label activity-name
0     1     bacterial
1     1     bacterial
2     1     bacterial
3     1     bacterial
4     1     bacterial
shape of node-attribute dataframe: (244500, 1)

--summary of dataframe--
    gray-val
0 -0.401532
1 -0.194105
2 -0.493721
3 -0.816384
4 -0.977715
shape of graph-indicator dataframe:

In [None]:
def save_dataframe_to_txt(df, filepath):
    """Write ``df`` to ``filepath`` as comma-separated text.

    Only the cell values are written: there is no header row and no index
    column. An existing file at ``filepath`` is overwritten.
    """
    df.to_csv(
        path_or_buf=filepath,
        sep=',',
        header=False,  # no column names in the output
        index=False,   # no row index in the output
        mode='w',
    )

In [None]:
# Folder on the mounted Drive that receives the exported text files.
sourcepath = '/content/drive/MyDrive/COURSES/CS331/FinalProject/dataset/GraphCXR/CXR_Reformat_128_CXR_Prewitt_Testv2/raw'

# exist_ok=False makes this raise FileExistsError when the folder already
# exists, so an earlier export is never overwritten silently.
os.makedirs(sourcepath, exist_ok=False)
print("The new directory is created!")

# (table, file name) pairs, written in this order.
exports = (
    (df_A,               '/CXR_Reformat_128_CXR_Prewitt_Testv2_A.txt'),
    (df_graph_indicator, '/CXR_Reformat_128_CXR_Prewitt_Testv2_graph_indicator.txt'),
    (df_graph_label,     '/CXR_Reformat_128_CXR_Prewitt_Testv2_graph_labels.txt'),
    (df_node_attr,       '/CXR_Reformat_128_CXR_Prewitt_Testv2_node_attributes.txt'),
    (df_node_label,      '/CXR_Reformat_128_CXR_Prewitt_Testv2_node_labels.txt'),
)
for frame, file_suffix in exports:
    save_dataframe_to_txt(frame, sourcepath + file_suffix)

The new directory is created!
