In [1]:
"""Edge_preparation_2class.ipynb
Automatically generated by Colaboratory.
Original file is located at
    https://colab.research.google.com/drive/1DeoT7rAA9vUzHCVU_jvMvr3moVYx3Rge
"""

#mounting on drive
# from google.colab import drive
# drive.mount('/content/drive',force_remount=True)

import cv2
#from google.colab.patches import cv2_imshow
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from statistics import stdev,mean

#module-level accumulators shared by all graph-building functions in this file
edges = []            #every edge as a [node, node] pair
attrs = []            #attribute value of every node
graph_indicator = []  #graph-id of every node
node_labels = []      #label entry of every node
graph_labels = []     #label entry of every graph
graph_id = 1          #id of latest graph
node_id = 1           #id of latest node

#class label -> class name (2-class)
activity_map = {
    1: 'COVID',
    2: 'NON-COVID',
}

#z-score normalization
def normalize(arr):
    """Return the z-score normalisation of ``arr`` as a float NumPy array.

    Each value has the mean subtracted and is divided by the standard
    deviation as computed by ``np.std`` with its default arguments.

    Args:
        arr: array-like of numbers.

    Returns:
        A NumPy float array with the same shape as ``np.array(arr)``.
        An empty input yields an empty array. When all values are identical
        (standard deviation 0, which includes a single value) an array of
        zeros is returned instead of dividing by zero.
    """
    arr=np.array(arr)
    if arr.size==0:
        #nothing to normalise; avoid the mean-of-empty warnings
        return arr.astype(float)
    m=np.mean(arr)
    s=np.std(arr)
    if s==0:
        #every value equals the mean: (arr-m)/s would be 0/0 -> NaN
        return np.zeros(arr.shape,dtype=float)
    return (arr-m)/s

#generate graph for a given edge-image file
def generate_graphs(filename,node_label,activity_map):
    """Turn one edge image into a graph and append it to the module-level lists.

    Every pixel whose first-channel value (blue, in OpenCV's BGR order) is
    >=128 becomes a node. Nodes are numbered row by row, continuing from the
    global ``node_id``. Each node is linked to every node among its (up to)
    eight neighbouring pixels, so an adjacency between two nodes is recorded
    once from each end. The first-channel values of the nodes are normalised
    per image with ``normalize`` and appended to ``attrs``. ``graph_id`` is
    advanced only when the image produced at least one node.

    Args:
        filename: path of the image, read with ``cv2.imread``.
        node_label: class label stored for every node of this image; must be
            a key of ``activity_map``.
        activity_map: mapping from class label to class name.

    Raises:
        OSError: if ``cv2.imread`` cannot load ``filename``.
    """
    global node_id,edges,attrs,graph_id,node_labels,graph_indicator
    print(" ... Reading image: "+filename+" ...")
    img=cv2.imread(filename)
    if img is None:
        #cv2.imread reports a missing/unreadable file by returning None
        raise OSError("Could not read image: "+filename)
    dim1,dim2,_=img.shape

    print("Image type: " + activity_map[node_label] + "\nPixel matrix is of: "+str(dim1)+"x"+str(dim2))

    #considering pixel as node if pixel-value>=128
    channel=img[:,:,0]
    is_node=channel>=128
    cnt=int(is_node.sum())

    #boolean-mask assignment fills row by row, so ids are handed out in the
    #same order as a row-major scan of the image
    nodes=np.full((dim1,dim2),-1)
    nodes[is_node]=np.arange(node_id,node_id+cnt)
    node_id+=cnt
    graph_indicator.extend([graph_id]*cnt)
    node_labels.extend([node_label,activity_map[node_label]] for _ in range(cnt))

    #forming edge between all adjacent pixels which are node
    edge=0
    for i,j in np.argwhere(is_node).tolist():
        #clamp the 3x3 window to the image borders
        li,ri=max(0,i-1),min(i+2,dim1)
        lj,rj=max(0,j-1),min(j+2,dim2)
        for i1 in range(li,ri):
            for j1 in range(lj,rj):
                if (i1!=i or j1!=j) and nodes[i1,j1]!=-1:
                    edges.append([nodes[i,j],nodes[i1,j1]])
                    edge+=1

    if cnt:
        #masked selection is row-major too, matching the node numbering
        attrs.extend(normalize(channel[is_node]))
    print("For given image nodes formed: "+str(cnt)+" edges formed: "+str(edge))
    if cnt:
        graph_id+=1

#generate graphs for all edge-image under given dir along with proper label
def generate_graph_with_labels(dirname,label,activity_map):
    """Build a graph for every ``*.jpg`` directly under ``dirname`` and label it.

    Each image is passed to ``generate_graphs``; for every graph that was
    actually created, ``[label, activity_map[label]]`` is appended to the
    global ``graph_labels``.

    Args:
        dirname: directory searched (non-recursively) for ``*.jpg`` files.
        label: class label given to every node and graph from this directory.
        activity_map: mapping from class label to class name.
    """
    global graph_labels
    print("\n... Reading Directory: "+dirname+" ...\n")
    #sorted so that graph numbering does not depend on directory listing order
    for filename in sorted(glob.glob(dirname+'/*.jpg')):
        graphs_before=graph_id
        generate_graphs(filename,label,activity_map)
        #generate_graphs leaves graph_id unchanged when an image yields no
        #nodes; adding a label for such an image would shift every later
        #graph label against its graph id
        if graph_id!=graphs_before:
            graph_labels.append([label,activity_map[label]])
        else:
            print("No graph formed for "+filename+", graph label skipped")

#generate graphs for all directories
def process_graphs(covid_dir,ncovid_dir,activity_map):
    """Generate graphs for both class directories and print a summary.

    Images under ``covid_dir`` are processed with label 1, then images under
    ``ncovid_dir`` with label 2. Afterwards the lengths of the global
    ``node_labels`` and ``graph_labels`` lists are printed.

    Args:
        covid_dir: directory passed to ``generate_graph_with_labels`` with label 1.
        ncovid_dir: directory passed to ``generate_graph_with_labels`` with label 2.
        activity_map: mapping from class label to class name.
    """
    generate_graph_with_labels(covid_dir,1,activity_map)
    generate_graph_with_labels(ncovid_dir,2,activity_map)
    print("Processing done")
    #separator added: the two fields used to be printed run together
    print("Total nodes formed: "+str(len(node_labels))+" Total graphs formed: "+str(len(graph_labels)))

#input directories, one per class
covid_dir='brain/Pretwitt/COVID' #for covid
ncovid_dir='brain/Pretwitt/NON-COVID' #for non-covid

#build all graphs; results accumulate in the module-level lists
process_graphs(covid_dir,ncovid_dir,activity_map)

#sanity check: sizes of the accumulated lists, one per line
#(node labels, graph labels, edges, node attributes)
#comment if not necessary
print(len(node_labels),len(graph_labels),len(edges),len(attrs),sep="\n")


#create adjacency dataframe (one [node-1, node-2] row per recorded edge)
#reshape keeps the two-column layout even if no edge was collected
df_A=pd.DataFrame(columns=["node-1","node-2"],data=np.array(edges).reshape(-1,2))
print("Shape of edge dataframe: "+str(df_A.shape))
print("\n--summary of dataframe--\n" ,df_A.head(300))

#create node label dataframe
#built from the list directly: going through np.array would coerce the
#integer labels to strings because each row mixes an int and a str
df_node_label=pd.DataFrame(data=node_labels,columns=["label","activity-name"])
print("shape of node-label dataframe: "+str(df_node_label.shape))
print("\n--summary of dataframe--\n" ,df_node_label.head(200))

#create graph label dataframe
df_graph_label=pd.DataFrame(data=graph_labels,columns=["label","activity-name"])
print("shape of graph-label dataframe: "+str(df_graph_label.shape))
print("\n--summary of dataframe--\n" ,df_graph_label.head(200))

#create node-attribute dataframe (normalized grayscale value)
df_node_attr=pd.DataFrame(data=np.array(attrs),columns=["gray-val"])
print("shape of node-attribute dataframe: "+str(df_node_attr.shape))
print("\n--summary of dataframe--\n" ,df_node_attr.head(200))

#create graph-indicator dataframe
df_graph_indicator=pd.DataFrame(data=np.array(graph_indicator),columns=["graph-id"])
print("shape of graph-indicator dataframe: "+str(df_graph_indicator.shape))
print("\n--summary of dataframe--\n" ,df_graph_indicator.head(200))

#omit activity name later for graph-label and node-label
#since GIN model will only accept the label
df_node_label=df_node_label.drop(["activity-name"],axis=1)
print(df_node_label.head(50))

df_graph_label=df_graph_label.drop(["activity-name"],axis=1)
print(df_graph_label.head(50))

def save_dataframe_to_txt(df,filepath):
    """Write ``df`` to ``filepath`` as comma-separated text.

    Neither the header row nor the index is written. An existing file is
    overwritten. Missing parent directories of ``filepath`` are created
    first, so the write does not fail just because the output folder has
    not been made yet.

    Args:
        df: pandas DataFrame to save.
        filepath: destination file path.
    """
    import os  #local import: os is not among this file's top-level imports
    parent=os.path.dirname(filepath)
    if parent:
        os.makedirs(parent,exist_ok=True)
    df.to_csv(filepath,header=False,index=False,sep=',',mode='w')


#write each dataframe to its own .txt file under sourcepath
#expected layout: .../GraphTrain/dataset/<dataset_name>/raw/<dataset_name>_<type>.txt
# <type> is one of:
#   A                --> adjacency (node-1,node-2 per line)
#   graph_indicator  --> graph-id of every node
#   graph_labels     --> label of every graph
#   node_attributes  --> attribute(s) of every node
#   node_labels      --> label of every node


sourcepath='brain/Github_Pretwitt/raw'
save_dataframe_to_txt(df_A, f"{sourcepath}/Github_Pretwitt_A.txt")
save_dataframe_to_txt(df_graph_indicator, f"{sourcepath}/Github_Pretwitt_graph_indicator.txt")
save_dataframe_to_txt(df_graph_label, f"{sourcepath}/Github_Pretwitt_graph_labels.txt")
save_dataframe_to_txt(df_node_attr, f"{sourcepath}/Github_Pretwitt_node_attributes.txt")
save_dataframe_to_txt(df_node_label, f"{sourcepath}/Github_Pretwitt_node_labels.txt")


... Reading Directory: brain/Pretwitt/COVID ...

 ... Reading image: brain/Pretwitt/COVID\img-1.jpg ...
Image type: COVID
Pixel matrix is of: 218x180
For given image nodes formed: 5328 edges formed: 27902
 ... Reading image: brain/Pretwitt/COVID\img-10.jpg ...
Image type: COVID
Pixel matrix is of: 512x512
For given image nodes formed: 19425 edges formed: 107354
 ... Reading image: brain/Pretwitt/COVID\img-100.jpg ...
Image type: COVID
Pixel matrix is of: 938x911
For given image nodes formed: 5307 edges formed: 29880
 ... Reading image: brain/Pretwitt/COVID\img-101.jpg ...
Image type: COVID
Pixel matrix is of: 938x911
For given image nodes formed: 18678 edges formed: 97740
 ... Reading image: brain/Pretwitt/COVID\img-102.jpg ...
Image type: COVID
Pixel matrix is of: 219x230
For given image nodes formed: 6153 edges formed: 34120
 ... Reading image: brain/Pretwitt/COVID\img-103.jpg ...
Image type: COVID
Pixel matrix is of: 325x300
For given image nodes formed: 11834 edges formed: 62528
 

For given image nodes formed: 5604 edges formed: 29286
 ... Reading image: brain/Pretwitt/COVID\img-146.jpg ...
Image type: COVID
Pixel matrix is of: 344x279
For given image nodes formed: 8250 edges formed: 44188
 ... Reading image: brain/Pretwitt/COVID\img-147.jpg ...
Image type: COVID
Pixel matrix is of: 355x294
For given image nodes formed: 9564 edges formed: 53182
 ... Reading image: brain/Pretwitt/COVID\img-148.jpg ...
Image type: COVID
Pixel matrix is of: 225x225
For given image nodes formed: 4834 edges formed: 23584
 ... Reading image: brain/Pretwitt/COVID\img-149.jpg ...
Image type: COVID
Pixel matrix is of: 938x864
For given image nodes formed: 15036 edges formed: 83974
 ... Reading image: brain/Pretwitt/COVID\img-15.jpg ...
Image type: COVID
Pixel matrix is of: 366x310
For given image nodes formed: 11269 edges formed: 65592
 ... Reading image: brain/Pretwitt/COVID\img-150.jpg ...
Image type: COVID
Pixel matrix is of: 355x272
For given image nodes formed: 5628 edges formed: 30

For given image nodes formed: 16008 edges formed: 92834
 ... Reading image: brain/Pretwitt/COVID\img-56.jpg ...
Image type: COVID
Pixel matrix is of: 879x766
For given image nodes formed: 26847 edges formed: 172946
 ... Reading image: brain/Pretwitt/COVID\img-57.jpg ...
Image type: COVID
Pixel matrix is of: 342x273
For given image nodes formed: 12603 edges formed: 63582
 ... Reading image: brain/Pretwitt/COVID\img-58.jpg ...
Image type: COVID
Pixel matrix is of: 351x262
For given image nodes formed: 12335 edges formed: 65750
 ... Reading image: brain/Pretwitt/COVID\img-59.jpg ...
Image type: COVID
Pixel matrix is of: 256x256
For given image nodes formed: 5486 edges formed: 27894
 ... Reading image: brain/Pretwitt/COVID\img-6.jpg ...
Image type: COVID
Pixel matrix is of: 993x825
For given image nodes formed: 24142 edges formed: 149052
 ... Reading image: brain/Pretwitt/COVID\img-60.jpg ...
Image type: COVID
Pixel matrix is of: 340x314
For given image nodes formed: 11185 edges formed: 64

Image type: NON-COVID
Pixel matrix is of: 250x201
For given image nodes formed: 6279 edges formed: 32414
 ... Reading image: brain/Pretwitt/NON-COVID\img-13.jpg ...
Image type: NON-COVID
Pixel matrix is of: 250x201
For given image nodes formed: 6279 edges formed: 32414
 ... Reading image: brain/Pretwitt/NON-COVID\img-14.jpg ...
Image type: NON-COVID
Pixel matrix is of: 225x225
For given image nodes formed: 6021 edges formed: 29248
 ... Reading image: brain/Pretwitt/NON-COVID\img-15.jpg ...
Image type: NON-COVID
Pixel matrix is of: 222x227
For given image nodes formed: 6395 edges formed: 31122
 ... Reading image: brain/Pretwitt/NON-COVID\img-16.jpg ...
Image type: NON-COVID
Pixel matrix is of: 251x201
For given image nodes formed: 7558 edges formed: 41192
 ... Reading image: brain/Pretwitt/NON-COVID\img-17.jpg ...
Image type: NON-COVID
Pixel matrix is of: 197x177
For given image nodes formed: 7917 edges formed: 43460
 ... Reading image: brain/Pretwitt/NON-COVID\img-18.jpg ...
Image type

For given image nodes formed: 18567 edges formed: 110266
 ... Reading image: brain/Pretwitt/NON-COVID\img-59.jpg ...
Image type: NON-COVID
Pixel matrix is of: 442x409
For given image nodes formed: 14763 edges formed: 88326
 ... Reading image: brain/Pretwitt/NON-COVID\img-6.jpg ...
Image type: NON-COVID
Pixel matrix is of: 225x225
For given image nodes formed: 8114 edges formed: 47418
 ... Reading image: brain/Pretwitt/NON-COVID\img-60.jpg ...
Image type: NON-COVID
Pixel matrix is of: 301x275
For given image nodes formed: 14388 edges formed: 83666
 ... Reading image: brain/Pretwitt/NON-COVID\img-61.jpg ...
Image type: NON-COVID
Pixel matrix is of: 537x472
For given image nodes formed: 9898 edges formed: 56810
 ... Reading image: brain/Pretwitt/NON-COVID\img-62.jpg ...
Image type: NON-COVID
Pixel matrix is of: 540x504
For given image nodes formed: 7602 edges formed: 36714
 ... Reading image: brain/Pretwitt/NON-COVID\img-63.jpg ...
Image type: NON-COVID
Pixel matrix is of: 449x359
For giv