In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import matplotlib.cm as cm

from random import randint
from skimage.transform import radon

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.cluster import KMeans



def single_square_data_generator(side_size : "size of the square's side",
                                     ) -> " (x,y) tuple " :


    img = np.zeros(side_size**2) # single 0s arr with all the points 
    img[: randint(0,side_size**2)]  = 1  # transform some random number of points into 1s
    np.random.shuffle(img) # shuffle arr
    img = np.reshape(img,(side_size,side_size)) # transform arr to a double arr

    sinogram = radon(img) # sinogram transformation from skimage.transform library [ sinogram transf is called radon transform]
    
    return (sinogram, img)




def square_data_generator(n : "number of data to be generated",
                          side_size : "size of the square's side",
                          ) -> " (x,y) where x - list of sinograms, y - list of corresponding images" :

    x, y = [], []    
    for _ in range(n):
        sinogram, img = single_square_data_generator(side_size)
        x.append(sinogram)
        y.append(img)
        
    x = np.array(x)
    y = np.array(y)
    
    
    
    #### To make it fit the NN we need to transform these to tensors, change their dimensions, and cast into double [float32]
    
    x = torch.from_numpy(x)
    x = x.view(-1,1,64,180)
    x = x.to(dtype=torch.float32)
    
    
    #### Do we need to convert y to tensor as well? Probs yes so we use GPU to calculate Cross Entropy loss ?

    return (x,y)

In [2]:
# 8983 - nr of images used in CNN_paper, 64 - size of them
# The nr of channels used is the question tho ! 
sinograms, images = square_data_generator(50,64)  

  warn('Radon transform: image must be zero outside the '


In [3]:
sinograms.shape

torch.Size([50, 1, 64, 180])

## Model time 

In [4]:
class CNN(nn.Module):
    def __init__(self, side_size = 64, chan1 = 5, chan2 = 10, chan3 = 15) :
        super(CNN, self).__init__()
        
        self.final_side_size = int(((side_size-4)//3 - 2) //3 - 2)
        
        
        self.conv1 = nn.Conv2d(1, chan1, 5)
        self.conv2 = nn.Conv2d(chan1, chan2, 3)
        self.conv3 = nn.Conv2d(chan2, chan3, 3)
        self.pool = nn.MaxPool2d(3,3) 
        
        self.fc1 = nn.Linear(15 * 16  * self.final_side_size ,500) # what's the first thing coming in ?
        self.fc2 = nn.Linear(500,180) # sigmoid act ! 
        self.sigmoid = nn.Sigmoid()
        
        
    def forward(self,x) :
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = F.relu(self.conv3(x))
        # x = self.pool(x)
        
        
        # 16 is what 180 becomes after all the operations, if we apply 1 more maxpool(3,3) at the end we 180 becomes 5 instead
        # 15 is the number of channels [ chan3 ! ]
        
        x = x.view(-1, 15 * 16 * self.final_side_size)
        x = F.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x)) # SIGMOID ! 
        return x
        
        
#So the counting will go sth like this 
# Conv2d(input_chan, output chann) -> The next conv2D has got to have input chan equal to prev output channel
# Count the ending size like this : 
# side_size - kernel_size + 1 -> //2 -> -kernel + 1 -> //2 
# What chan size should be used !?

In [5]:
Net = CNN()

In [26]:
# Here, test the forward function - it should output 180 probabilities 
sinogram, img = square_data_generator(1,64)




probabilities = Net(sinogram) 
print(probabilities)






tensor([[0.4917, 0.5031, 0.4860, 0.4961, 0.5062, 0.4855, 0.4898, 0.5145, 0.4913,
         0.4964, 0.5141, 0.4939, 0.4952, 0.5101, 0.4847, 0.5093, 0.4976, 0.4945,
         0.4963, 0.5003, 0.5055, 0.5039, 0.5109, 0.5043, 0.5120, 0.5163, 0.5086,
         0.4969, 0.4875, 0.4960, 0.4934, 0.4923, 0.5100, 0.4983, 0.4897, 0.4962,
         0.5016, 0.5036, 0.5051, 0.5134, 0.5044, 0.5009, 0.4929, 0.4931, 0.5044,
         0.4963, 0.4936, 0.4928, 0.5084, 0.4910, 0.4935, 0.4901, 0.5032, 0.4924,
         0.5117, 0.4998, 0.4944, 0.5055, 0.5002, 0.5051, 0.5014, 0.5023, 0.4987,
         0.5050, 0.5017, 0.4931, 0.4923, 0.5020, 0.5075, 0.5024, 0.4937, 0.5047,
         0.4967, 0.4880, 0.4940, 0.4937, 0.4893, 0.5105, 0.5082, 0.5097, 0.5051,
         0.5044, 0.5129, 0.5098, 0.4988, 0.5091, 0.5037, 0.4973, 0.5083, 0.4938,
         0.5050, 0.4996, 0.5033, 0.5119, 0.4998, 0.5144, 0.5021, 0.4875, 0.4951,
         0.4985, 0.4910, 0.5073, 0.4916, 0.4926, 0.4874, 0.4975, 0.5019, 0.4967,
         0.4979, 0.4918, 0.5

In [52]:
# Define threshold 
threshold = 0.51


# Choose all projections FROM THAT SINGLE SINOGRAM, that were above given probability
projection_list = []
for counter, probability in enumerate(probabilities[0]):   # Because probabilities is 
    if probability >= threshold :
        projection = sinogram[0, 0, :, counter]
        projection_list.append(projection)
        
# MAKE THIS LOOP OUTPUT TENSOR OR NUMPY ARRAY, BUT NOT A LIST ??? READ ON KMEANS WITH TENSOR, DOES IT WORKS ?  
        
        
# k-means over all these projections, choose 2 best clusters 



kmeans = KMeans(n_clusters = 3).fit(projection_list)
final_project_list = kmeans.cluster_centers_

final_project_list




ValueError: only one element tensors can be converted to Python scalars

In [51]:
len(projection_list)

23

## Define reconstruction function !!! 

## Some notes & to-dos 

->Make sure everyhing is in tensors ! Send numpy arrays to tensors ! [ otherwise the backprob wont work properly ! ] 

->Is the generator allright? I mean, it's so random? Shouldn't it be more like a square with some white elements within in ? 

->Why do we use sigmoid ? Instead of Softmax? Softmax would give us probabilities over all of these ! 

->Do we need to convert y[ img in generator]  to tensor as well? Probs yes so we use GPU to calculate Cross Entropy loss ?

->What about normalizing in generator and batch norm layers ? 
The paper is from 2019, the batch norm has been popular since 2018, so maybe they skipped it on purpose ? 

->We have to add reconstruction and k-means within the model if we want to feed the batches into it, otherwise we get couple of sinograms but only 1 outputs probabilites, which just doesnt make sense ! 

->Idea for optimization, as we learn more, we could increase the threshold becaue the model should be more sure about its decisions ??

In [32]:
from math import sqrt 


# !!!! Wrong assumptions !!!!, you know that the 1st dimension is 180, so what's the second one ? 
def calc_start_img_size(kernel_size) :
    img_size = 500
    if 500%kernel_size!=0 :
        #print("Wrong kernel size !")
        a=2
    else : 
        img_size = 500//kernel_size
        if not sqrt(img_size).is_integer() :
            #print("Wrong kernel size !")
            a=2
        else :
            img_size = sqrt(img_size)
            #print("img_size : {}, kernel_size : {}".format(img_size, kernel_size))
            #Kernel size is either  5, 20, 125, 500 [ so probs 20 or 5]
            
            img_size = ((3*img_size+2)*3 + 2)*3 + 4
            print("img_size : {}, kernel_size : {}".format(img_size, kernel_size))
            
        

In [33]:
for i in range(500):
    calc_start_img_size(i+1)

img_size : 298.0, kernel_size : 5
img_size : 163.0, kernel_size : 20
img_size : 82.0, kernel_size : 125
img_size : 55.0, kernel_size : 500


In [39]:
((180-4)//3 - 2) //3 - 2

16