Import all necessary libraries and databases:

In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib qt
import numpy as np
from sklearn.decomposition import PCA

In [None]:
# Load the measurement table. Later cells read the columns
# 'calibration_measurement_id', 'measurement_data' and 'cancer_tissue'.
df = pd.read_json('unzipped_path/data.json')
# Preview only the first rows instead of rendering the whole frame as cell output.
df.head()

Set variables:

In [10]:
# Ids of the two calibration measurements whose samples are analysed together.
VAR_calibration1, VAR_calibration2 = 735, 747
# Intensity above which an image value is treated as part of the primary beam.
VAR_threshold = 600

Function to isolate the centre (primary) beam, which is later subtracted from the image to remove it:

In [11]:
def remove_centre_beam(xray_image, threshold=None):
    """Return an image containing only the primary-beam region of ``xray_image``.

    Despite its name, this function does not remove the beam itself: it
    returns an array of the same shape as the input that is zero everywhere
    except inside the bounding box of all values above ``threshold``, where
    the original values are copied. Callers subtract this result from the
    full image to remove the beam.

    Parameters
    ----------
    xray_image : array-like with at least 2 dimensions
        The measured image.
    threshold : number, optional
        Values strictly greater than this are treated as primary beam.
        Defaults to the notebook-level ``VAR_threshold``.

    Returns
    -------
    numpy.ndarray
        Same shape and dtype as ``xray_image``. All zeros when no value
        exceeds the threshold.
    """
    if threshold is None:
        threshold = VAR_threshold
    xray_image = np.asarray(xray_image)

    primary_beam_mask = xray_image > threshold
    isolated_beam = np.zeros_like(xray_image)

    true_indices = np.argwhere(primary_beam_mask)
    if true_indices.size == 0:
        # No beam detected: .min()/.max() on an empty array would raise, and
        # there is nothing to isolate, so return the all-zero image.
        return isolated_beam

    # Bounding box (inclusive) of every above-threshold value.
    min_row, min_col = true_indices[:, 0].min(), true_indices[:, 1].min()
    max_row, max_col = true_indices[:, 0].max(), true_indices[:, 1].max()
    box = (slice(min_row, max_row + 1), slice(min_col, max_col + 1))

    # The whole box is copied, including below-threshold values inside it.
    isolated_beam[box] = xray_image[box]
    return isolated_beam

Create an averaged matrix of all samples within the calibrations:

In [12]:
# Average the normalised, beam-subtracted Fourier magnitude over every sample
# that belongs to one of the two calibrations. `totals` (the average image)
# and `number_samples` are consumed by later cells.
totals = np.zeros([256, 256])  # images are assumed to be 256x256
number_samples = 0

# Calibration 1 rows are accumulated first, then calibration 2 rows.
for calibration_id in (VAR_calibration1, VAR_calibration2):
    for index, row in df.iterrows():
        if row['calibration_measurement_id'] != calibration_id:
            continue
        xray_image = np.array(row['measurement_data'])
        isolated_beam = remove_centre_beam(xray_image)
        # Subtract the beam in Fourier space and keep the magnitude.
        img = np.absolute(np.subtract(np.fft.fft2(xray_image), np.fft.fft2(isolated_beam)))
        # Normalise by the [0, 0] (zero-frequency) element.
        img = np.divide(img, img[0][0])
        totals = np.add(totals, img)
        number_samples += 1

if number_samples == 0:
    # Dividing by zero would silently turn `totals` into NaNs.
    raise ValueError(
        'No samples found for calibrations '
        + str(VAR_calibration1) + ', ' + str(VAR_calibration2)
    )
totals = np.divide(totals, number_samples)

Create a flattened matrix of all standardised samples:

In [13]:
# Build the PCA input: one flattened, standardised Fourier image per row.
# Rows are ordered calibration 1 first, then calibration 2. The same
# calibrations must be used here as for `totals`, `number_samples` and the
# label keys, otherwise rows and labels no longer line up.
matrix = np.empty([number_samples, totals.size])
counter = 0
for calibration_id in (VAR_calibration1, VAR_calibration2):
    for index, row in df.iterrows():
        if row['calibration_measurement_id'] != calibration_id:
            continue
        xray_image = np.array(row['measurement_data'])
        isolated_beam = remove_centre_beam(xray_image)
        # Subtract the beam in Fourier space and keep the magnitude.
        img = np.absolute(np.subtract(np.fft.fft2(xray_image), np.fft.fft2(isolated_beam)))
        # Normalise by the [0, 0] (zero-frequency) element.
        img = np.divide(img, img[0][0])
        # Standardise: divide by the average image.
        img = np.divide(img, totals)
        matrix[counter] = np.ravel(img)
        counter += 1

# np.empty leaves unfilled rows uninitialised, so make sure every row was written.
assert counter == number_samples, (
    'filled ' + str(counter) + ' rows but expected ' + str(number_samples)
)

Create a "key" for each calibration listing the true/false cancer label of every sample:

In [14]:
# Cancer labels ('cancer_tissue') of the samples in each calibration, in
# DataFrame row order.
key1 = [
    sample['cancer_tissue']
    for _, sample in df.iterrows()
    if sample['calibration_measurement_id'] == VAR_calibration1
]
key2 = [
    sample['cancer_tissue']
    for _, sample in df.iterrows()
    if sample['calibration_measurement_id'] == VAR_calibration2
]

Perform PCA:

In [15]:
# Project every sample onto the first three principal components.
# random_state is fixed because, for an input this wide, scikit-learn's
# automatic solver choice may be the randomized one; without a seed the
# projection could differ between runs.
pca = PCA(n_components=3, random_state=0)
fitted = pca.fit(matrix)
transformed = pca.transform(matrix)

Graph PCA:

In [16]:
# 3D scatter of the first three principal components, coloured by
# calibration and cancer label.
x = transformed[:, 0]
y = transformed[:, 1]
z = transformed[:, 2]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Rows of `transformed` are expected in the order calibration 1 samples
# followed by calibration 2 samples, i.e. the order of key1 + key2.
is_cancer = np.array([bool(item) for item in list(key1) + list(key2)], dtype=bool)
if len(is_cancer) != len(x):
    raise ValueError(
        'number of labels (' + str(len(is_cancer))
        + ') does not match number of PCA rows (' + str(len(x)) + ')'
    )
is_calibration1 = np.arange(len(is_cancer)) < len(key1)

# One scatter call per group, so each group gets exactly one legend entry.
groups = [
    (is_calibration1 & is_cancer, 'blue', 'Cancer (' + str(VAR_calibration1) + ')'),
    (is_calibration1 & ~is_cancer, 'red', 'Non-Cancer (' + str(VAR_calibration1) + ')'),
    (~is_calibration1 & is_cancer, 'purple', 'Cancer (' + str(VAR_calibration2) + ')'),
    (~is_calibration1 & ~is_cancer, 'orange', 'Non-Cancer (' + str(VAR_calibration2) + ')'),
]
drawn_any = False
for mask, colour, label in groups:
    if mask.any():
        ax.scatter(x[mask], y[mask], z[mask], c=colour, label=label)
        drawn_any = True

ax.set_xlabel('pc0')
ax.set_ylabel('pc1')
ax.set_zlabel('pc2')
if drawn_any:
    # Without a legend the colour coding cannot be interpreted.
    ax.legend()
plt.title('calibrations '+str(VAR_calibration1) + ', ' + str(VAR_calibration2))
plt.show()