# Image Features Exploration
#### Authors: Shanduojiao Jiang (sj99@stanford.edu), Kelly Brennan (kbrenn@stanford.edu)

In [1]:
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score

In [2]:
# Section 1: Data Processing

In [3]:
# Read in the clinical data
dfClinic = pd.read_excel("../data/Clinical_and_Other_Features.xlsx", sheet_name="Data", header=1, skiprows=[2])
# dfClinic[:5]

In [4]:
# Read in Image Features file
imageFeat = pd.read_excel("../data/Imaging_Features.xlsx")
imageFeat[:5]

Unnamed: 0,Patient ID,"F1_DT_POSTCON (T11=0.05,T12=0.5)","F1_DT_POSTCON (T11=0.05,T12=0.1)","F1_DT_POSTCON (T11=0.02,T12=0.5)","F1_DT_POSTCON (T11=0.02,T12=0.8)","F1_DT_POSTCON (T11=0.05,T12=0.8)","F1_DT_POSTCON (T11=0.1,T12=0.5)","F1_DT_POSTCON (T11=0.1,T12=0.8)","F1_DT_POSTCON (T11=0.2,T12=0.5)","F1_DT_POSTCON (T11=0.2,T12=0.8)",...,WashinRate_map_difference_entropy_tissue_PostCon,WashinRate_map_information_measure_correlation1_tissue_PostCon,WashinRate_map_information_measure_correlation2_tissue_PostCon,WashinRate_map_inverse_difference_is_homom_tissue_PostCon,WashinRate_map_inverse_difference_normalized_tissue_PostCon,WashinRate_map_inverse_difference_moment_normalized_tissue_PostCon,WashinRate_map_mean_tissue_PostCon,WashinRate_map_std_dev_tissue_PostCon,WashinRate_map_skewness_tissue_PostCon,WashinRate_map_kurtosis_tissue_PostCon
0,Breast_MRI_001,1.0,0.120721,0.530395,1.0,1.0,1.0,1.0,1.0,1.0,...,3.380663,-0.025575,0.422391,0.171959,0.960359,0.996829,14.517894,20.347506,1.62587,11.406955
1,Breast_MRI_002,1.0,0.129546,0.485217,1.0,1.0,1.0,1.0,1.0,1.0,...,3.444474,-0.036063,0.505652,0.177087,0.959067,0.996363,47.29795,83.909561,0.251498,5.659428
2,Breast_MRI_003,0.174775,0.062051,0.06991,0.132265,0.330662,0.34955,0.661324,0.6991,1.0,...,3.478455,-0.04373,0.546674,0.170507,0.957527,0.995981,114.171582,129.252343,1.928743,11.554948
3,Breast_MRI_004,0.086546,0.045111,0.034619,0.051265,0.128162,0.173093,0.256325,0.346185,0.51265,...,3.389678,-0.017802,0.363818,0.17721,0.960705,0.996827,33.499175,69.164227,1.171314,8.493319
4,Breast_MRI_005,0.289669,0.052031,0.115868,0.378575,0.839984,0.579338,1.0,0.958287,1.0,...,4.009938,-0.049294,0.603426,0.117966,0.930624,0.989135,34.406635,26.951415,0.985464,4.331451


In [5]:
# List of columns
imageFeat.columns

Index(['Patient ID', 'F1_DT_POSTCON (T11=0.05,T12=0.5)',
       'F1_DT_POSTCON (T11=0.05,T12=0.1)', 'F1_DT_POSTCON (T11=0.02,T12=0.5)',
       'F1_DT_POSTCON (T11=0.02,T12=0.8)', 'F1_DT_POSTCON (T11=0.05,T12=0.8)',
       'F1_DT_POSTCON (T11=0.1,T12=0.5)', 'F1_DT_POSTCON (T11=0.1,T12=0.8)',
       'F1_DT_POSTCON (T11=0.2,T12=0.5)', 'F1_DT_POSTCON (T11=0.2,T12=0.8)',
       ...
       'WashinRate_map_difference_entropy_tissue_PostCon',
       'WashinRate_map_information_measure_correlation1_tissue_PostCon',
       'WashinRate_map_information_measure_correlation2_tissue_PostCon',
       'WashinRate_map_inverse_difference_is_homom_tissue_PostCon',
       'WashinRate_map_inverse_difference_normalized_tissue_PostCon',
       'WashinRate_map_inverse_difference_moment_normalized_tissue_PostCon',
       'WashinRate_map_mean_tissue_PostCon',
       'WashinRate_map_std_dev_tissue_PostCon',
       'WashinRate_map_skewness_tissue_PostCon',
       'WashinRate_map_kurtosis_tissue_PostCon'],
      dt

#### Generating binary labels for recurrence, readmission, and death

In [6]:
local_recur = dfClinic["Days to local recurrence (from the date of diagnosis) "].tolist()
local_recur_label = [0 if r == "NP" else 1 for r in local_recur]

In [7]:
dist_recur = dfClinic['Days to distant recurrence(from the date of diagnosis) '].to_list()
dist_recur_label = [0 if r == "NP" else 1 for r in dist_recur]

In [8]:
death = dfClinic['Days to death (from the date of diagnosis) '].to_list()
death_label = [0 if r == "NP" else 1 for r in death]

In [9]:
labels = pd.DataFrame()

labels["local_recur_label"] = local_recur_label
labels["dist_recur_label"] = dist_recur_label
labels["death_label"] = death_label

# Combining local and distance recurrence
labels["recurrence"] = labels["local_recur_label"] + labels["dist_recur_label"]
labels.loc[labels["recurrence"] == 2] = 1
print(min(labels["recurrence"]), max(labels["recurrence"]))

0 1


Investigating the distribution of the labels and our data

In [10]:
N = len(imageFeat)
print(N)
print(np.sum(labels["local_recur_label"]))
print(np.sum(labels["dist_recur_label"]))
print(np.sum(labels["death_label"]))
print(imageFeat.shape)

922
16
76
62
(922, 530)


Shaping the Data into data(x) and labels(Y)

In [11]:
data = imageFeat.loc[:, imageFeat.columns != "Patient ID"]
data.head()
X = tf.convert_to_tensor(data)
Y = tf.convert_to_tensor(labels["recurrence"])

2022-12-05 21:45:51.294556: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-05 21:45:51.404074: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-05 21:45:51.405861: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-05 21:45:51.409687: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

# Section 3: Exploring a model

Model setup

In [25]:
model = tf.keras.applications.resnet50.ResNet50()


ValueError: When setting `include_top=True` and loading `imagenet` weights, `input_shape` should be (224, 224, 3).

In [19]:
model.compile(
    optimizer = tf.keras.optimizers.Adam(), 
    loss = tf.keras.losses.BinaryCrossentropy(), 
    metrics=["accuracy"]
)

In [20]:
model.fit(
    X, 
    Y,
    epochs = 10
)

Epoch 1/10


2022-12-05 21:59:52.744570: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


ValueError: in user code:

    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:853 train_function  *
        return step_function(self, iterator)
    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:835 run_step  **
        outputs = model.train_step(data)
    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:787 train_step
        y_pred = self(x, training=True)
    /opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py:1020 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /opt/conda/lib/python3.7/site-packages/keras/engine/input_spec.py:269 assert_input_compatibility
        ', found shape=' + display_shape(x.shape))

    ValueError: Input 0 is incompatible with layer resnet50: expected shape=(None, 224, 224, 3), found shape=(None, 529)
