# Experiment 5.0 - Inception Resnet v2 - extract Features

Reproduce Results of [Transfer learning with deep convolutional neural network for liver steatosis assessment in ultrasound images](https://pubmed.ncbi.nlm.nih.gov/30094778/). We used a pre-trained CNN to extract features based on B-mode images. 

The CNNfeatures are extracted using the pretrained Inception-Resnet-v2 implemented in Keras.
See reference: https://jkjung-avt.github.io/keras-inceptionresnetv2/

![Screen Shot 2020-10-12 at 2 57 22 PM](https://user-images.githubusercontent.com/23482039/95781182-47437700-0c9b-11eb-8826-594811ba3322.png)


In [29]:
import sys
import random
sys.path.append('../src')

import warnings
warnings.filterwarnings("ignore") 

import pickle
import pandas as pd
import numpy as np
import mlflow
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import tqdm

from utils.compute_metrics import get_metrics, get_majority_vote,log_test_metrics
from utils.dataframe_creation import create_dataframe_preproccessing
from sklearn.preprocessing import StandardScaler
from tensorflow.python.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.model_selection import GroupKFold
from tqdm import tqdm
from pprint import pprint
from itertools import product
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator


  and should_run_async(code)


## 1. Feature Extraction

In [32]:
M, N= 434, 636 # ultrasound image dimension
#In the paper, the authors extract the features directly from the pretrained inception resnet_v2
# pooling: Optional pooling mode for feature extraction when include_top is False.
#'max' means that global max pooling will be applied.
net = InceptionResNetV2(include_top=False,
                        weights='imagenet',
                        input_tensor=None,
                        input_shape=(M,N,3),
                        pooling= 'max')
net.trainable = False
dataset = create_dataframe_preproccessing()
dataset.head()

Unnamed: 0,id,labels,fat,fname
0,1,0,3,../data/01_raw/raw_images/P1_image1.jpg
1,1,0,3,../data/01_raw/raw_images/P1_image2.jpg
2,1,0,3,../data/01_raw/raw_images/P1_image3.jpg
3,1,0,3,../data/01_raw/raw_images/P1_image4.jpg
4,1,0,3,../data/01_raw/raw_images/P1_image5.jpg


In [43]:
dataset = dataset.astype({"labels": str})
datagen = ImageDataGenerator(rescale=1./255)
generator = datagen.flow_from_dataframe(dataset, x_col='fname',y_col='labels',target_size=(M,N),shuffle=False, batch_size =25)

Found 550 validated image filenames belonging to 2 classes.


In [44]:
max_pool_dim = 1536
features = np.zeros(shape=(len(dataset), max_pool_dim))
batch_size = 25
assert len(dataset)%batch_size ==0, '550 should be divisable by batch_SIZE'

for i, (inputs_batch, labels_batch) in enumerate(tqdm(generator)):
    features[i*batch_size:(i+1)*batch_size] = net(inputs_batch)
    if (i+1)*batch_size == 550:
        break

 95%|█████████▌| 21/22 [05:43<00:16, 16.37s/it]


In [None]:
features.shape

## 2. Save features

In [62]:
df_features = pd.DataFrame(features)
df_features= pd.concat([dataset['id'], dataset['labels'], df_features], axis=1)
df_features[["labels"]] = df_features[["labels"]].apply(pd.to_numeric)
df_features.head()

Unnamed: 0,id,labels,0,1,2,3,4,5,6,7,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,1,0,2.308496,1.377344,2.677886,0.873284,0.765629,1.705909,3.408358,1.842248,...,0.391646,3.605376,1.503199,1.748474,3.826392,1.899497,0.761513,1.708645,2.588735,2.746312
1,1,0,2.256733,1.818705,2.325612,0.761692,0.849174,1.940296,3.300263,1.809696,...,0.455811,4.099022,2.056846,2.406654,4.398932,1.945405,0.641796,1.523787,2.445662,2.617398
2,1,0,2.460423,2.025961,2.874427,0.729016,0.773342,1.886959,3.529279,1.932057,...,0.667012,4.026766,1.547058,2.230511,4.60905,1.914829,0.638776,1.566685,2.25143,3.049433
3,1,0,2.392491,1.863037,2.225823,0.884124,1.109694,2.026623,3.00141,1.578742,...,0.462886,4.100451,1.464487,2.062542,4.497901,1.899501,0.814888,1.404126,1.700386,2.840468
4,1,0,1.743439,1.70739,2.137489,0.666928,0.654343,2.059746,3.975016,1.743472,...,0.426692,3.789214,1.363746,2.309665,5.107124,1.96231,0.688884,1.454412,1.855297,2.486797


In [64]:
# Create a dict for the scattering features, and parameters
inception_dict_tensor = {
                'features':df_features,
                'label' : dataset['labels'],
                'pid' : dataset['id'],}

with open('../data/03_features/inception_dict_tensor.pickle', 'wb') as handle:
    pickle.dump(inception_dict_tensor, handle, protocol=pickle.HIGHEST_PROTOCOL)