## Problem 6.12

In [51]:
import tensorflow as tf
import numpy as np
import PIL
import cv2
import os
import sklearn
import pandas as pd
import pickle
from tqdm.notebook import tqdm
from sklearn import preprocessing
from sklearn import svm
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

### Computational Environment

In [2]:
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### Helper function

In [3]:
def load_image(path, width=484, preprocess_input=tf.keras.applications.vgg16.preprocess_input):
    """
    Load and Preprocessing image
    """
    img = tf.keras.utils.load_img(path)
    x = tf.keras.utils.img_to_array(img)
    x = x[0:width,:,:]
    x = np.expand_dims(x, axis=0)
    return tf.keras.applications.vgg16.preprocess_input(x)

### Data inspectation

In [4]:
dpath = os.path.join("data", "CMU-UHCS_Dataset")
pic_path = os.path.join(dpath, "images")
df_micro = pd.read_csv( os.path.join(dpath, "micrograph.csv"))
df_micro = df_micro[["path", "primary_microconstituent"]]

for i in range(0, len(df_micro)):
    img_ph = os.path.join(pic_path,df_micro.iloc[i][0])
    assert os.path.exists(img_ph)
    df_micro.iloc[i][0] = img_ph

CLS_rm = ["pearlite+widmanstatten", "martensite", "pearlite+spheroidite"] #(type, sample size)

In [5]:
for c in CLS_rm:
    df_micro.drop(df_micro[df_micro["primary_microconstituent"] == c].index, inplace=True)

In [6]:
# labels
name_lbs = df_micro["primary_microconstituent"].unique()
le = preprocessing.LabelEncoder()
le.fit(name_lbs)
list(le.classes_)

['network', 'pearlite', 'spheroidite', 'spheroidite+widmanstatten']

In [7]:
dlabel = le.transform(df_micro["primary_microconstituent"])
df_micro.insert(2, "label", dlabel)
df_micro

Unnamed: 0,path,primary_microconstituent,label
0,data/CMU-UHCS_Dataset/images/micrograph1.tif,pearlite,1
1,data/CMU-UHCS_Dataset/images/micrograph2.tif,spheroidite,2
3,data/CMU-UHCS_Dataset/images/micrograph5.tif,pearlite,1
4,data/CMU-UHCS_Dataset/images/micrograph6.tif,spheroidite,2
5,data/CMU-UHCS_Dataset/images/micrograph7.tif,spheroidite+widmanstatten,3
...,...,...,...
955,data/CMU-UHCS_Dataset/images/micrograph1722.tif,spheroidite,2
957,data/CMU-UHCS_Dataset/images/micrograph1726.tif,spheroidite+widmanstatten,3
958,data/CMU-UHCS_Dataset/images/micrograph1730.png,spheroidite,2
959,data/CMU-UHCS_Dataset/images/micrograph1731.tif,pearlite,1


### Data Processing

In [8]:
# Train-test split
df_test = df_micro.copy()
df_train = pd.DataFrame(columns = df_micro.keys())

split_info = [("spheroidite", 100),\
              ("network", 100),\
              ("pearlite", 100),\
              ("spheroidite+widmanstatten", 60)] #(type, sample size)



for ln in split_info:
    label, n = ln
    id_train = df_micro[df_micro["primary_microconstituent"] == label][0:n].index
    df_test.drop(id_train, axis=0, inplace=True)
    df_train = pd.concat([df_train, df_micro.loc[id_train]])

In [9]:
df_train

Unnamed: 0,path,primary_microconstituent,label
1,data/CMU-UHCS_Dataset/images/micrograph2.tif,spheroidite,2
4,data/CMU-UHCS_Dataset/images/micrograph6.tif,spheroidite,2
8,data/CMU-UHCS_Dataset/images/micrograph10.png,spheroidite,2
9,data/CMU-UHCS_Dataset/images/micrograph11.tif,spheroidite,2
20,data/CMU-UHCS_Dataset/images/micrograph29.tif,spheroidite,2
...,...,...,...
596,data/CMU-UHCS_Dataset/images/micrograph1093.tif,spheroidite+widmanstatten,3
618,data/CMU-UHCS_Dataset/images/micrograph1129.tif,spheroidite+widmanstatten,3
631,data/CMU-UHCS_Dataset/images/micrograph1156.tif,spheroidite+widmanstatten,3
672,data/CMU-UHCS_Dataset/images/micrograph1218.tif,spheroidite+widmanstatten,3


In [10]:
df_test

Unnamed: 0,path,primary_microconstituent,label
237,data/CMU-UHCS_Dataset/images/micrograph436.png,spheroidite,2
238,data/CMU-UHCS_Dataset/images/micrograph437.tif,spheroidite,2
239,data/CMU-UHCS_Dataset/images/micrograph440.png,spheroidite,2
241,data/CMU-UHCS_Dataset/images/micrograph442.tif,spheroidite,2
242,data/CMU-UHCS_Dataset/images/micrograph443.tif,spheroidite,2
...,...,...,...
955,data/CMU-UHCS_Dataset/images/micrograph1722.tif,spheroidite,2
957,data/CMU-UHCS_Dataset/images/micrograph1726.tif,spheroidite+widmanstatten,3
958,data/CMU-UHCS_Dataset/images/micrograph1730.png,spheroidite,2
959,data/CMU-UHCS_Dataset/images/micrograph1731.tif,pearlite,1


### Feature Extraction

In [11]:
# VGG16

base_model = tf.keras.applications.vgg16.VGG16(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation='softmax'
)

base_model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 block1_conv1 (Conv2D)       (None, None, None, 64)    1792      
                                                                 
 block1_conv2 (Conv2D)       (None, None, None, 64)    36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, None, None, 64)    0         
                                                                 
 block2_conv1 (Conv2D)       (None, None, None, 128)   73856     
                                                                 
 block2_conv2 (Conv2D)       (None, None, None, 128)   147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, None, None, 128)   0     

Use five layers

In [12]:
out_layer_ns = ["block{}_pool".format(i) for i in range(1,6)]
out_layer_ns

['block1_pool', 'block2_pool', 'block3_pool', 'block4_pool', 'block5_pool']

In [13]:
extmodel = dict(zip(out_layer_ns, [tf.keras.Model(
    inputs= base_model.input,
    outputs=base_model.get_layer(bk_name).output
) for bk_name in out_layer_ns]))

extmodel

{'block1_pool': <keras.engine.functional.Functional at 0x29f411e20>,
 'block2_pool': <keras.engine.functional.Functional at 0x2af72ecd0>,
 'block3_pool': <keras.engine.functional.Functional at 0x2b06b63d0>,
 'block4_pool': <keras.engine.functional.Functional at 0x2b06be5b0>,
 'block5_pool': <keras.engine.functional.Functional at 0x2b06bedf0>}

In [14]:
out_shapes = [extmodel[m].output_shape[-1] for m in extmodel.keys()]
out_shapes

[64, 128, 256, 512, 512]

In [15]:
n_imgs = df_train.shape[0]
fs = [np.zeros((n_imgs, n_f)) for n_f in out_shapes]
features = dict(zip(out_layer_ns, fs))

features

{'block1_pool': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'block2_pool': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'block3_pool': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'block4_pool': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ...

In [20]:
for m in tqdm(extmodel.keys()):
    for j, ph in tqdm(enumerate(df_train["path"])):
        x = load_image(ph)
        xb = extmodel[m].predict(x, verbose = 0) # silence output
        F = np.mean(xb,axis=(0,1,2))
        features[m][j, :] = F

  0%|          | 0/5 [00:00<?, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [34]:
# save extracted features
path_feature= os.path.join(dpath, "feature.pkl")
f = open(path_feature, "wb")
pickle.dump(features, f)
f.close()

### SVM

In [41]:
# load data
fo = open(path_feature, "rb")
feats = pickle.load(fo)

In [44]:
feats["block1_pool"].shape

(360, 64)

In [56]:
y = df_train["label"].to_numpy()
y.shape

(360,)

In [57]:
y

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3,

In [59]:
feats["block1_pool"].shape

(360, 64)

In [53]:
clf = svm.SVC(kernel="rbf")
clf.fit(feats["block1_pool"], y)

ValueError: Unknown label type: 'unknown'