## Problem 6.12

In [257]:
import tensorflow as tf
import numpy as np
import PIL
import cv2
import os
import sklearn
import pandas as pd
import pickle
import platform
from tqdm.notebook import tqdm
from sklearn import preprocessing
from sklearn import svm
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from scipy import stats as st

### Computational Environment

In [96]:
physical_devices = tf.config.list_physical_devices('GPU')
my_system = platform.uname()
print(physical_devices)
print(f"System: {my_system.system}")
print(f"Node Name: {my_system.node}")
print(f"Release: {my_system.release}")
print(f"Version: {my_system.version}")
print(f"Machine: {my_system.machine}")
print(f"Processor: {my_system.processor}")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
System: Darwin
Node Name: client-10-228-18-202.tamulink.tamu.edu
Release: 21.5.0
Version: Darwin Kernel Version 21.5.0: Tue Apr 26 21:08:29 PDT 2022; root:xnu-8020.121.3~4/RELEASE_ARM64_T8101
Machine: arm64
Processor: i386


### Helper function

In [3]:
def load_image(path, width=484, preprocess_input=tf.keras.applications.vgg16.preprocess_input):
    """
    Load and Preprocessing image
    """
    img = tf.keras.utils.load_img(path)
    x = tf.keras.utils.img_to_array(img)
    x = x[0:width,:,:]
    x = np.expand_dims(x, axis=0)
    return tf.keras.applications.vgg16.preprocess_input(x)

### Data inspectation

In [4]:
dpath = os.path.join("data", "CMU-UHCS_Dataset")
pic_path = os.path.join(dpath, "images")
df_micro = pd.read_csv( os.path.join(dpath, "micrograph.csv"))
df_micro = df_micro[["path", "primary_microconstituent"]]

for i in range(0, len(df_micro)):
    img_ph = os.path.join(pic_path,df_micro.iloc[i][0])
    assert os.path.exists(img_ph)
    df_micro.iloc[i][0] = img_ph

CLS_rm = ["pearlite+widmanstatten", "martensite", "pearlite+spheroidite"] #(type, sample size)

In [5]:
for c in CLS_rm:
    df_micro.drop(df_micro[df_micro["primary_microconstituent"] == c].index, inplace=True)

In [6]:
# labels
name_lbs = df_micro["primary_microconstituent"].unique()
le = preprocessing.LabelEncoder()
le.fit(name_lbs)
list(le.classes_)

['network', 'pearlite', 'spheroidite', 'spheroidite+widmanstatten']

In [7]:
dlabel = le.transform(df_micro["primary_microconstituent"])
df_micro.insert(2, "label", dlabel)
df_micro

Unnamed: 0,path,primary_microconstituent,label
0,data/CMU-UHCS_Dataset/images/micrograph1.tif,pearlite,1
1,data/CMU-UHCS_Dataset/images/micrograph2.tif,spheroidite,2
3,data/CMU-UHCS_Dataset/images/micrograph5.tif,pearlite,1
4,data/CMU-UHCS_Dataset/images/micrograph6.tif,spheroidite,2
5,data/CMU-UHCS_Dataset/images/micrograph7.tif,spheroidite+widmanstatten,3
...,...,...,...
955,data/CMU-UHCS_Dataset/images/micrograph1722.tif,spheroidite,2
957,data/CMU-UHCS_Dataset/images/micrograph1726.tif,spheroidite+widmanstatten,3
958,data/CMU-UHCS_Dataset/images/micrograph1730.png,spheroidite,2
959,data/CMU-UHCS_Dataset/images/micrograph1731.tif,pearlite,1


### Data Processing

In [8]:
# Train-test split
df_test = df_micro.copy()
df_train = pd.DataFrame(columns = df_micro.keys())

split_info = [("spheroidite", 100),\
              ("network", 100),\
              ("pearlite", 100),\
              ("spheroidite+widmanstatten", 60)] #(type, sample size)



for ln in split_info:
    label, n = ln
    id_train = df_micro[df_micro["primary_microconstituent"] == label][0:n].index
    df_test.drop(id_train, axis=0, inplace=True)
    df_train = pd.concat([df_train, df_micro.loc[id_train]])

In [9]:
df_train

Unnamed: 0,path,primary_microconstituent,label
1,data/CMU-UHCS_Dataset/images/micrograph2.tif,spheroidite,2
4,data/CMU-UHCS_Dataset/images/micrograph6.tif,spheroidite,2
8,data/CMU-UHCS_Dataset/images/micrograph10.png,spheroidite,2
9,data/CMU-UHCS_Dataset/images/micrograph11.tif,spheroidite,2
20,data/CMU-UHCS_Dataset/images/micrograph29.tif,spheroidite,2
...,...,...,...
596,data/CMU-UHCS_Dataset/images/micrograph1093.tif,spheroidite+widmanstatten,3
618,data/CMU-UHCS_Dataset/images/micrograph1129.tif,spheroidite+widmanstatten,3
631,data/CMU-UHCS_Dataset/images/micrograph1156.tif,spheroidite+widmanstatten,3
672,data/CMU-UHCS_Dataset/images/micrograph1218.tif,spheroidite+widmanstatten,3


In [10]:
df_test

Unnamed: 0,path,primary_microconstituent,label
237,data/CMU-UHCS_Dataset/images/micrograph436.png,spheroidite,2
238,data/CMU-UHCS_Dataset/images/micrograph437.tif,spheroidite,2
239,data/CMU-UHCS_Dataset/images/micrograph440.png,spheroidite,2
241,data/CMU-UHCS_Dataset/images/micrograph442.tif,spheroidite,2
242,data/CMU-UHCS_Dataset/images/micrograph443.tif,spheroidite,2
...,...,...,...
955,data/CMU-UHCS_Dataset/images/micrograph1722.tif,spheroidite,2
957,data/CMU-UHCS_Dataset/images/micrograph1726.tif,spheroidite+widmanstatten,3
958,data/CMU-UHCS_Dataset/images/micrograph1730.png,spheroidite,2
959,data/CMU-UHCS_Dataset/images/micrograph1731.tif,pearlite,1


### Feature Extraction

In [11]:
# VGG16

base_model = tf.keras.applications.vgg16.VGG16(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation='softmax'
)

base_model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 block1_conv1 (Conv2D)       (None, None, None, 64)    1792      
                                                                 
 block1_conv2 (Conv2D)       (None, None, None, 64)    36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, None, None, 64)    0         
                                                                 
 block2_conv1 (Conv2D)       (None, None, None, 128)   73856     
                                                                 
 block2_conv2 (Conv2D)       (None, None, None, 128)   147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, None, None, 128)   0     

Use five layers

In [12]:
out_layer_ns = ["block{}_pool".format(i) for i in range(1,6)]
out_layer_ns

['block1_pool', 'block2_pool', 'block3_pool', 'block4_pool', 'block5_pool']

In [13]:
# Construct 5 models for feature extraction
extmodel = dict(zip(out_layer_ns, [tf.keras.Model(
    inputs= base_model.input,
    outputs=base_model.get_layer(bk_name).output
) for bk_name in out_layer_ns]))

extmodel

{'block1_pool': <keras.engine.functional.Functional at 0x29f411e20>,
 'block2_pool': <keras.engine.functional.Functional at 0x2af72ecd0>,
 'block3_pool': <keras.engine.functional.Functional at 0x2b06b63d0>,
 'block4_pool': <keras.engine.functional.Functional at 0x2b06be5b0>,
 'block5_pool': <keras.engine.functional.Functional at 0x2b06bedf0>}

In [14]:
# Display output dimensions
out_shapes = [extmodel[m].output_shape[-1] for m in extmodel.keys()]
out_shapes

[64, 128, 256, 512, 512]

In [67]:
# Initiate feature maps for testing and training
fs_train = [np.zeros((df_train.shape[0], n_f)) for n_f in out_shapes]
fs_test = [np.zeros((df_test.shape[0], n_f)) for n_f in out_shapes]

features_train = dict(zip(out_layer_ns, fs_train))
features_test = dict(zip(out_layer_ns, fs_test))

features_train

{'block1_pool': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'block2_pool': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'block3_pool': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'block4_pool': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ...

In [68]:
# Feature extraction with VGG16
for m in tqdm(extmodel.keys()):
    for i, df in enumerate([df_train, df_test]):
        for j, ph in tqdm(enumerate(df["path"])):
            x = load_image(ph)
            xb = extmodel[m].predict(x, verbose = 0) # silence output
            F = np.mean(xb,axis=(0,1,2))
            # Save features
            if i ==0:
                features_train[m][j, :] = F
            else:
                features_test[m][j, :] = F
            
            

  0%|          | 0/5 [00:00<?, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [76]:
# save extracted features
paths =  dict(zip(["train", "test"],\
        [os.path.join(dpath, "feature_{}.pkl".format(n))\
         for n in ["train", "test"]]))

In [77]:
## Create new files
f_train = open(paths["train"], "wb")
f_test = open(paths["test"], "wb")
## Write
pickle.dump(features_train, f_train)
pickle.dump(features_test, f_test)
## Close files
f_train.close()
f_test.close()

### SVM

In [131]:
# load data
ftn = open(paths["train"], "rb")
ftt = open(paths["test"], "rb")
featn = pickle.load(ftn) # train feature
featt = pickle.load(ftt) # test feature
ftn.close()
ftt.close()

# label
ltrain = df_train[["primary_microconstituent", "label"]].reset_index()
ltest = df_test[["primary_microconstituent", "label"]].reset_index()

In [132]:
ltrain

Unnamed: 0,index,primary_microconstituent,label
0,1,spheroidite,2
1,4,spheroidite,2
2,8,spheroidite,2
3,9,spheroidite,2
4,20,spheroidite,2
...,...,...,...
355,596,spheroidite+widmanstatten,3
356,618,spheroidite+widmanstatten,3
357,631,spheroidite+widmanstatten,3
358,672,spheroidite+widmanstatten,3


In [233]:
ltest["label"].to_numpy()

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2,
       0, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 0,
       0, 0, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 0, 0, 0, 0, 2, 0, 0, 2, 2, 2,
       2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2,
       0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0,
       0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2,
       2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0,
       2, 0, 2, 2, 0, 2, 0, 2, 2, 3, 0, 2, 2, 0, 3, 2, 2, 0, 0, 2, 0, 2,
       0, 2, 2, 2, 2, 0, 2, 3, 0, 2, 0, 2, 0, 3, 2, 0, 2, 0, 2, 2, 2, 3,
       2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 3, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0,
       0, 2, 0, 2, 0, 0, 2, 0, 3, 0, 2, 2, 2, 2, 3,

In [88]:
featn["block1_pool"].shape

(360, 64)

In [89]:
y = df_train["label"].to_numpy().astype(int)
y.shape

(360,)

In [93]:
clf = svm.SVC(kernel="rbf", C=1., gamma="auto")
clf.fit(featn["block1_pool"], y)

In [92]:
clf.predict(featt["block1_pool"])

array([2, 3, 2, 1, 2, 1, 2, 3, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2,
       2, 3, 0, 2, 2, 2, 2, 0, 2, 2, 2, 3, 2, 2, 1, 2, 1, 2, 3, 2, 3, 2,
       2, 2, 3, 2, 3, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 0,
       3, 2, 2, 0, 1, 1, 1, 2, 3, 2, 0, 0, 3, 0, 2, 2, 0, 0, 0, 0, 2, 2,
       3, 2, 1, 3, 2, 2, 0, 0, 2, 1, 2, 2, 2, 3, 0, 2, 2, 0, 3, 2, 2, 2,
       3, 3, 2, 2, 0, 2, 2, 3, 2, 2, 2, 2, 2, 0, 0, 3, 0, 3, 0, 2, 2, 0,
       2, 1, 2, 0, 2, 2, 3, 2, 1, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2,
       0, 2, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 0, 2, 1, 2, 2, 2, 2,
       0, 2, 2, 1, 2, 2, 0, 2, 2, 2, 0, 0, 3, 2, 2, 2, 2, 0, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 1, 3, 2, 2, 2, 2, 2, 1, 2, 0,
       2, 2, 3, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2,
       0, 0, 3, 2, 2, 0, 2, 3, 2, 2, 0, 2, 0, 3, 2, 2, 2, 2, 1, 3, 3, 0,
       1, 2, 2, 0, 0, 2, 0, 2, 3, 2, 2, 2, 0, 2, 0, 2, 2, 1, 2, 2, 2, 0,
       0, 2, 2, 2, 3, 2, 2, 0, 2, 0, 2, 2, 3, 0, 3,

In [264]:
class One2OneSVM:
    def __init__(self, n_class=4):
        self.n_class = n_class
        self.clfs = [[svm.SVC(kernel="rbf", C=1., gamma="auto")\
                     for i in range(0,self.n_class)]\
                     for j in range(0,self.n_class)]
    def train(self, ltrain, feature):
        # traversal all features
        for i in range(0, self.n_class-1):
            lis = ltrain[ltrain["label"] == i].index.to_numpy()
            for j in range(i+1, self.n_class):
                ljs = ltrain[ltrain["label"] == j].index.to_numpy()
                # Data
                X = np.concatenate(\
                  (feature[lis,:],\
                   feature[ljs,:]), axis=0)
                Y = np.concatenate((np.ones(len(lis))*i,np.ones(len(ljs))*j))
                # Train SVM
                self.clfs[i][j].fit(X,Y)    
    def test(self, feature):
        predM = np.zeros(( int(self.n_class * (self.n_class -1)/2) , feature.shape[0]))
        c = 0
        for i in range(0, self.n_class-1):
            for j in range(i+1, self.n_class):
                predM[c,:] = self.clfs[i][j].predict(feature)
                c += 1
        return st.mode(predM, axis=0, keepdims=True).mode

In [265]:
ltrain["label"].to_numpy()

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3,

In [271]:
a = One2OneSVM()
a.train(ltrain, features_train["block5_pool"])
pd = a.test(features_test["block5_pool"])
pd

array([[2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 3., 2., 2., 2., 3., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2., 3., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 0., 2., 0.,
        2., 0., 0., 2., 0., 2., 0., 2., 0., 2., 2., 0., 2., 2., 2., 0.,
        2., 2., 2., 2., 2., 2., 0., 2., 2., 0., 3., 2., 2., 0., 0., 0.,
        2., 2., 0., 2., 2., 0., 2., 0., 2., 2., 0., 0., 2., 0., 2., 0.,
        0., 2., 2., 2., 2., 2., 0., 0., 2., 2., 0., 2., 2., 2., 2., 0.,
        3., 2., 0., 0., 2., 2., 2., 2., 0., 2., 0., 2., 0., 2., 2., 2.,
        2., 2., 2., 0., 0., 0., 2., 0., 0., 2., 2., 2., 2., 2., 2., 0.,
        0., 2., 2., 2., 2., 0., 0., 2., 2., 2., 2., 0., 2., 2., 2., 2.,
        2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 0., 2., 2., 2., 2., 2., 0., 0., 0., 2., 0., 

In [267]:
ltrain["label"].to_numpy() - pd

array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

In [125]:
d.reset_index()

Unnamed: 0,level_0,index,path,primary_microconstituent,label
0,0,1,data/CMU-UHCS_Dataset/images/micrograph2.tif,spheroidite,2
1,1,4,data/CMU-UHCS_Dataset/images/micrograph6.tif,spheroidite,2
2,2,8,data/CMU-UHCS_Dataset/images/micrograph10.png,spheroidite,2
3,3,9,data/CMU-UHCS_Dataset/images/micrograph11.tif,spheroidite,2
4,4,20,data/CMU-UHCS_Dataset/images/micrograph29.tif,spheroidite,2
...,...,...,...,...,...
355,355,596,data/CMU-UHCS_Dataset/images/micrograph1093.tif,spheroidite+widmanstatten,3
356,356,618,data/CMU-UHCS_Dataset/images/micrograph1129.tif,spheroidite+widmanstatten,3
357,357,631,data/CMU-UHCS_Dataset/images/micrograph1156.tif,spheroidite+widmanstatten,3
358,358,672,data/CMU-UHCS_Dataset/images/micrograph1218.tif,spheroidite+widmanstatten,3


In [152]:
a = np.array([1,2,3])
np.concatenate((a,a))

array([1, 2, 3, 1, 2, 3])

In [114]:
features_train["block1_pool"].shape

(360, 64)

In [104]:
for i in range(0, 5-1):
    lis = ltrain[ltrain["label"] == i].index.to_numpy()
    for j in range(i+1, 5):
        ljs = ltrain[ltrain["label"] == j].index.to_numpy()
        

0 1
0 2
0 3
0 4
1 2
1 3
1 4
2 3
2 4
3 4


In [138]:
ltrain[ltrain["label"] == 2].index.to_numpy()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [150]:
np.concatenate((features_train["block1_pool"][ltrain[ltrain["label"] == 3].index.to_numpy(), :],\
               features_train["block1_pool"][ltrain[ltrain["label"] == 3].index.to_numpy(), :]), axis=0).shape

(120, 64)

In [142]:
features_train

{'block1_pool': array([[452.22186279, 236.91465759, 160.32466125, ..., 128.34408569,
         391.23461914, 341.81277466],
        [238.73849487, 113.43798828, 140.67536926, ...,  48.859375  ,
         177.37490845, 144.7532959 ],
        [260.73648071, 238.5995636 , 145.67814636, ...,  80.27812195,
         262.85009766, 186.44360352],
        ...,
        [588.95788574, 246.63072205, 132.81652832, ..., 156.07931519,
         530.91351318, 373.74145508],
        [559.06378174, 297.08312988, 159.35540771, ..., 168.19374084,
         523.49584961, 391.05258179],
        [244.14228821, 140.29975891, 155.90449524, ...,  55.92124939,
         205.18037415, 167.65242004]]),
 'block2_pool': array([[ 358.43338013,  633.6449585 ,  634.77783203, ..., 1495.25610352,
         1332.69543457,  529.44287109],
        [ 195.20909119,  346.07540894,  379.4145813 , ...,  926.10565186,
          788.3324585 ,  389.27670288],
        [ 318.48815918,  441.97177124,  378.34817505, ..., 1833.49255371,
     