In [1]:
!pip install tensorflow



In [3]:
pip install --upgrade keras

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
from keras.applications import VGG16
from keras.models import Model
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np
import os
from sklearn import svm
from sklearn.model_selection import cross_val_score

In [5]:

df = pd.read_csv('micrograph.csv')  
image_dir = 'images'


In [7]:
base_model = VGG16(weights='imagenet', include_top=False)
layer_features = []


In [9]:

layer = 'block1_pool'
layer_features = []  
layer_labels = []    
model = Model(inputs=base_model.input, outputs=base_model.get_layer(layer).output)

for index, row in df.iterrows():
    file_name = row['path']
    label = row['primary_microconstituent'] 
    
   
    img_path = os.path.join(image_dir, file_name)
    img = image.load_img(img_path)  
    x = image.img_to_array(img)      
    x = x[0:484, :, :]               
    x = np.expand_dims(x, axis=0)    
    x = preprocess_input(x)           

    xb = model.predict(x)
    F = np.mean(xb, axis=(0, 1, 2))
    
    layer_features.append(F)
    layer_labels.append(label)

block1_pool_features = np.array(layer_features)
block1_pool_labels = np.array(layer_labels) 

print(f"block1_pool feature set shape: {block1_pool_features.shape}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9

In [11]:

layer = 'block2_pool'
layer_features = []  
layer_labels = []   
model = Model(inputs=base_model.input, outputs=base_model.get_layer(layer).output)

for index, row in df.iterrows():
    file_name = row['path']
    label = row['primary_microconstituent'] 
    

    img_path = os.path.join(image_dir, file_name)
    img = image.load_img(img_path)
    x = image.img_to_array(img)
    x = x[0:484, :, :]
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    xb = model.predict(x)
    F = np.mean(xb, axis=(0, 1, 2))
    
    layer_features.append(F)
    layer_labels.append(label)

block2_pool_features = np.array(layer_features)
block2_pool_labels = np.array(layer_labels)  

print(f"block2_pool feature set shape: {block2_pool_features.shape}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 248ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 211ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 187ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [13]:

layer = 'block3_pool'
layer_features = []  
layer_labels = []  
model = Model(inputs=base_model.input, outputs=base_model.get_layer(layer).output)

for index, row in df.iterrows():
    file_name = row['path']
    label = row['primary_microconstituent']  
    

    img_path = os.path.join(image_dir, file_name)
    img = image.load_img(img_path)
    x = image.img_to_array(img)
    x = x[0:484, :, :]
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    # Extract features
    xb = model.predict(x)
    F = np.mean(xb, axis=(0, 1, 2))
    

    layer_features.append(F)
    layer_labels.append(label)


block3_pool_features = np.array(layer_features)
block3_pool_labels = np.array(layer_labels)  

print(f"block3_pool feature set shape: {block3_pool_features.shape}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 422ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 324ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 336ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 345ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 319ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 325ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 318ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 329ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 369ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 358ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [15]:

layer = 'block4_pool'
layer_features = []  
layer_labels = []    
model = Model(inputs=base_model.input, outputs=base_model.get_layer(layer).output)

for index, row in df.iterrows():
    file_name = row['path']
    label = row['primary_microconstituent'] 

    img_path = os.path.join(image_dir, file_name)
    img = image.load_img(img_path)
    x = image.img_to_array(img)
    x = x[0:484, :, :]
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)


    xb = model.predict(x)
    F = np.mean(xb, axis=(0, 1, 2))
    

    layer_features.append(F)
    layer_labels.append(label)

block4_pool_features = np.array(layer_features)
block4_pool_labels = np.array(layer_labels)  
print(f"block4_pool feature set shape: {block4_pool_features.shape}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 529ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 475ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 503ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 486ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 573ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 665ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 546ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 570ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 521ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 521ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 472ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 472ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 473ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [17]:

layer = 'block5_pool'
layer_features = []  
layer_labels = []    
model = Model(inputs=base_model.input, outputs=base_model.get_layer(layer).output)

for index, row in df.iterrows():
    file_name = row['path']
    label = row['primary_microconstituent']  
    
    img_path = os.path.join(image_dir, file_name)
    img = image.load_img(img_path)
    x = image.img_to_array(img)
    x = x[0:484, :, :]
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)


    xb = model.predict(x)
    F = np.mean(xb, axis=(0, 1, 2))

    layer_features.append(F)
    layer_labels.append(label)

block5_pool_features = np.array(layer_features)
block5_pool_labels = np.array(layer_labels)  

print(f"block5_pool feature set shape: {block5_pool_features.shape}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 610ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 529ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 544ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 578ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 602ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 645ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 570ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 570ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 563ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 598ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 559ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 506ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 503ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [21]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [29]:
target_labels = ['spheroidite', 'network']

def filter_data(features, labels, target_labels, count_per_label=100):
    filtered_features = []
    filtered_labels = []
    
    for label in target_labels:
        indices = np.where(labels == label)[0][:count_per_label]  # Take the first 'count_per_label' samples
        filtered_features.append(features[indices])
        filtered_labels.append(labels[indices])
    
    return np.vstack(filtered_features), np.concatenate(filtered_labels)

block1_pool_filtered_features, block1_pool_filtered_labels = filter_data(block1_pool_features, block1_pool_labels, target_labels)
block2_pool_filtered_features, block2_pool_filtered_labels = filter_data(block2_pool_features, block2_pool_labels, target_labels)
block3_pool_filtered_features, block3_pool_filtered_labels = filter_data(block3_pool_features, block3_pool_labels, target_labels)
block4_pool_filtered_features, block4_pool_filtered_labels = filter_data(block4_pool_features, block4_pool_labels, target_labels)
block5_pool_filtered_features, block5_pool_filtered_labels = filter_data(block5_pool_features, block5_pool_labels, target_labels)

block1_pool_filtered_labels = np.where(block1_pool_filtered_labels == 'spheroidite', 1, 0)
block2_pool_filtered_labels = np.where(block2_pool_filtered_labels == 'spheroidite', 1, 0)
block3_pool_filtered_labels = np.where(block3_pool_filtered_labels == 'spheroidite', 1, 0)
block4_pool_filtered_labels = np.where(block4_pool_filtered_labels == 'spheroidite', 1, 0)
block5_pool_filtered_labels = np.where(block5_pool_filtered_labels == 'spheroidite', 1, 0)


def train_svm(features, labels):
    svm_classifier = SVC(kernel='rbf', C=1, gamma='auto')
    cv_scores = cross_val_score(svm_classifier, features, labels, cv=10)  # 10-fold cross-validation
    return cv_scores

# Step 3: Evaluate using 10-Fold Cross-Validation
block1_pool_cv_scores = train_svm(block1_pool_filtered_features, block1_pool_filtered_labels)
block2_pool_cv_scores = train_svm(block2_pool_filtered_features, block2_pool_filtered_labels)
block3_pool_cv_scores = train_svm(block3_pool_filtered_features, block3_pool_filtered_labels)
block4_pool_cv_scores = train_svm(block4_pool_filtered_features, block4_pool_filtered_labels)
block5_pool_cv_scores = train_svm(block5_pool_filtered_features, block5_pool_filtered_labels)

# Print the results
print("Spheroidite and Network\n")
print("Block 1 Pool CV Scores:", block1_pool_cv_scores)
print("Block 1 Pool Mean CV Score:", np.mean(block1_pool_cv_scores))

print("Block 2 Pool CV Scores:", block2_pool_cv_scores)
print("Block 2 Pool Mean CV Score:", np.mean(block2_pool_cv_scores))

print("Block 3 Pool CV Scores:", block3_pool_cv_scores)
print("Block 3 Pool Mean CV Score:", np.mean(block3_pool_cv_scores))

print("Block 4 Pool CV Scores:", block4_pool_cv_scores)
print("Block 4 Pool Mean CV Score:", np.mean(block4_pool_cv_scores))

print("Block 5 Pool CV Scores:", block5_pool_cv_scores)
print("Block 5 Pool Mean CV Score:", np.mean(block5_pool_cv_scores))

Spheroidite and Network

Block 1 Pool CV Scores: [0.5  0.5  0.55 0.5  0.5  0.5  0.5  0.5  0.5  0.5 ]
Block 1 Pool Mean CV Score: 0.505
Block 2 Pool CV Scores: [0.5  0.5  0.65 0.5  0.5  0.5  0.5  0.5  0.5  0.5 ]
Block 2 Pool Mean CV Score: 0.515
Block 3 Pool CV Scores: [0.5  0.5  0.5  0.5  0.55 0.6  0.5  0.5  0.5  0.5 ]
Block 3 Pool Mean CV Score: 0.515
Block 4 Pool CV Scores: [0.5  0.5  0.5  0.5  0.55 0.55 0.5  0.5  0.5  0.5 ]
Block 4 Pool Mean CV Score: 0.51
Block 5 Pool CV Scores: [0.95 0.95 0.95 1.   0.95 0.9  1.   1.   1.   1.  ]
Block 5 Pool Mean CV Score: 0.97


In [31]:
target_labels = ['spheroidite', 'pearlite']

# Function to filter features and labels
def filter_data(features, labels, target_labels, count_per_label=100):
    filtered_features = []
    filtered_labels = []
    
    for label in target_labels:
        # Get indices for the current label
        indices = np.where(labels == label)[0][:count_per_label]  # Take the first 'count_per_label' samples
        filtered_features.append(features[indices])
        filtered_labels.append(labels[indices])
    
    # Concatenate the features and labels for both classes
    return np.vstack(filtered_features), np.concatenate(filtered_labels)

# Filter data for each feature set
block1_pool_filtered_features, block1_pool_filtered_labels = filter_data(block1_pool_features, block1_pool_labels, target_labels)
block2_pool_filtered_features, block2_pool_filtered_labels = filter_data(block2_pool_features, block2_pool_labels, target_labels)
block3_pool_filtered_features, block3_pool_filtered_labels = filter_data(block3_pool_features, block3_pool_labels, target_labels)
block4_pool_filtered_features, block4_pool_filtered_labels = filter_data(block4_pool_features, block4_pool_labels, target_labels)
block5_pool_filtered_features, block5_pool_filtered_labels = filter_data(block5_pool_features, block5_pool_labels, target_labels)

block1_pool_filtered_labels = np.where(block1_pool_filtered_labels == 'spheroidite', 1, 0)
block2_pool_filtered_labels = np.where(block2_pool_filtered_labels == 'spheroidite', 1, 0)
block3_pool_filtered_labels = np.where(block3_pool_filtered_labels == 'spheroidite', 1, 0)
block4_pool_filtered_labels = np.where(block4_pool_filtered_labels == 'spheroidite', 1, 0)
block5_pool_filtered_labels = np.where(block5_pool_filtered_labels == 'spheroidite', 1, 0)


def train_svm(features, labels):
    svm_classifier = SVC(kernel='rbf', C=1, gamma='auto')
    cv_scores = cross_val_score(svm_classifier, features, labels, cv=10)  # 10-fold cross-validation
    return cv_scores

# Step 3: Evaluate using 10-Fold Cross-Validation
block1_pool_cv_scores = train_svm(block1_pool_filtered_features, block1_pool_filtered_labels)
block2_pool_cv_scores = train_svm(block2_pool_filtered_features, block2_pool_filtered_labels)
block3_pool_cv_scores = train_svm(block3_pool_filtered_features, block3_pool_filtered_labels)
block4_pool_cv_scores = train_svm(block4_pool_filtered_features, block4_pool_filtered_labels)
block5_pool_cv_scores = train_svm(block5_pool_filtered_features, block5_pool_filtered_labels)

# Print the results
print("Spheroidite and Pearlite\n")
print("Block 1 Pool CV Scores:", block1_pool_cv_scores)
print("Block 1 Pool Mean CV Score:", np.mean(block1_pool_cv_scores))

print("Block 2 Pool CV Scores:", block2_pool_cv_scores)
print("Block 2 Pool Mean CV Score:", np.mean(block2_pool_cv_scores))

print("Block 3 Pool CV Scores:", block3_pool_cv_scores)
print("Block 3 Pool Mean CV Score:", np.mean(block3_pool_cv_scores))

print("Block 4 Pool CV Scores:", block4_pool_cv_scores)
print("Block 4 Pool Mean CV Score:", np.mean(block4_pool_cv_scores))

print("Block 5 Pool CV Scores:", block5_pool_cv_scores)
print("Block 5 Pool Mean CV Score:", np.mean(block5_pool_cv_scores))

Spheroidite and Pearlite

Block 1 Pool CV Scores: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
Block 1 Pool Mean CV Score: 0.5
Block 2 Pool CV Scores: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
Block 2 Pool Mean CV Score: 0.5
Block 3 Pool CV Scores: [0.5  0.5  0.5  0.5  0.5  0.55 0.5  0.5  0.55 0.5 ]
Block 3 Pool Mean CV Score: 0.51
Block 4 Pool CV Scores: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
Block 4 Pool Mean CV Score: 0.5
Block 5 Pool CV Scores: [0.95 1.   1.   0.95 1.   0.95 1.   0.85 0.95 1.  ]
Block 5 Pool Mean CV Score: 0.9649999999999999


In [33]:
target_labels = ['network', 'pearlite']

# Function to filter features and labels
def filter_data(features, labels, target_labels, count_per_label=100):
    filtered_features = []
    filtered_labels = []
    
    for label in target_labels:
        # Get indices for the current label
        indices = np.where(labels == label)[0][:count_per_label]  # Take the first 'count_per_label' samples
        filtered_features.append(features[indices])
        filtered_labels.append(labels[indices])
    
    # Concatenate the features and labels for both classes
    return np.vstack(filtered_features), np.concatenate(filtered_labels)

# Filter data for each feature set
block1_pool_filtered_features, block1_pool_filtered_labels = filter_data(block1_pool_features, block1_pool_labels, target_labels)
block2_pool_filtered_features, block2_pool_filtered_labels = filter_data(block2_pool_features, block2_pool_labels, target_labels)
block3_pool_filtered_features, block3_pool_filtered_labels = filter_data(block3_pool_features, block3_pool_labels, target_labels)
block4_pool_filtered_features, block4_pool_filtered_labels = filter_data(block4_pool_features, block4_pool_labels, target_labels)
block5_pool_filtered_features, block5_pool_filtered_labels = filter_data(block5_pool_features, block5_pool_labels, target_labels)

block1_pool_filtered_labels = np.where(block1_pool_filtered_labels == 'network', 1, 0)
block2_pool_filtered_labels = np.where(block2_pool_filtered_labels == 'network', 1, 0)
block3_pool_filtered_labels = np.where(block3_pool_filtered_labels == 'network', 1, 0)
block4_pool_filtered_labels = np.where(block4_pool_filtered_labels == 'network', 1, 0)
block5_pool_filtered_labels = np.where(block5_pool_filtered_labels == 'network', 1, 0)


def train_svm(features, labels):
    svm_classifier = SVC(kernel='rbf', C=1, gamma='auto')
    cv_scores = cross_val_score(svm_classifier, features, labels, cv=10)  # 10-fold cross-validation
    return cv_scores

# Step 3: Evaluate using 10-Fold Cross-Validation
block1_pool_cv_scores = train_svm(block1_pool_filtered_features, block1_pool_filtered_labels)
block2_pool_cv_scores = train_svm(block2_pool_filtered_features, block2_pool_filtered_labels)
block3_pool_cv_scores = train_svm(block3_pool_filtered_features, block3_pool_filtered_labels)
block4_pool_cv_scores = train_svm(block4_pool_filtered_features, block4_pool_filtered_labels)
block5_pool_cv_scores = train_svm(block5_pool_filtered_features, block5_pool_filtered_labels)

# Print the results
print("Network and Pearlite\n")
print("Block 1 Pool CV Scores:", block1_pool_cv_scores)
print("Block 1 Pool Mean CV Score:", np.mean(block1_pool_cv_scores))

print("Block 2 Pool CV Scores:", block2_pool_cv_scores)
print("Block 2 Pool Mean CV Score:", np.mean(block2_pool_cv_scores))

print("Block 3 Pool CV Scores:", block3_pool_cv_scores)
print("Block 3 Pool Mean CV Score:", np.mean(block3_pool_cv_scores))

print("Block 4 Pool CV Scores:", block4_pool_cv_scores)
print("Block 4 Pool Mean CV Score:", np.mean(block4_pool_cv_scores))

print("Block 5 Pool CV Scores:", block5_pool_cv_scores)
print("Block 5 Pool Mean CV Score:", np.mean(block5_pool_cv_scores))

Network and Pearlite

Block 1 Pool CV Scores: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
Block 1 Pool Mean CV Score: 0.5
Block 2 Pool CV Scores: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
Block 2 Pool Mean CV Score: 0.5
Block 3 Pool CV Scores: [0.5  0.5  0.5  0.5  0.5  0.55 0.5  0.5  0.5  0.5 ]
Block 3 Pool Mean CV Score: 0.505
Block 4 Pool CV Scores: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
Block 4 Pool Mean CV Score: 0.5
Block 5 Pool CV Scores: [0.95 0.95 0.95 1.   0.95 0.95 1.   1.   1.   1.  ]
Block 5 Pool Mean CV Score: 0.975


In [37]:
# Function to filter features and labels
target_labels = {
    'spheroidite+widmanstatten': 60,
    'spheroidite': 100
}

# Function to filter features and labels
def filter_data(features, labels, target_labels):
    filtered_features = []
    filtered_labels = []
    
    for label, count in target_labels.items():
        # Get indices for the current label
        indices = np.where(labels == label)[0][:count]  # Take the first 'count' samples
        filtered_features.append(features[indices])
        filtered_labels.append(labels[indices])
    
    # Concatenate the features and labels for both classes
    return np.vstack(filtered_features), np.concatenate(filtered_labels)


# Filter data for each feature set
block1_pool_filtered_features, block1_pool_filtered_labels = filter_data(block1_pool_features, block1_pool_labels, target_labels)
block2_pool_filtered_features, block2_pool_filtered_labels = filter_data(block2_pool_features, block2_pool_labels, target_labels)
block3_pool_filtered_features, block3_pool_filtered_labels = filter_data(block3_pool_features, block3_pool_labels, target_labels)
block4_pool_filtered_features, block4_pool_filtered_labels = filter_data(block4_pool_features, block4_pool_labels, target_labels)
block5_pool_filtered_features, block5_pool_filtered_labels = filter_data(block5_pool_features, block5_pool_labels, target_labels)

block1_pool_filtered_labels = np.where(block1_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block2_pool_filtered_labels = np.where(block2_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block3_pool_filtered_labels = np.where(block3_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block4_pool_filtered_labels = np.where(block4_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block5_pool_filtered_labels = np.where(block5_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)


def train_svm(features, labels):
    svm_classifier = SVC(kernel='rbf', C=1, gamma='auto')
    cv_scores = cross_val_score(svm_classifier, features, labels, cv=10)  # 10-fold cross-validation
    return cv_scores

# Step 3: Evaluate using 10-Fold Cross-Validation
block1_pool_cv_scores = train_svm(block1_pool_filtered_features, block1_pool_filtered_labels)
block2_pool_cv_scores = train_svm(block2_pool_filtered_features, block2_pool_filtered_labels)
block3_pool_cv_scores = train_svm(block3_pool_filtered_features, block3_pool_filtered_labels)
block4_pool_cv_scores = train_svm(block4_pool_filtered_features, block4_pool_filtered_labels)
block5_pool_cv_scores = train_svm(block5_pool_filtered_features, block5_pool_filtered_labels)

# Print the results
print("Spheroilite and spheroidite+Widmanstatten\n")
print("Block 1 Pool CV Scores:", block1_pool_cv_scores)
print("Block 1 Pool Mean CV Score:", np.mean(block1_pool_cv_scores))

print("Block 2 Pool CV Scores:", block2_pool_cv_scores)
print("Block 2 Pool Mean CV Score:", np.mean(block2_pool_cv_scores))

print("Block 3 Pool CV Scores:", block3_pool_cv_scores)
print("Block 3 Pool Mean CV Score:", np.mean(block3_pool_cv_scores))

print("Block 4 Pool CV Scores:", block4_pool_cv_scores)
print("Block 4 Pool Mean CV Score:", np.mean(block4_pool_cv_scores))

print("Block 5 Pool CV Scores:", block5_pool_cv_scores)
print("Block 5 Pool Mean CV Score:", np.mean(block5_pool_cv_scores))

Spheroilite and spheroidite+Widmanstatten

Block 1 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 1 Pool Mean CV Score: 0.625
Block 2 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 2 Pool Mean CV Score: 0.625
Block 3 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 3 Pool Mean CV Score: 0.625
Block 4 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 4 Pool Mean CV Score: 0.625
Block 5 Pool CV Scores: [0.875  0.875  0.8125 0.8125 0.6875 0.75   0.75   0.6875 0.75   0.8125]
Block 5 Pool Mean CV Score: 0.78125


In [39]:
# Function to filter features and labels
target_labels = {
    'spheroidite+widmanstatten': 60,
    'network': 100
}

# Function to filter features and labels
def filter_data(features, labels, target_labels):
    filtered_features = []
    filtered_labels = []
    
    for label, count in target_labels.items():
        # Get indices for the current label
        indices = np.where(labels == label)[0][:count]  # Take the first 'count' samples
        filtered_features.append(features[indices])
        filtered_labels.append(labels[indices])
    
    # Concatenate the features and labels for both classes
    return np.vstack(filtered_features), np.concatenate(filtered_labels)


# Filter data for each feature set
block1_pool_filtered_features, block1_pool_filtered_labels = filter_data(block1_pool_features, block1_pool_labels, target_labels)
block2_pool_filtered_features, block2_pool_filtered_labels = filter_data(block2_pool_features, block2_pool_labels, target_labels)
block3_pool_filtered_features, block3_pool_filtered_labels = filter_data(block3_pool_features, block3_pool_labels, target_labels)
block4_pool_filtered_features, block4_pool_filtered_labels = filter_data(block4_pool_features, block4_pool_labels, target_labels)
block5_pool_filtered_features, block5_pool_filtered_labels = filter_data(block5_pool_features, block5_pool_labels, target_labels)

block1_pool_filtered_labels = np.where(block1_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block2_pool_filtered_labels = np.where(block2_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block3_pool_filtered_labels = np.where(block3_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block4_pool_filtered_labels = np.where(block4_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block5_pool_filtered_labels = np.where(block5_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)


def train_svm(features, labels):
    svm_classifier = SVC(kernel='rbf', C=1, gamma='auto')
    cv_scores = cross_val_score(svm_classifier, features, labels, cv=10)  # 10-fold cross-validation
    return cv_scores

# Step 3: Evaluate using 10-Fold Cross-Validation
block1_pool_cv_scores = train_svm(block1_pool_filtered_features, block1_pool_filtered_labels)
block2_pool_cv_scores = train_svm(block2_pool_filtered_features, block2_pool_filtered_labels)
block3_pool_cv_scores = train_svm(block3_pool_filtered_features, block3_pool_filtered_labels)
block4_pool_cv_scores = train_svm(block4_pool_filtered_features, block4_pool_filtered_labels)
block5_pool_cv_scores = train_svm(block5_pool_filtered_features, block5_pool_filtered_labels)

# Print the results
print("Network and spheroidite+Widmanstatten\n")
print("Block 1 Pool CV Scores:", block1_pool_cv_scores)
print("Block 1 Pool Mean CV Score:", np.mean(block1_pool_cv_scores))

print("Block 2 Pool CV Scores:", block2_pool_cv_scores)
print("Block 2 Pool Mean CV Score:", np.mean(block2_pool_cv_scores))

print("Block 3 Pool CV Scores:", block3_pool_cv_scores)
print("Block 3 Pool Mean CV Score:", np.mean(block3_pool_cv_scores))

print("Block 4 Pool CV Scores:", block4_pool_cv_scores)
print("Block 4 Pool Mean CV Score:", np.mean(block4_pool_cv_scores))

print("Block 5 Pool CV Scores:", block5_pool_cv_scores)
print("Block 5 Pool Mean CV Score:", np.mean(block5_pool_cv_scores))

Network and spheroidite+Widmanstatten

Block 1 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 1 Pool Mean CV Score: 0.625
Block 2 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 2 Pool Mean CV Score: 0.625
Block 3 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 3 Pool Mean CV Score: 0.625
Block 4 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 4 Pool Mean CV Score: 0.625
Block 5 Pool CV Scores: [0.9375 1.     1.     1.     0.9375 1.     1.     1.     1.     1.    ]
Block 5 Pool Mean CV Score: 0.9875


In [41]:
# Function to filter features and labels
target_labels = {
    'spheroidite+widmanstatten': 60,
    'pearlite': 100
}

# Function to filter features and labels
def filter_data(features, labels, target_labels):
    filtered_features = []
    filtered_labels = []
    
    for label, count in target_labels.items():
        # Get indices for the current label
        indices = np.where(labels == label)[0][:count]  # Take the first 'count' samples
        filtered_features.append(features[indices])
        filtered_labels.append(labels[indices])
    
    # Concatenate the features and labels for both classes
    return np.vstack(filtered_features), np.concatenate(filtered_labels)


# Filter data for each feature set
block1_pool_filtered_features, block1_pool_filtered_labels = filter_data(block1_pool_features, block1_pool_labels, target_labels)
block2_pool_filtered_features, block2_pool_filtered_labels = filter_data(block2_pool_features, block2_pool_labels, target_labels)
block3_pool_filtered_features, block3_pool_filtered_labels = filter_data(block3_pool_features, block3_pool_labels, target_labels)
block4_pool_filtered_features, block4_pool_filtered_labels = filter_data(block4_pool_features, block4_pool_labels, target_labels)
block5_pool_filtered_features, block5_pool_filtered_labels = filter_data(block5_pool_features, block5_pool_labels, target_labels)

block1_pool_filtered_labels = np.where(block1_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block2_pool_filtered_labels = np.where(block2_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block3_pool_filtered_labels = np.where(block3_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block4_pool_filtered_labels = np.where(block4_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)
block5_pool_filtered_labels = np.where(block5_pool_filtered_labels == 'spheroidite+widmanstatten', 1, 0)


def train_svm(features, labels):
    svm_classifier = SVC(kernel='rbf', C=1, gamma='auto')
    cv_scores = cross_val_score(svm_classifier, features, labels, cv=10)  # 10-fold cross-validation
    return cv_scores

# Step 3: Evaluate using 10-Fold Cross-Validation
block1_pool_cv_scores = train_svm(block1_pool_filtered_features, block1_pool_filtered_labels)
block2_pool_cv_scores = train_svm(block2_pool_filtered_features, block2_pool_filtered_labels)
block3_pool_cv_scores = train_svm(block3_pool_filtered_features, block3_pool_filtered_labels)
block4_pool_cv_scores = train_svm(block4_pool_filtered_features, block4_pool_filtered_labels)
block5_pool_cv_scores = train_svm(block5_pool_filtered_features, block5_pool_filtered_labels)

# Print the results
print("Pearlite and spheroidite+Widmanstatten\n")
print("Block 1 Pool CV Scores:", block1_pool_cv_scores)
print("Block 1 Pool Mean CV Score:", np.mean(block1_pool_cv_scores))

print("Block 2 Pool CV Scores:", block2_pool_cv_scores)
print("Block 2 Pool Mean CV Score:", np.mean(block2_pool_cv_scores))

print("Block 3 Pool CV Scores:", block3_pool_cv_scores)
print("Block 3 Pool Mean CV Score:", np.mean(block3_pool_cv_scores))

print("Block 4 Pool CV Scores:", block4_pool_cv_scores)
print("Block 4 Pool Mean CV Score:", np.mean(block4_pool_cv_scores))

print("Block 5 Pool CV Scores:", block5_pool_cv_scores)
print("Block 5 Pool Mean CV Score:", np.mean(block5_pool_cv_scores))

Pearlite and spheroidite+Widmanstatten

Block 1 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 1 Pool Mean CV Score: 0.625
Block 2 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 2 Pool Mean CV Score: 0.625
Block 3 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 3 Pool Mean CV Score: 0.625
Block 4 Pool CV Scores: [0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625 0.625]
Block 4 Pool Mean CV Score: 0.625
Block 5 Pool CV Scores: [1.     0.875  0.9375 0.9375 0.875  0.8125 0.9375 0.75   0.8125 0.875 ]
Block 5 Pool Mean CV Score: 0.88125


In [69]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

# Assuming block5_pool_features and block5_pool_labels are defined

training_counts = {
    'spheroidite': 100,
    'network': 100,
    'pearlite': 100,
    'spheroidite+widmanstatten': 60  # Use 60 samples for this class
}

# Define the labels used for classification
train_labels = list(training_counts.keys())

# Function to prepare training and test data
def prepare_data(features, labels, training_counts):
    train_features = []
    train_labels_list = []
    test_features = []
    test_labels = []

    # Track indices for the training data
    used_indices = set()

    # Prepare training data
    for label in train_labels:
        indices = np.where(labels == label)[0]

        # Determine how many training samples to use
        train_size = min(training_counts[label], len(indices))
        train_indices = indices[:train_size]

        # Collect training data
        train_features.extend(features[train_indices])
        train_labels_list.extend([label] * train_size)  # Append the corresponding label for each feature
        
        # Mark these indices as used
        used_indices.update(train_indices)

    # Prepare test data from remaining data and all other labels
    for label in np.unique(labels):
        if label not in training_counts:  # Exclude labels in training_counts
            indices = np.where(labels == label)[0]
            test_features.extend(features[indices])
            test_labels.extend([label] * len(indices))  # Append the corresponding label for each feature
        else:
            # Add remaining indices from the label included in training counts to the test set
            remaining_indices = np.setdiff1d(np.where(labels == label)[0], list(used_indices))
            test_features.extend(features[remaining_indices])
            test_labels.extend([label] * len(remaining_indices))  # Append the corresponding label for each feature

    return np.array(train_features), np.array(train_labels_list), np.array(test_features), np.array(test_labels)

# Prepare the data
train_features, train_labels, test_features, test_labels = prepare_data(block5_pool_features, block5_pool_labels, training_counts)


# Function to filter data for each pair of labels
def filter_pair(features, labels, label1, label2):
    indices1 = np.where(labels == label1)[0]
    indices2 = np.where(labels == label2)[0]
    if len(indices1) == 0 or len(indices2) == 0:
        print(f"Warning: Not enough samples for {label1} or {label2}.")
        return None, None
    # Concatenate training data for both classes
    filtered_features = np.vstack((features[indices1], features[indices2]))
    filtered_labels = np.concatenate((labels[indices1], labels[indices2]))

    # Binary encoding: label1 as 1, label2 as 0
    binary_labels = np.where(filtered_labels == label1, 1, 0)
    
    return filtered_features, binary_labels

# Define pairs for one-vs-one classification
pairs = [
    ('spheroidite', 'network'),
    ('spheroidite', 'pearlite'),
    ('spheroidite', 'spheroidite+widmanstatten'),
    ('network', 'pearlite'),
    ('network', 'spheroidite+widmanstatten'),
    ('pearlite', 'spheroidite+widmanstatten')
]

# Train SVM classifiers for each pair
classifiers = {}
for label1, label2 in pairs:
    features_pair, binary_labels = filter_pair(train_features, train_labels, label1, label2)
    svm_classifier = SVC(kernel='rbf', C=1, gamma='auto')
    svm_classifier.fit(features_pair, binary_labels)
    classifiers[(label1, label2)] = svm_classifier

# Function to predict using the classifiers
def predict_with_voting(classifiers, features):
    # Initialize an array to hold the votes for each classifier
    votes = np.zeros((features.shape[0], len(classifiers)))  # Shape: (num_samples, num_classifiers)

    # Iterate over each classifier and make predictions
    for idx, (label1, label2) in enumerate(classifiers.keys()):
        pred = classifiers[(label1, label2)].predict(features)
        votes[:, idx] = pred  # Store predictions for this classifier
    
    # Determine final prediction based on majority vote
    # Here, we need to get the most common class in each row of votes
    final_predictions = []
    for row in votes:
        # Count the occurrences of each predicted label
        unique, counts = np.unique(row, return_counts=True)
        # Get the label with the maximum votes
        max_index = np.argmax(counts)
        final_predictions.append(unique[max_index])  # Get the corresponding label
    
    return np.array(final_predictions)

# Use the test features to predict



In [59]:
predictions = predict_with_voting(classifiers, test_features)

# Evaluate the accuracy
accuracy = accuracy_score(test_labels, predictions)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 0.00%


In [61]:
print(classifiers)

{('spheroidite', 'network'): SVC(C=1, gamma='auto'), ('spheroidite', 'pearlite'): SVC(C=1, gamma='auto'), ('spheroidite', 'spheroidite+widmanstatten'): SVC(C=1, gamma='auto'), ('network', 'pearlite'): SVC(C=1, gamma='auto'), ('network', 'spheroidite+widmanstatten'): SVC(C=1, gamma='auto'), ('pearlite', 'spheroidite+widmanstatten'): SVC(C=1, gamma='auto')}


In [79]:
filtered_indices = np.where(np.isin(test_labels, ['network', 'pearlite']))[0]
filtered_test_features = test_features[filtered_indices]
filtered_test_labels = test_labels[filtered_indices]
binary_filtered_test_labels = np.where(filtered_test_labels == 'network', 1, 0)

filtered_predictions = classifiers[('network', 'pearlite')].predict(filtered_test_features)

test_accuracy = accuracy_score(binary_filtered_test_labels, filtered_predictions)
test_error_rate = 1 - test_accuracy

print(f"Test Accuracy (Network vs Pearlite): {test_accuracy:.4f}")
print(f"Test Error Rate (Network vs Pearlite): {test_error_rate:.4f}")

Test Accuracy (Network vs Pearlite): 0.9265
Test Error Rate (Network vs Pearlite): 0.0735


In [81]:
filtered_indices = np.where(np.isin(test_labels, ['spheroidite', 'pearlite']))[0]
filtered_test_features = test_features[filtered_indices]
filtered_test_labels = test_labels[filtered_indices]
binary_filtered_test_labels = np.where(filtered_test_labels == 'spheroidite', 1, 0)

# Step 3: Make predictions on the filtered test data
filtered_predictions = classifiers[('spheroidite', 'pearlite')].predict(filtered_test_features)

# Step 4: Calculate the test error rate
test_accuracy = accuracy_score(binary_filtered_test_labels, filtered_predictions)
test_error_rate = 1 - test_accuracy

print(f"Test Accuracy (Spheroidite vs Pearlite): {test_accuracy:.4f}")
print(f"Test Error Rate (Spheroidite vs Pearlite): {test_error_rate:.4f}")

Test Accuracy (Network vs Pearlite): 1.0000
Test Error Rate (Network vs Pearlite): 0.0000


In [91]:
filtered_indices = np.where(np.isin(test_labels, ['spheroidite', 'network']))[0]
filtered_test_features = test_features[filtered_indices]
filtered_test_labels = test_labels[filtered_indices]
binary_filtered_test_labels = np.where(filtered_test_labels == 'spheroidite', 1, 0)

# Step 3: Make predictions on the filtered test data
filtered_predictions = classifiers[('spheroidite', 'network')].predict(filtered_test_features)

# Step 4: Calculate the test error rate
test_accuracy = accuracy_score(binary_filtered_test_labels, filtered_predictions)
test_error_rate = 1 - test_accuracy

print(f"Test Accuracy (Spheroidite vs network): {test_accuracy:.4f}")
print(f"Test Error Rate (Spheroidite vs network): {test_error_rate:.4f}")

Test Accuracy (Spheroidite vs network): 0.9663
Test Error Rate (Spheroidite vs network): 0.0337


In [95]:
filtered_indices = np.where(np.isin(test_labels, ['spheroidite', 'spheroidite+widmanstatten']))[0]
filtered_test_features = test_features[filtered_indices]
filtered_test_labels = test_labels[filtered_indices]
binary_filtered_test_labels = np.where(filtered_test_labels == 'spheroidite', 1, 0)

# Step 3: Make predictions on the filtered test data
filtered_predictions = classifiers[('spheroidite', 'spheroidite+widmanstatten')].predict(filtered_test_features)

# Step 4: Calculate the test error rate
test_accuracy = accuracy_score(binary_filtered_test_labels, filtered_predictions)
test_error_rate = 1 - test_accuracy

print(f"Test Accuracy (Spheroidite vs spheroidite+widmanstatten): {test_accuracy:.4f}")
print(f"Test Error Rate (Spheroidite vs spheroidite+widmanstatten): {test_error_rate:.4f}")

Test Accuracy (Spheroidite vs spheroidite+widmanstatten): 0.9390
Test Error Rate (Spheroidite vs spheroidite+widmanstatten): 0.0610


In [93]:
filtered_indices = np.where(np.isin(test_labels, ['network', 'spheroidite+widmanstatten']))[0]
filtered_test_features = test_features[filtered_indices]
filtered_test_labels = test_labels[filtered_indices]
binary_filtered_test_labels = np.where(filtered_test_labels == 'network', 1, 0)

# Step 3: Make predictions on the filtered test data
filtered_predictions = classifiers[('network', 'spheroidite+widmanstatten')].predict(filtered_test_features)

# Step 4: Calculate the test error rate
test_accuracy = accuracy_score(binary_filtered_test_labels, filtered_predictions)
test_error_rate = 1 - test_accuracy

print(f"Test Accuracy (Network vs spheroidite+widmanstatten): {test_accuracy:.4f}")
print(f"Test Error Rate (Network vs spheroidite+widmanstatten): {test_error_rate:.4f}")

Test Accuracy (Network vs spheroidite+widmanstatten): 0.9398
Test Error Rate (Network vs spheroidite+widmanstatten): 0.0602


In [97]:
filtered_indices = np.where(np.isin(test_labels, ['pearlite', 'spheroidite+widmanstatten']))[0]
filtered_test_features = test_features[filtered_indices]
filtered_test_labels = test_labels[filtered_indices]
binary_filtered_test_labels = np.where(filtered_test_labels == 'pearlite', 1, 0)

# Step 3: Make predictions on the filtered test data
filtered_predictions = classifiers[('pearlite', 'spheroidite+widmanstatten')].predict(filtered_test_features)

# Step 4: Calculate the test error rate
test_accuracy = accuracy_score(binary_filtered_test_labels, filtered_predictions)
test_error_rate = 1 - test_accuracy

print(f"Test Accuracy (Pearlite vs spheroidite+widmanstatten): {test_accuracy:.4f}")
print(f"Test Error Rate (Pearlite vs spheroidite+widmanstatten): {test_error_rate:.4f}")

Test Accuracy (Pearlite vs spheroidite+widmanstatten): 0.9111
Test Error Rate (Pearlite vs spheroidite+widmanstatten): 0.0889


In [103]:
def predict_with_voting(classifiers, features):
    votes = np.zeros((features.shape[0], len(classifiers))) 

    
    class_mapping = {}
    for idx, (label1, label2) in enumerate(classifiers.keys()):
        class_mapping[idx] = (label1, label2)  
        pred = classifiers[(label1, label2)].predict(features)
        votes[:, idx] = pred 

  
    final_predictions = []
    for row in votes:
        vote_count = {label: 0 for label in training_counts.keys()}  
        for idx, vote in enumerate(row):
            if vote == 1:  
                vote_count[class_mapping[idx][0]] += 1  
            else:  
                vote_count[class_mapping[idx][1]] += 1 
        
 
        final_prediction = max(vote_count, key=vote_count.get)
        final_predictions.append(final_prediction)

    return np.array(final_predictions)


filtered_indices = np.where(np.isin(test_labels, ['pearlite', 'spheroidite+widmanstatten', 'spheroidite', 'network']))[0]
filtered_test_features = test_features[filtered_indices]
filtered_test_labels = test_labels[filtered_indices]

final_predictions = predict_with_voting(classifiers, filtered_test_features) 

test_accuracy = accuracy_score(filtered_test_labels, final_predictions)
test_error_rate = 1 - test_accuracy

print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Error Rate: {test_error_rate:.4f}")


Test Accuracy: 0.9281
Test Error Rate: 0.0719


In [121]:
mixed_indices = np.where(test_labels == 'pearlite+spheroidite')[0]
mixed_test_features_filtered = test_features[mixed_indices]
mixed_test_labels_filtered = test_labels[mixed_indices]

# Step 2: Make predictions using the pairwise classifier for pearlite vs spheroidite
pearlite_spheroidite_classifier = classifiers[('spheroidite','pearlite')]
pearlite_spheroidite_predictions = pearlite_spheroidite_classifier.predict(mixed_test_features_filtered)

# Step 3: Make predictions using the multilabel voting classifier
voting_predictions = predict_with_voting(classifiers, mixed_test_features_filtered)

print("True Label                     | Pearlite vs Spheroidite Classifier | Multilabel Voting Classifier")
print("-----------------------------------------------------------------------------------------------")
for true_label, p1, p2 in zip(mixed_test_labels_filtered, pearlite_spheroidite_predictions, voting_predictions):
    # Print "spheroidite" if prediction is 1, otherwise print "pearlite"
    p1_output = "spheroidite" if p1 == 1 else "pearlite"
    
    # Ensure p2_output is defined
    print(f"{true_label:<30} | {p1_output:<35} | {p2}")

True Label                     | Pearlite vs Spheroidite Classifier | Multilabel Voting Classifier
-----------------------------------------------------------------------------------------------
pearlite+spheroidite           | spheroidite                         | spheroidite
pearlite+spheroidite           | spheroidite                         | spheroidite
pearlite+spheroidite           | spheroidite                         | spheroidite
pearlite+spheroidite           | pearlite                            | pearlite
pearlite+spheroidite           | spheroidite                         | spheroidite
pearlite+spheroidite           | spheroidite                         | spheroidite
pearlite+spheroidite           | spheroidite                         | spheroidite
pearlite+spheroidite           | spheroidite                         | spheroidite
pearlite+spheroidite           | spheroidite                         | spheroidite
pearlite+spheroidite           | pearlite                    

In [123]:
mixed_indices = np.where(np.isin(test_labels, ['pearlite+widmanstatten', 'martensite']))[0]
mixed_test_features_filtered = test_features[mixed_indices]
mixed_test_labels_filtered = test_labels[mixed_indices]



# Step 3: Make predictions using the multilabel voting classifier
voting_predictions = predict_with_voting(classifiers, mixed_test_features_filtered)

print("True Label                     | Multilabel Voting Classifier")
print("-------------------------------------------------------------")
for true_label, p1 in zip(mixed_test_labels_filtered, voting_predictions):
    # Print "spheroidite" if prediction is 1, otherwise print "pearlite"
    
    # Ensure p2_output is defined
    print(f"{true_label:<30}  | {p1}")

True Label                     | Multilabel Voting Classifier
-------------------------------------------------------------
martensite                      | spheroidite
martensite                      | network
martensite                      | pearlite
martensite                      | spheroidite
martensite                      | spheroidite
martensite                      | network
martensite                      | spheroidite
martensite                      | pearlite
martensite                      | spheroidite
martensite                      | spheroidite
martensite                      | pearlite
martensite                      | pearlite
martensite                      | pearlite
martensite                      | pearlite
martensite                      | spheroidite
martensite                      | pearlite
martensite                      | spheroidite
martensite                      | pearlite
martensite                      | spheroidite
martensite                      | 