In [None]:
!nvidia-smi 

# Segmentation model - Version 1

The classification backbone of the model is VGG-16 network

In [3]:
import keras
import tensorflow as tf
import keras.backend as K 

In [None]:
filename = 'casting_dataset.tar.xz'
tar_file = os.path.join(dataset_dir, filename)

my_tar = tarfile.open(tar_file)
my_tar.extractall(dataset_dir) # specify which folder to extract to
my_tar.close()

## Building model

In [9]:
def build_model(optimizer, learning_rate, out_type):

    policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')#sets values to be float16 for nvidia 2000,3000 series GPUs, plus others im sure
    
    input_img = keras.layers.Input(shape=(224, 224, 3))
  
    x = keras.layers.Conv2D(8, 3,padding='valid',activation='selu', kernel_initializer='lecun_normal',dtype=policy)(input_img)
    x = keras.layers.Conv2D(8, 3,padding='valid',activation='selu', kernel_initializer='lecun_normal',dtype=policy)(x)
    x = keras.layers.MaxPooling2D(2)(x)
    x = keras.layers.BatchNormalization()(x)

    x = keras.layers.Conv2D(16, 3,padding='valid',activation='selu', kernel_initializer='lecun_normal',dtype=policy)(x)
    x = keras.layers.Conv2D(16, 3,padding='valid',activation='selu', kernel_initializer='lecun_normal',dtype=policy)(x)
    x = keras.layers.MaxPooling2D(2)(x)
    x = keras.layers.BatchNormalization()(x)
    
    x = keras.layers.Conv2D(16, 2,padding='valid',activation='selu', kernel_initializer='lecun_normal',dtype=policy)(x)
    x = keras.layers.Conv2D(16, 3,padding='valid',activation='selu', kernel_initializer='lecun_normal',dtype=policy)(x)
    x = keras.layers.MaxPooling2D(2)(x)
    x = keras.layers.BatchNormalization()(x)
    
    x = keras.layers.Conv2D(32, 2,padding='valid',activation='selu', kernel_initializer='lecun_normal',dtype=policy)(x)
    x = keras.layers.Conv2D(32, 3,padding='valid',activation='selu', kernel_initializer='lecun_normal',dtype=policy)(x)
    x = keras.layers.MaxPooling2D(2)(x)
    x = keras.layers.BatchNormalization()(x)    

    x = keras.layers.Conv2D(16, 1, activation="selu",kernel_initializer="lecun_normal",dtype=policy)(x)
    x = keras.layers.Conv2D(1, 1, activation='linear')(x)
    x_map = keras.activations.sigmoid(x)

    y = keras.layers.GlobalMaxPooling2D()(x_map)
    
    if optimizer == "SGD":
        #looks like pretty good but noisy results with no momentum, lets check 0.9...
        optimizer = keras.optimizers.SGD(lr=learning_rate)
    if optimizer == "RMSprop":
        optimizer = keras.optimizers.RMSprop(learning_rate=learning_rate)
    if optimizer == "Adam":
        optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    if out_type == "classify":
        model = keras.models.Model(inputs=input_img, outputs=y)
    if out_type == "map":
        model = keras.models.Model(inputs=input_img, outputs=x_map)
    model.compile(loss="binary_crossentropy",optimizer=optimizer, metrics=['accuracy','AUC'])

    return model


In [10]:
map_model = build_model("Adam", 1e-3, "map")
model = build_model("Adam", 1e-3, "classify")
model.summary()

Your GPU may run slowly with dtype policy mixed_float16 because it does not have compute capability of at least 7.0. Your GPU:
  Tesla P100-PCIE-16GB, compute capability 6.0
See https://developer.nvidia.com/cuda-gpus for a list of GPUs and their compute capabilities.
Instructions for updating:
Use tf.keras.mixed_precision.LossScaleOptimizer instead. LossScaleOptimizer now has all the functionality of DynamicLossScale
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 222, 222, 8)       224       
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 220, 220, 8)       584       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None

## CASTING DATA

### Pre-processing


In [4]:
import os
import tarfile
import shutil
import numpy as np

dataset_dir = '/content/drive/MyDrive/Major Project/Datasets/datasets'

model_dir = '/content/drive/MyDrive/Major Project/Models/V1'

if not os.path.exists(model_dir):
    os.mkdir(model_dir)

In [5]:
dataset = os.path.join(dataset_dir, 'casting_dataset')

In [6]:
generator = keras.preprocessing.image.ImageDataGenerator(
                            rotation_range=10,
                            width_shift_range=0.2,
                            height_shift_range=0.2,
                            brightness_range = [0.5,1.0],
                            zoom_range=0.1,
                            rescale=1./255,
                            fill_mode="nearest",
                            cval=1.0,
                            horizontal_flip=True,
                            vertical_flip=True,
                            validation_split=0.1,
                            dtype='float64',
                        )


In [None]:
print(len(os.listdir(os.path.join(dataset, 'train', 'def_front'))))
print(len(os.listdir(os.path.join(dataset, 'train', 'ok_front'))))
print(len(os.listdir(os.path.join(dataset, 'test', 'def_front'))))
print(len(os.listdir(os.path.join(dataset, 'test', 'ok_front'))))

3753
2734
421
313


In [7]:
batch_size = 64

train_generator = generator.flow_from_directory( os.path.join(dataset, 'train'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                subset='training',
                                                shuffle=True) 
                                            
val_generator = generator.flow_from_directory( os.path.join(dataset, 'train'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                subset='validation',
                                                shuffle=True) 


Found 5839 images belonging to 2 classes.
Found 648 images belonging to 2 classes.


### Training model

In [11]:
epochs = 100

reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', patience=5, min_lr = 1e-7, factor=0.5, verbose=1)

history = model.fit(train_generator, 
                    epochs= epochs,
                    validation_data = val_generator,
                    verbose=1,
                    callbacks=[reduce_lr]) 

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100

Epoch 00052: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100

Epoch 00060: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Ep

In [12]:
model.save(os.path.join(model_dir, 'casting_V3(100 eps).h5'))

In [None]:
import matplotlib.pyplot as plt


plt.plot(history.history['auc'])
plt.plot(history.history['val_auc'])
plt.show()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.show()
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.show()


### Testing the model

In [None]:
model = keras.models.load_model(os.path.join(model_dir, 'casting_V2.h5'))
model.summary()

Your GPU may run slowly with dtype policy mixed_float16 because it does not have compute capability of at least 7.0. Your GPU:
  Tesla K80, compute capability 3.7
See https://developer.nvidia.com/cuda-gpus for a list of GPUs and their compute capabilities.
Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 222, 222, 8)       224       
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 220, 220, 8)       584       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 110, 110, 8)       0         
_________________________________________________________________
batch_normalization_12 (Batc (None, 110, 110, 8)       32        


In [14]:
batch_size = 64

test_generator = generator.flow_from_directory( os.path.join(dataset, 'test'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary') 

Found 734 images belonging to 2 classes.


In [15]:
attribute_to_idx = test_generator.class_indices
idx_to_attribute = {value:key for key,value in attribute_to_idx.items()}

print(attribute_to_idx)
print(idx_to_attribute)

{'def_front': 0, 'ok_front': 1}
{0: 'def_front', 1: 'ok_front'}


In [16]:
scores = model.evaluate_generator(test_generator, verbose=1)
scores_keys = ['loss', 'accuracy', 'auc']

for key,score in zip(scores_keys, scores):

    print(key, ':', score)

"""
50 eps

loss : 0.11778640002012253
accuracy : 0.9632152318954468
auc : 0.9908440709114075
"""

"""
100 eps

loss : 0.04719378054141998
accuracy : 0.9863760471343994
auc : 0.9984898567199707
"""



loss : 0.04719378054141998
accuracy : 0.9863760471343994
auc : 0.9984898567199707


'\n50 eps\n\nloss : 0.11778640002012253\naccuracy : 0.9632152318954468\nauc : 0.9908440709114075\n'

In [None]:
from google.colab.patches import cv2_imshow

for idx, (images, output) in enumerate(test_generator):

    if idx == 2:
        break

    for i in range(batch_size):

        image = images[i]

        cv2_imshow(image * 255)

        preds = model.predict(np.expand_dims(image, axis=0))[0][0]

        actual_value = output[i]

        predicted_value = (preds > 0.75).astype(np.int)

        if predicted_value == 0:

            confidence = (1- preds) * 100
        else:
            confidence = preds * 100

        print("Confidence", confidence, "%")
        print("Actual_value", idx_to_attribute[int(actual_value)])
        print("Predicted value", idx_to_attribute[int(predicted_value)])
    
    idx += 1

#### Get Activation maps

In [17]:
def get_img_array(img_path, size):
    # `img` is a PIL image of size 299x299
    img = keras.preprocessing.image.load_img(img_path, target_size=size)
    # `array` is a float32 Numpy array of shape (299, 299, 3)
    array = keras.preprocessing.image.img_to_array(img)
    # We add a dimension to transform our array into a "batch"
    # of size (1, 299, 299, 3)
    array = np.expand_dims(array, axis=0)
    return array


def make_gradcam_heatmap(
    img_array, model, last_conv_layer, classifier_layer
):
    # First, we create a model that maps the input image to the activations
    # of the last conv layer
    
    last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)

    # Second, we create a model that maps the activations of the last conv
    # layer to the final class predictions
    classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])
    x = classifier_input
    x = classifier_layer(x)
    classifier_model = keras.Model(classifier_input, x)

    # Then, we compute the gradient of the top predicted class for our input image
    # with respect to the activations of the last conv layer
    with tf.GradientTape() as tape:
        # Compute activations of the last conv layer and make the tape watch it
        last_conv_layer_output = last_conv_layer_model(img_array)
        tape.watch(last_conv_layer_output)
        # Compute class predictions
        preds = classifier_model(last_conv_layer_output)
        top_pred_index = tf.argmax(preds[0])
        top_class_channel = preds[:, top_pred_index]

    # This is the gradient of the top predicted class with regard to
    # the output feature map of the last conv layer
    grads = tape.gradient(top_class_channel, last_conv_layer_output)

    # This is a vector where each entry is the mean intensity of the gradient
    # over a specific feature map channel
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # We multiply each channel in the feature map array
    # by "how important this channel is" with regard to the top predicted class
    last_conv_layer_output = last_conv_layer_output.numpy()[0]
    pooled_grads = pooled_grads.numpy()
    for i in range(pooled_grads.shape[-1]):
        last_conv_layer_output[:, :, i] *= pooled_grads[i]

    # The channel-wise mean of the resulting feature map
    # is our heatmap of class activation
    heatmap = np.mean(last_conv_layer_output, axis=-1)

    # For visualization purpose, we will also normalize the heatmap between 0 & 1
    heatmap = np.maximum(heatmap, 0) / np.max(heatmap)
    return heatmap

In [None]:
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt
import matplotlib.cm as cm

last_conv_layer = model.layers[-3]
classifier_layer = model.layers[-1]

for idx, (images, output) in enumerate(test_generator):

    if idx == 1:
        break

    for i in range(batch_size):
        img = images[i] * 255

        image = np.expand_dims(images[i], axis=0)

        preds = model.predict(image, verbose=1)[0][0]

        actual_value = output[i]

        predicted_value = (preds > 0.6).astype(np.int)

        if predicted_value == 0:

            confidence = (1- preds) * 100
        else:
            confidence = preds * 100

       
        # Generate class activation heatmap
        heatmap = make_gradcam_heatmap(
            image, model, last_conv_layer, classifier_layer
        )


        # We rescale heatmap to a range 0-255
        heatmap = np.uint8(255 * heatmap)

        # We use jet colormap to colorize heatmap
        jet = cm.get_cmap("jet")

        # We use RGB values of the colormap
        jet_colors = jet(np.arange(256))[:, :3]
        jet_heatmap = jet_colors[heatmap]

        # We create an image with RGB colorized heatmap
        jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)
        jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
        jet_heatmap = keras.preprocessing.image.img_to_array(jet_heatmap)

        # Superimpose the heatmap on original image
        superimposed_img = jet_heatmap * 0.6 + img
        superimposed_img = keras.preprocessing.image.array_to_img(superimposed_img)

        plt.figure(figsize=(5,5))
        plt.imshow(superimposed_img)
        plt.show()

        print("Confidence", confidence, "%")
        print("Actual_value", idx_to_attribute[int(actual_value)])
        print("Predicted value", idx_to_attribute[int(predicted_value)])
    
        
    idx += 1

## SOLAR PANEL DATA

### Pre-processing

In [None]:
import os
import tarfile
import shutil
import numpy as np

dataset_dir = '/content/drive/MyDrive/Major Project/Datasets/datasets'

model_dir = '/content/drive/MyDrive/Major Project/Models/V1'

if not os.path.exists(model_dir):
    os.mkdir(model_dir)

In [None]:
filename = 'solar_panels_products.tar.xz'
tar_file = os.path.join(dataset_dir, filename)

my_tar = tarfile.open(tar_file)
my_tar.extractall(dataset_dir) # specify which folder to extract to
my_tar.close()

In [None]:
dataset = os.path.join(dataset_dir, 'solar_panels_products')

In [None]:
generator = keras.preprocessing.image.ImageDataGenerator(
                            
                            rotation_range=10,
                            width_shift_range=0.1,
                            height_shift_range=0.1,
                            brightness_range = [0.5,1.0],
                            zoom_range=0.1,
                            rescale=1./255,
                            fill_mode="nearest",
                            cval=0.0,
                            horizontal_flip=True,
                            vertical_flip=True,
                            validation_split=0.1,
                            dtype='float64',
                        )


In [None]:
batch_size = 64

train_generator = generator.flow_from_directory( os.path.join(dataset, 'train'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                subset='training',
                                                shuffle=True) 
                                            
val_generator = generator.flow_from_directory( os.path.join(dataset, 'train'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                subset='validation',
                                                shuffle=True) 

### Training the model

In [None]:
epochs = 100

history = model.fit(train_generator, 
                    epochs= epochs,
                    validation_data = val_generator,
                    verbose=1)

In [None]:
import matplotlib.pyplot as plt


plt.plot(history.history['auc'])
plt.plot(history.history['val_auc'])
plt.show()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.show()

In [None]:
model.save(os.path.join(model_dir, 'casting.h5'))

### Testing the model

In [None]:
test_generator = generator.flow_from_directory( os.path.join(dataset, 'test'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                shuffle=True) 

In [None]:
attribute_to_idx = test_generator.class_indices
idx_to_attribute = {value:key for key,value in attribute_to_idx.items()}

print(attribute_to_idx)
print(idx_to_attribute)

In [None]:
scores = model.evaluate_generator(test_generator, verbose=1)
print(scores)

In [None]:
from google.colab.patches import cv2_imshow

for idx, (images, output) in enumerate(test_generator):

    if idx == 1:
        break

    for i in range(batch_size):

        image = images[i]

        cv2_imshow(image * 255)

        confidence = model.predict(np.expand_dims(image, axis=0))[0]

        actual_value = output[i]
        
        predicted_value = (confidence > 0.5).astype(np.int)

        print("Confidence", confidence)
        print(idx_to_attribute[int(actual_value)])
        print(idx_to_attribute[int(predicted_value)])
    
    idx += 1

## STEEL DEFECT DATA

### Pre-processing

In [None]:
import os
import tarfile
import shutil
import numpy as np

dataset_dir = '/content/drive/MyDrive/Major Project/Datasets/datasets'

model_dir = '/content/drive/MyDrive/Major Project/Models/V1'

if not os.path.exists(model_dir):
    os.mkdir(model_dir)

In [None]:
filename = 'solar_panels_products.tar.xz'
tar_file = os.path.join(dataset_dir, filename)

my_tar = tarfile.open(tar_file)
my_tar.extractall(dataset_dir) # specify which folder to extract to
my_tar.close()

In [None]:
dataset = os.path.join(dataset_dir, 'casting_dataset')

generator = keras.preprocessing.image.ImageDataGenerator(
                            
                            rotation_range=10,
                            width_shift_range=0.1,
                            height_shift_range=0.1,
                            brightness_range = [0.5,1.0],
                            zoom_range=0.1,
                            rescale=1./255,
                            fill_mode="nearest",
                            cval=0.0,
                            horizontal_flip=True,
                            vertical_flip=True,
                            validation_split=0.1,
                            dtype='float64',
                        )


In [None]:
batch_size = 32

train_generator = generator.flow_from_directory( os.path.join(dataset, 'train'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                subset='training') 
                                            
val_generator = generator.flow_from_directory( os.path.join(dataset, 'train'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                subset='validation') 

test_generator = generator.flow_from_directory( os.path.join(dataset, 'test'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary') 

### Training the model

In [None]:
epochs = 100

history = model.fit(train_generator, 
                    epochs= epochs,
                    validation_data = val_generator,
                    verbose=1)

In [None]:
import matplotlib.pyplot as plt


plt.plot(history.history['auc'])
plt.plot(history.history['val_auc'])
plt.show()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.show()

In [None]:
model.save(os.path.join(model_dir, 'casting.h5'))

### Testing the model

In [None]:
attribute_to_idx = test_generator.class_indices
idx_to_attribute = {value:key for key,value in attribute_to_idx.items()}

print(attribute_to_idx)
print(idx_to_attribute)

In [None]:
scores = model.evaluate_generator(test_generator, verbose=1)
print(scores)

In [None]:
from google.colab.patches import cv2_imshow

for idx, (images, output) in enumerate(test_generator):

    if idx == 1:
        break

    for i in range(batch_size):

        image = images[i]

        cv2_imshow(image * 255)

        confidence = model.predict(np.expand_dims(image, axis=0))[0]

        actual_value = output[i]
        
        predicted_value = (confidence > 0.5).astype(np.int)

        print("Confidence", confidence)
        print(idx_to_attribute[int(actual_value)])
        print(idx_to_attribute[int(predicted_value)])
    
    idx += 1

## SURFACE DEFECT DATA

### Pre-processing

In [None]:
import os
import tarfile
import shutil
import numpy as np

dataset_dir = '/content/drive/MyDrive/Major Project/Datasets/datasets'

model_dir = '/content/drive/MyDrive/Major Project/Models/V1'

if not os.path.exists(model_dir):
    os.mkdir(model_dir)

In [None]:
filename = 'solar_panels_products.tar.xz'
tar_file = os.path.join(dataset_dir, filename)

my_tar = tarfile.open(tar_file)
my_tar.extractall(dataset_dir) # specify which folder to extract to
my_tar.close()

In [None]:
dataset = os.path.join(dataset_dir, 'casting_dataset')

generator = keras.preprocessing.image.ImageDataGenerator(
                            
                            rotation_range=10,
                            width_shift_range=0.1,
                            height_shift_range=0.1,
                            brightness_range = [0.5,1.0],
                            zoom_range=0.1,
                            rescale=1./255,
                            fill_mode="nearest",
                            cval=0.0,
                            horizontal_flip=True,
                            vertical_flip=True,
                            validation_split=0.1,
                            dtype='float64',
                        )


In [None]:
batch_size = 32

train_generator = generator.flow_from_directory( os.path.join(dataset, 'train'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                subset='training') 
                                            
val_generator = generator.flow_from_directory( os.path.join(dataset, 'train'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                subset='validation') 

test_generator = generator.flow_from_directory( os.path.join(dataset, 'test'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary') 

### Training the model

In [None]:
epochs = 100

history = model.fit(train_generator, 
                    epochs= epochs,
                    validation_data = val_generator,
                    verbose=1)

In [None]:
import matplotlib.pyplot as plt


plt.plot(history.history['auc'])
plt.plot(history.history['val_auc'])
plt.show()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.show()

In [None]:
model.save(os.path.join(model_dir, 'casting.h5'))

### Testing the model

In [None]:
attribute_to_idx = test_generator.class_indices
idx_to_attribute = {value:key for key,value in attribute_to_idx.items()}

print(attribute_to_idx)
print(idx_to_attribute)

In [None]:
scores = model.evaluate_generator(test_generator, verbose=1)
print(scores)

In [None]:
from google.colab.patches import cv2_imshow

for idx, (images, output) in enumerate(test_generator):

    if idx == 1:
        break

    for i in range(batch_size):

        image = images[i]

        cv2_imshow(image * 255)

        confidence = model.predict(np.expand_dims(image, axis=0))[0]

        actual_value = output[i]
        
        predicted_value = (confidence > 0.5).astype(np.int)

        print("Confidence", confidence)
        print(idx_to_attribute[int(actual_value)])
        print(idx_to_attribute[int(predicted_value)])
    
    idx += 1

## WELDING DATA

### Pre-processing

In [None]:
import os
import tarfile
import shutil
import numpy as np

dataset_dir = '/content/drive/MyDrive/Major Project/Datasets/datasets'

model_dir = '/content/drive/MyDrive/Major Project/Models/V1'

if not os.path.exists(model_dir):
    os.mkdir(model_dir)

In [None]:
filename = 'solar_panels_products.tar.xz'
tar_file = os.path.join(dataset_dir, filename)

my_tar = tarfile.open(tar_file)
my_tar.extractall(dataset_dir) # specify which folder to extract to
my_tar.close()

In [None]:
dataset = os.path.join(dataset_dir, 'casting_dataset')

generator = keras.preprocessing.image.ImageDataGenerator(
                            
                            rotation_range=10,
                            width_shift_range=0.1,
                            height_shift_range=0.1,
                            brightness_range = [0.5,1.0],
                            zoom_range=0.1,
                            rescale=1./255,
                            fill_mode="nearest",
                            cval=0.0,
                            horizontal_flip=True,
                            vertical_flip=True,
                            validation_split=0.1,
                            dtype='float64',
                        )


In [None]:
batch_size = 32

train_generator = generator.flow_from_directory( os.path.join(dataset, 'train'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                subset='training') 
                                            
val_generator = generator.flow_from_directory( os.path.join(dataset, 'train'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary',
                                                subset='validation') 

test_generator = generator.flow_from_directory( os.path.join(dataset, 'test'), 
                                                target_size=(224, 224), 
                                                batch_size=batch_size,
                                                class_mode='binary') 

### Training the model

In [None]:
epochs = 100

history = model.fit(train_generator, 
                    epochs= epochs,
                    validation_data = val_generator,
                    verbose=1)

In [None]:
import matplotlib.pyplot as plt


plt.plot(history.history['auc'])
plt.plot(history.history['val_auc'])
plt.show()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.show()

In [None]:
model.save(os.path.join(model_dir, 'casting.h5'))

### Testing the model

In [None]:
attribute_to_idx = test_generator.class_indices
idx_to_attribute = {value:key for key,value in attribute_to_idx.items()}

print(attribute_to_idx)
print(idx_to_attribute)

In [None]:
scores = model.evaluate_generator(test_generator, verbose=1)
print(scores)

In [None]:
from google.colab.patches import cv2_imshow

for idx, (images, output) in enumerate(test_generator):

    if idx == 1:
        break

    for i in range(batch_size):

        image = images[i]

        cv2_imshow(image * 255)

        confidence = model.predict(np.expand_dims(image, axis=0))[0]

        actual_value = output[i]
        
        predicted_value = (confidence > 0.5).astype(np.int)

        print("Confidence", confidence)
        print(idx_to_attribute[int(actual_value)])
        print(idx_to_attribute[int(predicted_value)])
    
    idx += 1