# CV Challenge Task

 ### Task 1: Preparing the data in the requested format

In [4]:
import os
import glob
import tensorflow as tf
import xml.etree.ElementTree as ET

# Function to parse XML annotations
def parse_xml(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    
    width = int(root.find('size').find('width').text)
    height = int(root.find('size').find('height').text)
    filename = root.find('filename').text
    source_id = root.find('source').find('database').text
    
    # Extract bounding box coordinates and class labels
    bboxes = []
    class_labels = []
    for obj in root.findall('object'):
        xmin = float(obj.find('bndbox').find('xmin').text)
        ymin = float(obj.find('bndbox').find('ymin').text)
        xmax = float(obj.find('bndbox').find('xmax').text)
        ymax = float(obj.find('bndbox').find('ymax').text)
        bbox = [xmin, xmax, ymin, ymax]
        bboxes.append(bbox)
        
        class_text = obj.find('name').text
        class_labels.append(class_text.encode('utf-8'))  # Encode class label to bytes
    
    return width, height, filename.encode('utf-8'), source_id.encode('utf-8'), bboxes, class_labels

# Function to encode image to raw bytes
def encode_image(image_path):
    with tf.io.gfile.GFile(image_path, 'rb') as f:
        encoded_image = f.read()
    return encoded_image

# Function to create TFRecord file
def create_tfrecord(images_dir, annotations_dir, output_file):
    writer = tf.io.TFRecordWriter(output_file)
    
    annotations = glob.glob(os.path.join(annotations_dir, '*.xml'))
    for annotation in annotations:
        image_filename = os.path.splitext(os.path.basename(annotation))[0] + '.jpg'
        word_before_underscore = image_filename.split('_')[0]
        image_path = os.path.join(images_dir,word_before_underscore, image_filename)
        print("-----------------------------")
        print(image_filename)
        print(image_path)
        if not os.path.exists(image_path):
            print(f"Image file '{image_path}' not found. Skipping...")
            continue
        
        width, height, filename, source_id, bboxes, class_labels = parse_xml(annotation)
        encoded_image = encode_image(image_path)
        
        if not encoded_image:
            print(f"Failed to encode image '{image_path}'. Skipping...")
            continue
        
        feature = {
            "image/width": tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
            "image/height": tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
            'image/filename': tf.train.Feature(bytes_list=tf.train.BytesList(value=[filename])),
            'image/source_id': tf.train.Feature(bytes_list=tf.train.BytesList(value=[source_id])),
            'image/encodedrawdata': tf.train.Feature(bytes_list=tf.train.BytesList(value=[encoded_image])),
            'image/format': tf.train.Feature(bytes_list=tf.train.BytesList(value=['jpeg'.encode('utf-8')])),
            'image/object/bbox/xmin': tf.train.Feature(float_list=tf.train.FloatList(value=[bbox[0] for bbox in bboxes])),
            'image/object/bbox/xmax': tf.train.Feature(float_list=tf.train.FloatList(value=[bbox[1] for bbox in bboxes])),
            'image/object/bbox/ymin': tf.train.Feature(float_list=tf.train.FloatList(value=[bbox[2] for bbox in bboxes])),
            'image/object/bbox/ymax': tf.train.Feature(float_list=tf.train.FloatList(value=[bbox[3] for bbox in bboxes])),
            'image/object/class/text': tf.train.Feature(bytes_list=tf.train.BytesList(value=class_labels)),
            'image/object/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),  
            'image/object/class/single': tf.train.Feature(int64_list=tf.train.Int64List(value=[1])),  
            'image/object/difficult': tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),  
            'image/object/truncated': tf.train.Feature(int64_list=tf.train.Int64List(value=[0])), 
            'image/object/view': tf.train.Feature(bytes_list=tf.train.BytesList(value=[])),  
        }
        
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())
    
    writer.close()


# Main function to create TFRecord files for train and validation sets
def main():
    train_images_dir = 'D:/Downloads/NEU-DET/train/images/'
    train_annotations_dir = 'D:/Downloads/NEU-DET/train/annotations/'
    train_output_file = 'D:/Downloads/NEU-DET/train/output.tfrecord'
    create_tfrecord(train_images_dir, train_annotations_dir, train_output_file)
    
    val_images_dir = 'D:/Downloads/NEU-DET/validation/images/'
    val_annotations_dir = 'D:/Downloads/NEU-DET/validation/annotations/'
    val_output_file = 'D:/Downloads/NEU-DET/validation/output.tfrecord'
    create_tfrecord(val_images_dir, val_annotations_dir, val_output_file)

if __name__ == "__main__":
    main()


-----------------------------
crazing_1.jpg
D:/Downloads/NEU-DET/train/images/crazing\crazing_1.jpg
-----------------------------
crazing_10.jpg
D:/Downloads/NEU-DET/train/images/crazing\crazing_10.jpg
-----------------------------
crazing_100.jpg
D:/Downloads/NEU-DET/train/images/crazing\crazing_100.jpg
-----------------------------
crazing_101.jpg
D:/Downloads/NEU-DET/train/images/crazing\crazing_101.jpg
-----------------------------
crazing_102.jpg
D:/Downloads/NEU-DET/train/images/crazing\crazing_102.jpg
-----------------------------
crazing_103.jpg
D:/Downloads/NEU-DET/train/images/crazing\crazing_103.jpg
-----------------------------
crazing_104.jpg
D:/Downloads/NEU-DET/train/images/crazing\crazing_104.jpg
-----------------------------
crazing_105.jpg
D:/Downloads/NEU-DET/train/images/crazing\crazing_105.jpg
-----------------------------
crazing_106.jpg
D:/Downloads/NEU-DET/train/images/crazing\crazing_106.jpg
-----------------------------
crazing_107.jpg
D:/Downloads/NEU-DET/tra

### Task 2: Classification

In [3]:
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy

# Load TFRecord datasets
train_tfrecord_path = 'D:/Downloads/NEU-DET/train/output.tfrecord'
val_tfrecord_path = 'D:/Downloads/NEU-DET/validation/output.tfrecord'

# Function to parse TFRecord dataset and extract image dimensions
def extract_input_shape(tfrecord_path):
    max_height = 0
    max_width = 0

    # Define feature description for parsing TFRecord
    feature_description = {
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
    }

    # Create TFRecord dataset
    dataset = tf.data.TFRecordDataset(tfrecord_path)

    # Parse TFRecord examples and extract image dimensions
    for example in dataset:
        parsed_example = tf.io.parse_single_example(example, feature_description)
        height = parsed_example['image/height']
        width = parsed_example['image/width']
        max_height = max(max_height, height)
        max_width = max(max_width, width)

    return max_height, max_width

# Extract input shape from TFRecord dataset
image_height, image_width = extract_input_shape(train_tfrecord_path)
n_color_channels = 3  # As images are not in RGB format

# Load pre-trained Xception base model
base_model = tf.keras.applications.Xception(input_shape=(image_height, image_width, n_color_channels),
                                            include_top=False,
                                            weights="imagenet")

n_classes = 6  # Defining the number of classes in the dataset

batch_size = 32
epochs = 5

# Define preprocessing function for TFRecord datasets
def preprocess_dataset(dataset):
    # Define the feature description for parsing TFRecord
    feature_description = {
        'image/encodedrawdata': tf.io.FixedLenFeature([], tf.string),
        'image/object/class/label': tf.io.FixedLenFeature([], tf.int64),
    }

    # Parse TFRecord examples and preprocess the data
    def _parse_function(example_proto):
        parsed_example = tf.io.parse_single_example(example_proto, feature_description)
        image = tf.image.decode_jpeg(parsed_example['image/encodedrawdata'], channels=3)
        image = tf.image.resize(image, [image_height, image_width])
        image = tf.cast(image, tf.float32) / 255.0  # Normalize pixel values to [0, 1]
        label = tf.cast(parsed_example['image/object/class/label'], tf.int32)
        return image, label

    dataset = dataset.map(_parse_function)
    dataset = dataset.batch(batch_size)  # Batch the dataset
    return dataset


# Extending the base model with additional layers as per the task instructions
x = base_model.output
x = BatchNormalization()(x)
x = GlobalAveragePooling2D()(x)
x = Dense(8, activation='relu')(x)
x = Dropout(0.4)(x)
output = Dense(n_classes, activation='softmax')(x)

# Create the classification model
model = Model(inputs=base_model.input, outputs=output)

# Compile the model
model.compile(optimizer=Adam(), 
              loss=SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Train the model
train_dataset = tf.data.TFRecordDataset(train_tfrecord_path)
val_dataset = tf.data.TFRecordDataset(val_tfrecord_path)

train_dataset = preprocess_dataset(train_dataset)
val_dataset = preprocess_dataset(val_dataset)

history = model.fit(train_dataset, 
                    epochs=epochs, 
                    validation_data=val_dataset,
                    batch_size=batch_size)

# Evaluate the model
loss, accuracy = model.evaluate(val_dataset)
print(f'Validation Loss: {loss}, Validation Accuracy: {accuracy}')


Epoch 1/5


  output, from_logits = _get_logits(


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Validation Loss: 1.3904621601104736, Validation Accuracy: 0.7666666507720947
