<a href="https://colab.research.google.com/github/prasanth-33460/matrice/blob/main/matricesf_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import json
import tensorflow as tf
from tensorflow.keras import layers, models
from google.colab import drive

In [2]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
def bifpn_block(inputs, num_channels):
    P3_in, P4_in, P5_in = inputs

    P3_up = layers.UpSampling2D()(P4_in)
    P3_out = layers.Concatenate()([P3_in, P3_up])
    P3_out = conv_block(P3_out, num_channels, 3)

    P4_up = layers.UpSampling2D()(P5_in)
    P4_out = layers.Concatenate()([P4_in, P4_up])
    P4_out = conv_block(P4_out, num_channels, 3)

    P4_down = layers.MaxPooling2D()(P3_out)
    P4_out = layers.Concatenate()([P4_out, P4_down])
    P4_out = conv_block(P4_out, num_channels, 3)

    P5_down = layers.MaxPooling2D()(P4_out)
    P5_out = layers.Concatenate()([P5_in, P5_down])
    P5_out = conv_block(P5_out, num_channels, 3)

    return P3_out, P4_out, P5_out

In [4]:
def load_annotations(annotation_path, image_dir):
    with open(annotation_path, 'r') as f:
        coco_data = json.load(f)

    annotations = coco_data['annotations']
    images = {image['id']: image['file_name'] for image in coco_data['images']}
    result = []

    for annotation in annotations:
        image_id = annotation['image_id']
        image_file = images[image_id]
        image_path = os.path.join(image_dir, image_file)
        boxes = annotation['bbox']
        category_id = annotation['category_id']
        result.append((image_path, boxes, category_id))

    return result

In [5]:
def parse_annotation(annotation, image_dir):
    if not isinstance(annotation, list):
        raise ValueError("Annotation should be a list")

    parsed_annotations = []
    for item in annotation:
        if not isinstance(item, dict):
            print(f"Unexpected item type: {type(item)} - {item}")
            continue

        image_id = item.get('image_id', None)
        if image_id is None:
            raise KeyError('Image ID not found in annotation.')

        image_filename = f"{image_dir}/{image_id}.jpg"
        objects = item.get('objects', [])

        for obj in objects:
            category = obj.get('category', None)
            bbox = obj.get('bbox', None)
            if category is None or bbox is None:
                raise KeyError('Category or bounding box not found in object.')

            parsed_annotations.append({
                'image_id': image_id,
                'image_filename': image_filename,
                'category': category,
                'bbox': bbox
            })

    return parsed_annotations

In [7]:
def conv_block(inputs, filters, kernel_size, strides=1):
    x = layers.Conv2D(filters, kernel_size, strides=strides, padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    return x

In [8]:
def build_single_head_model(input_shape, num_classes, backbone_weights='imagenet'):
    inputs = layers.Input(shape=input_shape)

    backbone = tf.keras.applications.ResNet50(include_top=False, input_tensor=inputs, weights=backbone_weights)
    C3, C4, C5 = backbone.get_layer('conv3_block4_out').output, backbone.get_layer('conv4_block6_out').output, backbone.get_layer('conv5_block3_out').output

    num_channels = 64
    P3, P4, P5 = C3, C4, C5
    for _ in range(3):
        P3, P4, P5 = bifpn_block((P3, P4, P5), num_channels)

    head = layers.Conv2D(num_classes, 1, activation='sigmoid')(P3)
    head = layers.GlobalAveragePooling2D()(head)

    model = models.Model(inputs, head)
    return model

input_shape = (512, 512, 3)
num_classes_food = 10
num_classes_appliance = 5

food_model = build_single_head_model(input_shape, num_classes_food)
appliance_model = build_single_head_model(input_shape, num_classes_appliance)

food_model.summary()
appliance_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 512, 512, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 518, 518, 3)          0         ['input_2[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 256, 256, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 256, 256, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                          

In [9]:
def train_step_single_model(model, images, labels):
    with tf.GradientTape() as tape:
        preds = model(images, training=True)
        loss = loss_fn(labels, preds)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

def train_single_model(model, dataset, epochs=30):
    for epoch in range(epochs):
        for images, labels in dataset:
            loss = train_step_single_model(model, images, labels)
        print(f'Epoch {epoch + 1}, Loss: {loss.numpy()}')

In [10]:
def create_tf_dataset(annotations, image_dir):
    image_paths = []
    boxes_list = []
    labels_list = []

    for annotation in annotations:
        image_path, boxes, category_id = annotation
        image_paths.append(image_path)
        boxes_list.append(boxes)
        labels_list.append(category_id)

    def load_image(image_path):
        image = tf.io.read_file(image_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize(image, [512, 512])
        return image

    image_dataset = tf.data.Dataset.from_tensor_slices(image_paths).map(load_image)
    boxes_dataset = tf.data.Dataset.from_tensor_slices(boxes_list)
    labels_dataset = tf.data.Dataset.from_tensor_slices(labels_list)

    dataset = tf.data.Dataset.zip((image_dataset, (boxes_dataset, labels_dataset)))
    return dataset

In [12]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.CategoricalCrossentropy()

def train_step_single_model(model, images, labels):
    with tf.GradientTape() as tape:
        preds = model(images, training=True)
        loss = loss_fn(labels, preds)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

def train_single_model(model, dataset, epochs=30):
    for epoch in range(epochs):
        for images, labels in dataset:
            loss = train_step_single_model(model, images, labels)
        print(f'Epoch {epoch + 1}, Loss: {loss.numpy()}')

train_single_model(food_model, food_dataset)
train_single_model(appliance_model, appliance_dataset)

NameError: name 'food_dataset' is not defined

In [13]:
def build_efficientdet_cspdarknet(input_shape, num_classes_food, num_classes_appliance):
    inputs = layers.Input(shape=input_shape)
    backbone = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
    ], name='backbone')

    backbone_output = backbone(inputs)
    food_output = layers.Conv2D(num_classes_food, (1, 1), activation='softmax', name='food_output')(backbone_output)
    appliance_output = layers.Conv2D(num_classes_appliance, (1, 1), activation='softmax', name='appliance_output')(backbone_output)

    model = models.Model(inputs=inputs, outputs=[food_output, appliance_output])
    return model

In [14]:

def evaluate_model(model, test_dataset):
    accuracy = tf.keras.metrics.CategoricalAccuracy()
    for images, labels in test_dataset:
        preds = model(images, training=False)
        if not tf.reduce_all(tf.equal(tf.reduce_sum(preds, axis=-1), 1.0)):
            preds = tf.nn.softmax(preds)
        if len(labels.shape) == 1 or labels.shape[-1] == 1:
            labels = tf.one_hot(labels, depth=preds.shape[-1])
        print(f'preds shape: {preds.shape}, labels shape: {labels.shape}')
        accuracy.update_state(labels, preds)
    print(f'Test Accuracy: {accuracy.result().numpy()}')

# Define the optimizer and loss function globally
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.CategoricalCrossentropy()

In [15]:
def main():
    appliance_annotations_path = "/content/drive/MyDrive/hello/appliance/annotations"
    food_annotations_path = "/content/drive/MyDrive/hello/food/annotations"
    appliance_images_val_path = "/content/drive/MyDrive/hello/appliance/images/val"
    food_images_val_path = "/content/drive/MyDrive/hello/food/images/val"

    appliance_annotation_paths = [os.path.join(appliance_annotations_path, file) for file in os.listdir(appliance_annotations_path) if file.endswith('.json')]
    food_annotation_paths = [os.path.join(food_annotations_path, file) for file in os.listdir(food_annotations_path) if file.endswith('.json')]

    appliance_annotations = []
    food_annotations = []

    for annotation_path in appliance_annotation_paths:
        annotations = load_annotations(annotation_path, appliance_images_val_path)
        print(f"Loaded {len(annotations)} appliance annotations from {annotation_path}")
        appliance_annotations.extend(annotations)

    for annotation_path in food_annotation_paths:
        annotations = load_annotations(annotation_path, food_images_val_path)
        print(f"Loaded {len(annotations)} food annotations from {annotation_path}")
        food_annotations.extend(annotations)

    appliance_val_dataset = create_tf_dataset(appliance_annotations, appliance_images_val_path).batch(32).prefetch(tf.data.AUTOTUNE)
    food_val_dataset = create_tf_dataset(food_annotations, food_images_val_path).batch(32).prefetch(tf.data.AUTOTUNE)

    input_shape = (512, 512, 3)
    num_classes_food = 10
    num_classes_appliance = 5

    model = build_efficientdet_cspdarknet(input_shape, num_classes_food, num_classes_appliance)
    model.summary()

    print("Evaluating Food Model on Food Validation Set")
    evaluate_model(model, food_val_dataset)

    print("Evaluating Appliance Model on Appliance Validation Set")
    evaluate_model(model, appliance_val_dataset)

    # Training the models
    print("Training Food Model")
    train_single_model(model, food_val_dataset)

    print("Training Appliance Model")
    train_single_model(model, appliance_val_dataset)


In [16]:
if __name__ == "__main__":
    main()

Loaded 1141 appliance annotations from /content/drive/MyDrive/hello/appliance/annotations/instances_train2017.json
Loaded 148 appliance annotations from /content/drive/MyDrive/hello/appliance/annotations/instances_val2017.json
Loaded 82 appliance annotations from /content/drive/MyDrive/hello/appliance/annotations/instances_test2017.json
Loaded 287 food annotations from /content/drive/MyDrive/hello/food/annotations/instances_val2017.json
Loaded 3048 food annotations from /content/drive/MyDrive/hello/food/annotations/instances_train2017.json
Loaded 576 food annotations from /content/drive/MyDrive/hello/food/annotations/instances_test2017.json
Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 512, 512, 3)]        0         []                            
                                     

InvalidArgumentError: {{function_node __wrapped__Pack_N_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} Shapes of all inputs must match: values[0].shape = [32,62,62,10] != values[1].shape = [32,62,62,5] [Op:Pack] name: packed