In [5]:
import pandas as pd

# Load the images and labels from the parquet files
images_df = pd.read_parquet('images.parquet')
labels_df = pd.read_parquet('labels.parquet')

# Inspect the loaded data
print("Images DataFrame:")
print(images_df.head(1))
print("\nLabels DataFrame:")
print(labels_df.head(20))

Images DataFrame:
   id                                              image
0   0  b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...

Labels DataFrame:
    image_id    x    y  orientation  radius  class
0          0  269  450     0.000000      17      0
1          0  533  299     0.663225      45      1
2          0  539  427     0.610865      46      1
3          0  365  148     0.488692      45      1
4          0  472  136     2.426008      40      1
5          0  846  448     4.660029      41      1
6          0  613  248     6.003933      41      1
7          0  287  214     2.391101      48      1
8          0  657  387     5.393067      44      1
9          0  752  311     2.792527      41      1
10         0  657  134     2.565634      49      1
11         0  699  245     3.019420      38      1
12         0  308  382     4.537856      39      2
13         0  926  116     6.021386      39      2
14         0  398  398     0.506145      42      2
15         0  774  120     3.3335

In [6]:
import cv2
import numpy as np

def decode_image(image_data):
    nparr = np.frombuffer(image_data, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    return img

images_df['image'] = images_df['image'].apply(decode_image)

In [7]:
# Inspect the loaded data
print("Images DataFrame shape:")
print(images_df.shape)
print("First image")
print(images_df.head(1))
print("\nLabels DataFrame shape:")
print(labels_df.shape)
print("\nFirst label:")
print(labels_df.head(10))
# Access the image array of the first row
first_image = images_df['image'].iloc[0]

print(f"\nShape of the first image: {first_image.shape}")

Images DataFrame shape:
(500, 2)
First image
   id                                              image
0   0  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...

Labels DataFrame shape:
(11500, 6)

First label:
   image_id    x    y  orientation  radius  class
0         0  269  450     0.000000      17      0
1         0  533  299     0.663225      45      1
2         0  539  427     0.610865      46      1
3         0  365  148     0.488692      45      1
4         0  472  136     2.426008      40      1
5         0  846  448     4.660029      41      1
6         0  613  248     6.003933      41      1
7         0  287  214     2.391101      48      1
8         0  657  387     5.393067      44      1
9         0  752  311     2.792527      41      1

Shape of the first image: (512, 1024, 3)


In [8]:
def scale_image(image, scale_factor=0.5):
    width = int(image.shape[1] * scale_factor)
    height = int(image.shape[0] * scale_factor)
    resized_image = cv2.resize(image, (width, height))
    return resized_image

images_df['scaled_image'] = images_df['image'].apply(scale_image)

# Access the image array of the first row
first_image_scaled = images_df['scaled_image'].iloc[0]

print(f"\nShape of the first SCALED image: {first_image_scaled.shape}")

print("images_df", images_df.head(10))
# print("val_images", len(val_images))


Shape of the first SCALED image: (256, 512, 3)
images_df    id                                              image  \
0   0  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   
1   1  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   
2   2  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   
3   3  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   
4   4  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   
5   5  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   
6   6  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   
7   7  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   
8   8  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   
9   9  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   

                                        scaled_image  
0  [[[18, 124, 58], [18, 124, 58], [17, 123, 57],...  
1  [[[18, 124, 58], [18, 124, 58], [17, 123, 57],...  
2  [[[18, 124, 58], [18, 124, 58], [17, 123, 57],...  
3  [[[18, 124, 58], [18, 124, 58], [17, 123, 57],...  
4  [[[

In [9]:
print("Missing values in 'scaled_image' column of images_df:")
print(images_df['scaled_image'].isnull().sum())

print("Missing values in 'image' column of images_df:")
print(images_df['image'].isnull().sum())

Missing values in 'scaled_image' column of images_df:
0
Missing values in 'image' column of images_df:
0


In [10]:
from sklearn.model_selection import train_test_split

# Create a list to store the merged DataFrames for each image
merged_dfs = []

for image_id in images_df['id'].unique():
    image_data = images_df[images_df['id'] == image_id]
    label_data = labels_df[labels_df['image_id'] == image_id]
    
    merged_df = pd.merge(image_data, label_data, left_on='id', right_on='image_id', how='inner')
    merged_dfs.append(merged_df)

final_df = pd.concat(merged_dfs, ignore_index=True)

# Split the data into training and validation sets
train_data, val_data = train_test_split(final_df, test_size=0.2, random_state=42)

# Separate the images and labels for training and validation sets
train_images = train_data['scaled_image'].tolist()
val_images = val_data['scaled_image'].tolist()
train_labels = train_data.drop(['id', 'scaled_image'], axis=1)
val_labels = val_data.drop(['id', 'scaled_image'], axis=1)

print("Length of train_images:", len(train_images))
print("Length of val_images:", len(val_images))
print("Length of train_labels:", len(train_labels))
print("Length of val_labels:", len(val_labels))
print("First train data sample", train_data.head(1))

Length of train_images: 9200
Length of val_images: 2300
Length of train_labels: 9200
Length of val_labels: 2300
First train data sample         id                                              image  \
11351  493  [[[17, 123, 57], [18, 124, 58], [18, 124, 58],...   

                                            scaled_image  image_id    x    y  \
11351  [[[18, 124, 58], [18, 124, 58], [17, 123, 57],...       493  864  115   

       orientation  radius  class  
11351          0.0      47      2  


In [11]:
from tensorflow.keras.utils import to_categorical

# Extract the label values and normalize coordinates and radius
X_train = np.array(train_images)
y_train = []

for _, row in train_data.iterrows():
    scaled_image = row['scaled_image']
    
    # Extract the label values
    x = row['x'] / scaled_image.shape[1]  # Normalize x coordinates
    y = row['y'] / scaled_image.shape[0]  # Normalize y coordinates
    orientation = row['orientation']
    radius = row['radius'] / (scaled_image.shape[0] + scaled_image.shape[1]) / 2  # Normalize radius
    class_label = row['class']
    
    # One-hot encode the class label
    class_one_hot = to_categorical(class_label, num_classes=3)  # Assuming you have 3 classes
    
    # Create the label array
    label = np.concatenate(([x, y, orientation, radius], class_one_hot))
    y_train.append(label)

y_train = np.array(y_train)

# Repeat the process for validation data
X_val = np.array(val_images)
y_val = []

for _, row in val_data.iterrows():
    scaled_image = row['scaled_image']
    
    # Extract the label values
    x = row['x'] / scaled_image.shape[1]  # Normalize x coordinates
    y = row['y'] / scaled_image.shape[0]  # Normalize y coordinates
    orientation = row['orientation']
    radius = row['radius'] / (scaled_image.shape[0] + scaled_image.shape[1]) / 2  # Normalize radius
    class_label = row['class']
    
    # One-hot encode the class label
    class_one_hot = to_categorical(class_label, num_classes=3)  # Assuming you have 3 classes
    
    # Create the label array
    label = np.concatenate(([x, y, orientation, radius], class_one_hot))
    y_val.append(label)

y_val = np.array(y_val)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)

X_train shape: (9200, 256, 512, 3)
y_train shape: (9200, 7)
X_val shape: (2300, 256, 512, 3)
y_val shape: (2300, 7)


In [102]:
!pip install tensorflow

Python(22036) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [12]:
import tensorflow as tf
from tensorflow.keras import layers

def create_cnn_backbone(input_shape):
    inputs = layers.Input(shape=input_shape)
    
    x = layers.Conv2D(32, 3, padding='same', activation='relu')(inputs)
    print("Conv2D_1 output shape:", x.shape)
    x = layers.MaxPooling2D()(x)
    print("MaxPooling2D_1 output shape:", x.shape)
    x = layers.Conv2D(64, 3, padding='same', activation='relu')(x)
    print("Conv2D_2 output shape:", x.shape)
    x = layers.MaxPooling2D()(x)
    print("MaxPooling2D_2 output shape:", x.shape)
    x = layers.Conv2D(128, 3, padding='same', activation='relu')(x)
    print("Conv2D_3 output shape:", x.shape)
    x = layers.MaxPooling2D()(x)
    print("MaxPooling2D_3 output shape:", x.shape)
    
    x = layers.Flatten()(x)
    print("Flatten output shape:", x.shape)
    x = layers.Dense(256, activation='relu')(x)
    print("Dense_1 output shape:", x.shape)
    outputs = layers.Dense(5, activation='linear')(x)
    print("Dense_2 output shape:", outputs.shape)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

In [17]:
from tensorflow.keras.losses import binary_crossentropy, sparse_categorical_crossentropy, mse

def yolo_loss(y_true, y_pred):
    # Extract the different components from the ground truth and predicted tensors
    x_true, y_true, orient_true, radius_true = y_true[..., 0], y_true[..., 1], y_true[..., 2], y_true[..., 3]
    class_true = y_true[..., 4:]
    x_pred, y_pred, orient_pred, radius_pred = y_pred[..., 0], y_pred[..., 1], y_pred[..., 2], y_pred[..., 3]
    class_pred = y_pred[..., 4:]
    
    # Coordinate loss
    coord_loss = mse(x_true, x_pred) + mse(y_true, y_pred)
    
    # Orientation loss
    orient_loss = mse(orient_true, orient_pred)
    
    # Radius loss
    radius_loss = mse(radius_true, radius_pred)
    
    # Class loss
    class_loss = categorical_crossentropy(class_true, class_pred)
    
    # Combine the losses
    total_loss = coord_loss + orient_loss + radius_loss + class_loss
    
    return total_loss

In [None]:
model = create_cnn_backbone(input_shape=(256, 512, 3))
model.compile(optimizer='adam', loss=yolo_loss)
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32, verbose=1)

print(model.summary())

Conv2D_1 output shape: (None, 256, 512, 32)
MaxPooling2D_1 output shape: (None, 128, 256, 32)
Conv2D_2 output shape: (None, 128, 256, 64)
MaxPooling2D_2 output shape: (None, 64, 128, 64)
Conv2D_3 output shape: (None, 64, 128, 128)
MaxPooling2D_3 output shape: (None, 32, 64, 128)
Flatten output shape: (None, 262144)
Dense_1 output shape: (None, 256)
Dense_2 output shape: (None, 5)
Epoch 1/10
[1m 20/288[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12:41[0m 3s/step - loss: 25730524.0000