In [1]:
import os
import json
import pandas as pd
from PIL import Image
import numpy as np

In [2]:
# Load metadata file
meta_path = 'PKLot/train/_annotations.coco.json'
with open(meta_path) as file:
    meta_train = json.load(file)


In [3]:
# Create DataFrame from the metadata
train = pd.DataFrame(meta_train['images'])
df_meta = pd.DataFrame(meta_train['annotations'])
train = pd.DataFrame.merge(train,df_meta, how='left', left_on='id', right_on='image_id')
train = train.dropna()
train['id_y'] = train['id_y'].values.astype(int)
train['category_id'] = train['category_id'].values.astype(int)
train['area'] = train['area'].values.astype(int)
train['iscrowd'] = train['iscrowd'].values.astype(int)

In [4]:
train_img_folder = 'PKLot/train'
image = []
for img in train['file_name'].unique():
    img_path = os.path.join(train_img_folder, img)
    pixels = np.array(Image.open(img_path).convert('L'))
    pixels = pixels.reshape(409600)
    image.append(pixels)

In [5]:
images = np.array(image)
print(images.shape)

(8502, 409600)


In [6]:
train_small = train[['file_name', 'bbox']].copy()
df_grouped = train_small.groupby('file_name', as_index=True).agg({'bbox': list})
bboxes = df_grouped['bbox'].values.tolist()

In [7]:
print(type(bboxes), len(bboxes), len(bboxes[0]), len(bboxes[0][0]))

<class 'list'> 8502 100 4


In [8]:
import numpy as np

max_boxes = 100
padded_bboxes = np.zeros((len(bboxes), max_boxes, 4))
for i, boxes in enumerate(bboxes):
    padded_bboxes[i, :len(boxes), :] = boxes
padded_bboxes = padded_bboxes.reshape(len(bboxes), -1)  # Form: (n_samples, 4 * max_boxes)


In [9]:
images = images.reshape(-1, 640, 640, 3)
print(images.shape)  # Soll (8502, 640, 640, 3) sein


(2834, 640, 640, 3)


In [10]:
print("Eingabeform für das Modell:", images.shape)
print("Zielwerte:", padded_bboxes.shape)


Eingabeform für das Modell: (2834, 640, 640, 3)
Zielwerte: (8502, 400)


In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Input

base_model = MobileNetV2(input_shape=(640, 640, 3), include_top=False, weights='imagenet')
x = Flatten()(base_model.output)
x = Dense(1000, activation='relu')(x)  # High-level feature representation
output = Dense(4 * 100, activation='linear')(x)  # Predictions for all bounding boxes
model = Model(inputs=base_model.input, outputs=output)


In [None]:
model.compile(optimizer='adam', loss='mean_squared_error')


In [None]:
history = model.fit(images, padded_bboxes, batch_size=32, epochs=10, verbose=1)
