In [5]:
import numpy as np
import pandas as pd
import cv2
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
import os

# Generate Train Data
# The regression target is encoded in each file name: it is the integer before
# the first underscore of the stem (or the whole stem when there is none).
TRAIN_DIR = '/kaggle/input/opencode-23-kaggle-competition/train/Regression'
IMG_SIZE = (224, 224)  # Resize to 224x224 for MobileNet

# os.listdir returns entries in arbitrary order; sorting makes the
# random_state=42 split below select the same images on every run.
train_files = sorted(os.listdir(TRAIN_DIR))
train_data = []

for file in train_files:
    if not file.endswith('.jpg'):
        continue
    # splitext strips only the final extension, so a stem that itself contains
    # dots still maps back to the right file when the path is rebuilt below.
    img_id = os.path.splitext(file)[0]
    target = int(img_id.split('_')[0])
    train_data.append({'id': img_id, 'target': target})

if not train_data:
    raise FileNotFoundError(f"No .jpg training images found in {TRAIN_DIR}")

train_df = pd.DataFrame(train_data)

# Load images and preprocess
# NOTE(review): cv2.imread yields BGR channel order and the pixels are scaled
# to [0, 1] here, while the ImageNet MobileNet weights were presumably trained
# on RGB inputs prepared with keras' mobilenet.preprocess_input. This is kept
# unchanged because the prediction cell applies the same preprocessing; if it
# is changed, change both cells together.
train_images = []

for img_id in train_df['id']:
    img_path = os.path.join(TRAIN_DIR, f"{img_id}.jpg")
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable or corrupt file by returning None
        # instead of raising; fail here with the offending path rather than
        # with an opaque assertion inside cv2.resize.
        raise ValueError(f"Could not read training image: {img_path}")
    train_images.append(cv2.resize(img, IMG_SIZE))

# Normalize. Building the array as float32 up front avoids the float64 copy
# that dividing a uint8 array by 255.0 would create (twice the memory).
train_images = np.asarray(train_images, dtype=np.float32) / 255.0
train_targets = train_df['target'].to_numpy()

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    train_images, train_targets, test_size=0.2, random_state=42
)

# Load MobileNet base model (ImageNet weights, classification head removed).
# IMG_SIZE is square, so cv2's (width, height) vs. Keras' (height, width)
# ordering makes no difference here.
base_model = MobileNet(weights='imagenet', include_top=False, input_shape=IMG_SIZE + (3,))

# Build model on top of MobileNet: pooled features -> dense layer -> one
# linear output unit for the regression target.
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

# Early stopping
# val_loss is noisy from epoch to epoch, so keep the weights of the best epoch
# seen rather than whatever the final epoch happened to produce.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train model
# The History object is kept so the loss curves can be inspected afterwards.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
    callbacks=[early_stop],
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5
[1m17225924/17225924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/20
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 252ms/step - loss: 1323.1602 - val_loss: 1927.7998
Epoch 2/20
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 76ms/step - loss: 119.4404 - val_loss: 501.1116
Epoch 3/20
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 75ms/step - loss: 92.0397 - val_loss: 285.7320
Epoch 4/20
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 76ms/step - loss: 69.2372 - val_loss: 205.6730
Epoch 5/20
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 76ms/step - loss: 49.5162 - val_loss: 252.3335
Epoch 6/20
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 75ms/step - loss: 50.3368 - val_loss: 107.9077
Epoch 7/20
[1m106/106[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x7bed389c0ac0>

In [7]:
# Build the list of test image ids from the files present on disk.
TEST_DIR = '/kaggle/input/opencode-23-kaggle-competition/test/Regression'

test_files = os.listdir(TEST_DIR)
# splitext strips only the final extension, so ids whose stem contains dots
# still map back to the right file when the path is rebuilt below.
test_data = [os.path.splitext(file)[0] for file in test_files if file.endswith('.jpg')]
test_df = pd.DataFrame(test_data, columns=['id'])

if test_df.empty:
    raise FileNotFoundError(f"No .jpg test images found in {TEST_DIR}")

# Predict
# Preprocessing must mirror the training cell: cv2.imread output resized to
# 224x224 and scaled to [0, 1].
test_images = []
for img_id in test_df['id']:
    img_path = os.path.join(TEST_DIR, f"{img_id}.jpg")
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for an unreadable file. Skipping it would
        # misalign predictions with test_df['id'], so stop with a clear error.
        raise ValueError(f"Could not read test image: {img_path}")
    test_images.append(cv2.resize(img, (224, 224)))

# float32 from the start avoids the float64 copy that dividing a uint8 array
# by 255.0 would create.
test_images = np.asarray(test_images, dtype=np.float32) / 255.0

# `model` is the network trained in the earlier training cell.
# flatten() turns the predict() output into a 1-D array, one value per image.
predictions = model.predict(test_images).flatten()

# Create submission file
submission_df = pd.DataFrame({
    'id': test_df['id'],
    'target': predictions
})

submission_df.to_csv('/kaggle/working/submission_2.csv', index=False)

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
