# In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

# -----------------------------
# CONFIGURATION
# -----------------------------
# Side length, in pixels, that every image is resized to (square input).
IMG_SIZE = 128
# Directory containing the image files named in the labels CSV.
IMAGE_FOLDER = 'dataset/images'
# CSV read by load_data(); it uses the 'filename', 'physics' and 'math' columns.
LABELS_CSV = 'dataset/labels.csv'
# Path where train_model() saves the model and predict_image() loads it from.
MODEL_PATH = 'image_regression_model.h5'

# -----------------------------
# DATA LOADING & PREPROCESSING
# -----------------------------

def load_data():
    """Load the images and regression targets listed in ``LABELS_CSV``.

    Each CSV row names an image (``filename`` column) inside ``IMAGE_FOLDER``
    and carries its ``physics`` and ``math`` target values. Every usable image
    is resized to ``IMG_SIZE`` x ``IMG_SIZE`` and its pixel values are divided
    by 255.0.

    Rows are skipped (not treated as errors) when the image file does not
    exist or when OpenCV cannot decode it.

    Returns:
        A tuple ``(images, labels)`` of numpy arrays. ``images`` stacks the
        preprocessed images and ``labels`` holds one ``[physics, math]`` pair
        per loaded image, in the same order. Both arrays are empty when no
        row yields a usable image.
    """
    df = pd.read_csv(LABELS_CSV)
    images = []
    labels = []

    for _, row in df.iterrows():
        image_path = os.path.join(IMAGE_FOLDER, row['filename'])
        if not os.path.exists(image_path):
            continue

        img = cv2.imread(image_path)
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; passing that to cv2.resize would abort the
        # whole load, so treat it like a missing file.
        if img is None:
            continue

        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        img = img / 255.0  # scale pixel values to [0, 1]
        images.append(img)
        # NOTE: predict_image() hard-codes these same two fields, so adding a
        # field here requires updating predict_image() as well.
        labels.append([row['physics'], row['math']])

    return np.array(images), np.array(labels)

# -----------------------------
# MODEL BUILDING
# -----------------------------

def build_model(output_dim):
    """Create and compile the CNN used for image regression.

    The network is three Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, followed by Flatten, Dense(64, relu),
    Dropout(0.5) and a linear Dense layer with ``output_dim`` units.

    Args:
        output_dim: Number of values the model predicts per image.

    Returns:
        A Sequential model compiled with the Adam optimizer, MSE loss and
        MAE as a metric.
    """
    input_shape = (IMG_SIZE, IMG_SIZE, 3)

    # First convolutional stage carries the input shape declaration.
    stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
    ]

    # Remaining convolutional stages differ only in their filter count.
    for n_filters in (64, 128):
        stack.append(Conv2D(n_filters, (3, 3), activation='relu'))
        stack.append(MaxPooling2D((2, 2)))

    # Regression head; the final Dense layer has no activation.
    stack += [
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(output_dim),
    ]

    regressor = Sequential(stack)
    regressor.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return regressor

# -----------------------------
# TRAINING
# -----------------------------

def train_model():
    """Train the regression CNN and persist the model and label scaling.

    Loads the dataset with ``load_data()``, scales the labels to [0, 1] with
    a ``MinMaxScaler``, holds out 20% of the samples for validation
    (``random_state=42``), trains for 10 epochs with batch size 32, and then
    writes:

    * the trained model to ``MODEL_PATH``;
    * the per-label minima to ``label_scaler_min.npy``;
    * the per-label maxima to ``label_scaler_max.npy``.

    The two ``.npy`` files are what ``predict_image()`` uses to map scaled
    predictions back to the original label range.

    Raises:
        ValueError: If no usable image was loaded, so there is nothing to
            train on.
    """
    X, y = load_data()

    # Fail early with an actionable message; otherwise the scaler / split
    # below would raise a far less obvious error on the empty arrays.
    if len(X) == 0:
        raise ValueError(
            f"No usable images were loaded from {IMAGE_FOLDER!r} using "
            f"{LABELS_CSV!r}; nothing to train on."
        )

    # Normalize labels.
    # NOTE(review): the scaler is fitted on all labels, including those that
    # end up in the validation split.
    scaler = MinMaxScaler()
    y_scaled = scaler.fit_transform(y)

    X_train, X_val, y_train, y_val = train_test_split(X, y_scaled, test_size=0.2, random_state=42)

    model = build_model(output_dim=y.shape[1])
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)

    model.save(MODEL_PATH)
    # Only min/max are needed to invert the scaling at prediction time.
    np.save('label_scaler_min.npy', scaler.data_min_)
    np.save('label_scaler_max.npy', scaler.data_max_)

# -----------------------------
# PREDICTION FUNCTION
# -----------------------------

def predict_image(image_path):
    """Predict the 'physics' and 'math' values for a single image file.

    The image is preprocessed the same way as in ``load_data()`` (resized to
    ``IMG_SIZE`` x ``IMG_SIZE``, pixels divided by 255.0), run through the
    model saved at ``MODEL_PATH``, and the scaled outputs are mapped back to
    the original label range using the min/max values saved by
    ``train_model()``.

    Args:
        image_path: Path of the image file to score.

    Returns:
        A dict ``{'physics': int, 'math': int}`` with the predictions rounded
        to the nearest integer.

    Raises:
        FileNotFoundError: If the model file does not exist yet, or if one of
            the saved scaler files is missing (raised by ``np.load``).
        ValueError: If ``image_path`` cannot be read as an image.
    """
    if not os.path.exists(MODEL_PATH):
        # FileNotFoundError is still caught by callers using `except Exception`.
        raise FileNotFoundError("Model not trained yet. Run train_model() first.")

    img = cv2.imread(image_path)
    # cv2.imread returns None (rather than raising) for a missing or
    # undecodable file; check before the comparatively slow model load.
    if img is None:
        raise ValueError(f"Could not read image: {image_path!r}")

    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img / 255.0
    img = np.expand_dims(img, axis=0)  # add the batch dimension

    model = load_model(MODEL_PATH)
    pred_scaled = model.predict(img)[0]

    # Reverse the MinMaxScaler
    min_vals = np.load('label_scaler_min.npy')
    max_vals = np.load('label_scaler_max.npy')
    pred = pred_scaled * (max_vals - min_vals) + min_vals

    # Convert through float/int so the result holds plain Python ints
    # regardless of what round() returns for NumPy scalars.
    result = {
        'physics': int(round(float(pred[0]))),
        'math': int(round(float(pred[1])))
    }
    return result

# -----------------------------
# RUN TRAINING (Uncomment below to train)
# -----------------------------
# train_model()

# -----------------------------
# USAGE EXAMPLE
# -----------------------------
# result = predict_image('dataset/images/img1.jpg')
# print(result)
