# In [None]:
import os
import pandas as pd
import random
from pycocotools.coco import COCO
from sklearn.model_selection import train_test_split
import tensorflow as tf
from transformers import TFViTForImageClassification, ViTImageProcessor
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score
import matplotlib.pyplot as plt
import cv2
import numpy as np
from datetime import datetime

# Machine-specific locations of the MS COCO 2017 data and of the output directory.
# TRAIN_PATH is the image directory used for both splits in the visible code;
# VAL_PATH is defined here but not referenced anywhere in the visible code.
TRAIN_PATH = 'D:/Download/JDownloader/MSCOCO/images/train2017'
VAL_PATH = 'D:/Download/JDownloader/MSCOCO/images/val2017'
# Directory that holds the COCO annotation JSON files (instances_train2017.json).
ANNOTATIONS_PATH = 'D:/Download/JDownloader/MSCOCO/annotations'
# Output directory for the filtered CSV and the saved model.
WORKING_DIR = 'D:/Projetos/Mestrado/2024_Topicos_Esp_Sist_Informacao/ARTIGO_FINAL/object_detection_model_compare/working'
# COCO category names kept in the dataset; their count is also the number of
# classes the classifier is built with.
FILTERED_CATEGORIES = ['person', 'cat', 'dog']


# Build the filtered dataset: up to 1000 randomly chosen annotations for each
# category in FILTERED_CATEGORIES, stored as one CSV row per annotation.
filtered_csv_path = os.path.join(WORKING_DIR, 'filtered_coco.csv')

# Load COCO annotations
annotations_file = os.path.join(ANNOTATIONS_PATH, 'instances_train2017.json')
coco = COCO(annotations_file)

# Get category IDs for the selected categories
category_ids = coco.getCatIds(catNms=FILTERED_CATEGORIES)

# Dedicated, seeded RNG so the sampled subset is reproducible from run to run
# (the train/test split below is already seeded with the same value) without
# touching the global `random` state.
sampler = random.Random(42)

# NOTE(review): sampling is per annotation and nothing deduplicates image_id,
# so the same image can appear in several rows (possibly with different
# categories) and may end up on both sides of the train/test split.
filtered_data = []
for category_id in category_ids:
    ann_ids = coco.getAnnIds(catIds=[category_id])
    anns = coco.loadAnns(ann_ids)
    selected_anns = sampler.sample(anns, min(1000, len(anns)))
    for ann in selected_anns:
        image_info = coco.loadImgs(ann['image_id'])[0]
        filtered_data.append({
            "image_id": ann['image_id'],
            "image": image_info['file_name'],
            "category_id": ann['category_id']
        })

# Create and save the filtered dataset. The explicit column list keeps the CSV
# header intact even if no annotation was selected, and the working directory
# is created on demand so a fresh checkout does not fail on to_csv.
os.makedirs(WORKING_DIR, exist_ok=True)
filtered_df = pd.DataFrame(
    filtered_data, columns=["image_id", "image", "category_id"]
)
filtered_df.to_csv(filtered_csv_path, index=False)

# Split the dataset (80% train / 20% test, fixed seed)
data = pd.read_csv(filtered_csv_path)
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)


# Load and preprocess functions
def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image from disk and prepare it as input for the ViT model.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    ``target_size`` and scaled to the range [0, 1].

    Args:
        image_path: Path of the image file to read.
        target_size: Size passed to ``cv2.resize``, which interprets it as
            ``(width, height)``.

    Returns:
        A float32 NumPy array with the RGB image scaled to [0, 1].

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque cvtColor error.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, target_size)
    # float32 halves the memory of the stacked dataset compared with the
    # float64 that a plain `image / 255.0` would produce.
    # NOTE(review): the pretrained checkpoint's image processor may expect an
    # additional mean/std normalization -- confirm against its config.
    return image.astype(np.float32) / 255.0  # Normalize to [0, 1]


def load_data(data, path_prefix):
    """Load every image listed in ``data`` together with its label.

    Args:
        data: DataFrame with an ``image`` column (file name) and a
            ``category_id`` column (label).
        path_prefix: Directory that is joined with each file name.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays in row order.
    """
    # One (preprocessed image, label) pair per row, in the frame's row order.
    samples = [
        (
            preprocess_image(os.path.join(path_prefix, record['image'])),
            record['category_id'],
        )
        for _, record in data.iterrows()
    ]
    pixel_arrays = [pixels for pixels, _ in samples]
    category_labels = [label for _, label in samples]
    return np.array(pixel_arrays), np.array(category_labels)

# Build the model using ViT
def build_vit_model(num_classes):
    """Build a Keras model that wraps a pretrained ViT image classifier.

    The model accepts channels-last images of shape ``(224, 224, 3)`` and
    returns the raw (un-normalized) classification logits, so it must be
    trained with a loss configured for logits.

    Args:
        num_classes: Number of output classes for the classification head.

    Returns:
        A ``tf.keras.Model`` mapping image batches to logits of shape
        ``(batch, num_classes)``.
    """
    # Channels-last input, matching the arrays produced by load_data.
    inputs = Input(shape=(224, 224, 3))

    # The Hugging Face TF ViT expects channels-first pixel values
    # (batch, channels, height, width), so reorder the axes inside the model.
    pixel_values = tf.keras.layers.Permute((3, 1, 2))(inputs)

    # Initialize the ViT model with a classification head of the right size
    vit = TFViTForImageClassification.from_pretrained(
        'google/vit-base-patch32-224-in21k',
        num_labels=num_classes
    )

    # Do not hard-code `training=True`: leaving it unset lets Keras supply the
    # training flag at run time, so dropout is active during fit() only and
    # disabled for validation, evaluate() and predict().
    logits = vit(pixel_values).logits

    return tf.keras.Model(inputs=inputs, outputs=logits)


# Main execution code: load the data, fine-tune the ViT classifier, save it and
# plot the training curves. Everything is kept inside the guard so that
# importing this module does not start (or fail on) a training run.
if __name__ == "__main__":
    # Load and preprocess data. Both splits come from the train2017 images
    # because the filtered CSV was built from the train2017 annotations.
    X_train, y_train = load_data(train_data, TRAIN_PATH)
    X_test, y_test = load_data(test_data, TRAIN_PATH)

    # Encode labels. The one-hot width is fixed explicitly so both splits match
    # the model head even if a split happens to lack the highest class index.
    num_classes = len(FILTERED_CATEGORIES)
    label_encoder = LabelEncoder()
    y_train_encoded = to_categorical(
        label_encoder.fit_transform(y_train), num_classes=num_classes
    )
    y_test_encoded = to_categorical(
        label_encoder.transform(y_test), num_classes=num_classes
    )

    # Build and compile model. The network outputs raw logits, so the loss
    # must apply the softmax itself (from_logits=True); the plain
    # 'categorical_crossentropy' string would treat the logits as probabilities.
    model = build_vit_model(num_classes)
    optimizer = Adam(learning_rate=0.0001)
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )

    model.summary()

    # Train the model; the held-out test split doubles as validation data.
    history = model.fit(
        X_train, y_train_encoded,
        validation_data=(X_test, y_test_encoded),
        epochs=30,
        batch_size=32,
        verbose=1
    )

    # Save the trained model
    model_path = os.path.join(WORKING_DIR, 'vit_coco.keras')
    model.save(model_path)

    print(f"Trained model saved at {model_path}")

    # Plot training accuracy and loss side by side
    plt.figure(figsize=(12, 6))

    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()

