In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import seaborn as sns

import sys
sys.path.append("..")

from src.data.preprocess import load_and_preprocess_image, augment_image, prepare_datasets
from src.models.train import load_config, build_cnn_model, build_transfer_model, train_and_evaluate, log_experiment_mflow
from src.models.evaluate import evaluate_model, plot_confusion_matrix, save_classification_report

from pathlib import Path

In [None]:
# All paths are derived from the current working directory (the notebook folder),
# so nothing here depends on an absolute, machine-specific location.
notebook_dir = Path().resolve()
data_dir = notebook_dir.parent / 'data'
train_dir, test_dir = data_dir / 'Train', data_dir / 'Test'

# Experiment settings are read from config.yaml, one level above the notebook folder.
config_path = notebook_dir.parent / 'config.yaml'
config = load_config(config_path)

# TODO: walk through the class folders and store each image path with its label.

In [None]:
# TODO: count the number of images per class.
# TODO: plot a bar chart of the per-class counts.
# TODO: show a grid of sample images for each class.

In [None]:
# Pick one training image to exercise the preprocessing step.
# `next(..., None)` plus an explicit check replaces the opaque StopIteration the
# bare `next()` raised when no PNG exists under train_dir (wrong path, or the
# images are stored in another format).
# NOTE: rglob yields files in filesystem order, so which sample is picked may
# differ between machines.
sample_img = next(train_dir.rglob("*.png"), None)
if sample_img is None:
    raise FileNotFoundError(f"No .png images found under {train_dir}")
processed_img = load_and_preprocess_image(sample_img)

In [None]:
# Augmentation settings noted for this step (ImageDataGenerator-style):
#   rotation_range=15, brightness_range=[0.8, 1.2], horizontal_flip=False
# NOTE(review): whether augment_image / prepare_datasets actually apply these
# values is not visible here — confirm in src.data.preprocess.

# Augment the single preprocessed sample image.
augment_image_data = augment_image(processed_img)

# Build the train/validation/test iterators and collect the class names.
(train_data, val_data, test_data, class_names) = prepare_datasets(
    train_dir,
    test_dir,
)

In [None]:
# Baseline CNN produced by build_cnn_model (imported from src.models.train).
# Intended Sequential architecture per the original notes — TODO confirm against the builder:
# Conv2D → MaxPooling → Conv2D → MaxPooling → Flatten → Dense → Dropout → Output (Softmax with 4 classes)
model = build_cnn_model()

In [None]:
# Transfer-learning variant from build_transfer_model; the variable name suggests a
# ResNet backbone — TODO confirm. Not referenced by any later cell in this notebook.
resnet_model = build_transfer_model()

In [None]:
# Train and evaluate `model` in a single call, keeping the training history
# (used by later cells) and the evaluation metrics.
# Training setup noted for this step (presumably applied inside
# train_and_evaluate — verify in src.models.train):
#   optimizer: Adam, loss: categorical_crossentropy, metrics: accuracy,
#   N epochs with the validation set
history, eval_metrics = train_and_evaluate(
    model,
    train_data,
    val_data,
    test_data,
    config,
)

In [None]:
# Plot accuracy and loss per epoch from the training history.
# The original cell referenced `accuracy_values` / `loss_values` without ever
# defining them (NameError on a fresh kernel); they are now read from `history`.
# Assumes `history` is a Keras-style History object (per-epoch lists under
# `.history`) or a plain dict with "accuracy" / "loss" keys — TODO confirm
# against train_and_evaluate.
history_dict = getattr(history, "history", history)
accuracy_values = history_dict["accuracy"]
loss_values = history_dict["loss"]

# The x-axis is built from the number of epochs actually recorded rather than
# config['epochs'], which is a config value and not a per-epoch sequence.
epoch_numbers = range(1, len(accuracy_values) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_numbers, accuracy_values, label='Accuracy')
ax.plot(epoch_numbers, loss_values, label='Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Value')
ax.set_title('Accuracy and Loss Graph')
ax.legend()
plt.show()

In [None]:
# Predict on the test set and compute the confusion matrix and classification report.
# NOTE(review): the original passed `history` here; predicting on the test set
# needs the trained model, not the training history. This assumes the signature
# evaluate_model(model, test_data, class_names) — confirm in src.models.evaluate.
cm, report = evaluate_model(model, test_data, class_names)

In [None]:
# Plot the confusion matrix and save the classification report from the evaluation cell.
# TODO: save the trained model to models/cnn_grapevine.h5 — no code in this cell does that yet.
# TODO: confirm the helpers write their outputs (accuracy/loss plots, confusion matrix) into reports/.
plot_confusion_matrix(cm, class_names)
save_classification_report(report)

In [None]:
# Log this experiment to MLflow through log_experiment_mflow.
# Intended contents of the run per the original notes (verify in src.models.train):
#   parameters: batch_size, epochs, learning_rate
#   final accuracy & loss, plus the saved model
# The metric names are defined once and passed by name; the original built the
# same list twice and left `metrics` unused.
metrics = ["accuracy", "loss"]
log_experiment_mflow(model, history, metrics, config)