# XAI Project for signature classification using CEDAR dataset

## Configurations Colab & Kaggle API

In [None]:
# Clone the project repository into the current working directory.
# Later cells chdir into it and import project modules (`src.utils...`) from there.
!git clone https://github.com/silvano315/eXplainability-for-signature-detection.git

In [None]:
import os

REPO_DIR = "eXplainability-for-signature-detection"

# Enter the cloned repository so the relative paths used by later cells
# (config/, data/, src/) resolve correctly.
# Skip the chdir when the working directory is already the repository:
# re-running this cell would otherwise try to enter a nested directory of the
# same name and fail with FileNotFoundError.
if os.path.basename(os.getcwd()) != REPO_DIR:
    os.chdir(REPO_DIR)

# Last expression of the cell: displays the resulting working directory.
os.getcwd()

In [None]:
# Mount Google Drive at /content/drive; the Kaggle API token is copied from
# /content/drive/MyDrive/Kaggle_api/ in the following cell.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Move your Kaggle API to /root/.config/kaggle and /root/.kaggle/kaggle.json

os.makedirs('/root/.kaggle', exist_ok = True)

!cp /content/drive/MyDrive/Kaggle_api/kaggle.json /root/.config/kaggle.json
!cp /content/drive/MyDrive/Kaggle_api/kaggle.json /root/.kaggle/kaggle.json

## Import libraries

In [None]:
import json
import logging
import yaml
import torch
import pandas as pd
from pathlib import Path
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

from src.utils.kaggle_downloader import setup_dataset
from src.utils.logger_setup import get_logger
from src.utils.dataset_analyzer import create_dataset_metadata, validate_dataset_consistency, \
                                        save_metadata, load_metadata
from src.utils.eda import print_dataset_statistics, plot_dataset_distribution, \
                            show_sample_images, analyze_image_properties, generate_eda_report

## Configurations

In [None]:
# Load the project configuration (paths, training and dataset settings) from YAML

# Explicit encoding: without it open() falls back to the locale's preferred
# encoding, which makes parsing of non-ASCII content platform-dependent.
with open('config/config.yaml', 'r', encoding='utf-8') as f:
    # safe_load only builds plain Python objects (no arbitrary object construction)
    config = yaml.safe_load(f)

In [None]:
# Resolve the data and models locations from the config, then create the
# models directory (and any missing parents) if it does not exist yet

_data_paths = config['paths']['data']
DATA_PATH = Path(_data_paths['raw_path'])
MODELS_PATH = Path(_data_paths['models_path'])
MODELS_PATH.mkdir(parents=True, exist_ok=True)

In [None]:
# Training hyperparameters and the number of classes, read from the config

_training_cfg = config['training']
BATCH_SIZE = _training_cfg['batch_size']
NUM_EPOCHS = _training_cfg['num_epochs']
LEARNING_RATE = _training_cfg['learning_rate']
NUM_CLASSES = config['dataset']['num_classes']

## Load data

In [None]:
# Download the CEDAR signature dataset from Kaggle
# NOTE(review): setup_dataset() is called with no arguments; where it stores the
# data and whether it skips an existing download is decided in
# src/utils/kaggle_downloader — confirm there.

dataset_path = setup_dataset()
print(f"Dataset found at: {dataset_path}")

In [None]:
# Generate metadata for the dataset from the signatures folder
# NOTE(review): this path is hard-coded rather than derived from DATA_PATH or
# dataset_path — confirm they all refer to the same location.

signatures_path = Path("data/raw/cedardataset/signatures")
metadata = create_dataset_metadata(signatures_path)

In [None]:
# Depending on SAVE_METADATA, either write the metadata generated above to disk
# or replace it with the copy already stored in metadata/metadata.json

SAVE_METADATA = False
dataset_metadata_path = Path("metadata/metadata.json")

if not SAVE_METADATA:
    # Overwrites the in-memory `metadata` with the version loaded from disk
    metadata = load_metadata(dataset_metadata_path)
else:
    save_metadata(metadata, dataset_metadata_path)

In [None]:
# Run the dataset consistency checks and print the result, expanding nested
# dictionaries one level with indented "subkey: subvalue" lines

validate_metadata = validate_dataset_consistency(metadata)
for key, value in validate_metadata.items():
    # Scalar entries fit on a single line
    if not isinstance(value, dict):
        print(f"{key}: {value}")
        continue

    # Dict entries get a header line followed by their indented items
    print(f"{key}:")
    for subkey, subvalue in value.items():
        print(f"  {subkey}: {subvalue}")

## Exploratory Data Analysis

In [None]:
# Generate the complete EDA report in a single call
# signatures_path repeats the value used when the metadata was generated;
# `metadata` must already exist from the cells above.
# NOTE(review): reports/eda is passed as the output directory — presumably where
# the report artifacts are written; confirm in src/utils/eda.

signatures_path = Path("data/raw/cedardataset/signatures")
output_dir = Path("reports/eda")

generate_eda_report(signatures_path, metadata, output_dir)

In [None]:
# If you want to run the EDA report step by step: dataset statistics

print_dataset_statistics(metadata)

In [None]:
# If you want to run the EDA report step by step: dataset distribution plot

plot_dataset_distribution(metadata)