In [9]:
# Import required libraries
import os
import json
import shutil
from pathlib import Path
from kaggle.api.kaggle_api_extended import KaggleApi
import kagglehub

# Resolve the project folders relative to the notebook's working directory
# (the notebook is expected to be started from task2).
BASE_DIR = Path.cwd()
DATASET_DIR, MODELS_DIR = BASE_DIR / "dataset", BASE_DIR / "models"

# Make sure both folders are present before anything is downloaded into them
for dir_path in (DATASET_DIR, MODELS_DIR):
    dir_path.mkdir(exist_ok=True)

# Report the resolved layout so the reader can verify where files will land
print(f"Current working directory: {BASE_DIR}")
print(
    "\nDirectory structure:",
    f"- Dataset directory: {DATASET_DIR}",
    f"- Models directory: {MODELS_DIR}",
    sep="\n",
)

Current working directory: C:\Users\mar4u\Documents\DS-Test-2025\task2

Directory structure:
- Dataset directory: C:\Users\mar4u\Documents\DS-Test-2025\task2\dataset
- Models directory: C:\Users\mar4u\Documents\DS-Test-2025\task2\models


In [2]:
def download_dataset(dataset_dir: Path) -> None:
    """
    Fetch the Animals-10 dataset from Kaggle and unpack it.

    Args:
        dataset_dir (Path): Folder that receives the unzipped dataset
    """
    dataset_slug = "alessiocorrado99/animals10"

    # The Kaggle client is authenticated before the download is requested
    kaggle_client = KaggleApi()
    kaggle_client.authenticate()

    print("Downloading animal dataset...")
    kaggle_client.dataset_download_files(dataset_slug, path=str(dataset_dir), unzip=True)
    print("Dataset downloaded successfully!")

In [None]:
def load_translation_mapping() -> dict:
    """
    Build the Italian -> English lookup for the animal class names.

    Returns:
        dict: A fresh dictionary keyed by the Italian name, with the
        English name as the value.
    """
    # Keyword form keeps the same insertion order as a literal would
    return dict(
        cane="dog",
        elefante="elephant",
        farfalla="butterfly",
        gallina="chicken",
        gatto="cat",
        mucca="cow",
        ragno="spider",
        scoiattolo="squirrel",
        cavallo="horse",
        pecora="sheep",
    )

def rename_folders(dataset_dir: Path, translation_map: dict) -> None:
    """
    Rename the Italian class folders under ``raw-img`` to their English names.

    The function can be called repeatedly on the same dataset: a folder whose
    name is already one of the English names (a value of ``translation_map``)
    is reported as skipped instead of being flagged as untranslatable, and a
    rename is never attempted onto a folder that already exists.

    Progress and a final summary are printed; nothing is returned and no
    exception is raised for a failed rename (it is counted as skipped).

    Args:
        dataset_dir (Path): Dataset root; must contain a ``raw-img`` folder.
        translation_map (dict): Maps Italian folder names to English names.
    """
    raw_dir = dataset_dir / "raw-img"
    if not raw_dir.exists():
        print("Error: raw-img directory not found!")
        return

    print("\nStarting folder renaming process...")
    print("Found folders:")

    # Snapshot the folders up front (sorted, so the log does not depend on
    # filesystem enumeration order) because the loop below renames them.
    folders = sorted(f for f in raw_dir.iterdir() if f.is_dir())
    for folder in folders:
        print(f"- {folder.name}")

    english_names = set(translation_map.values())
    renamed_count = 0
    skipped_count = 0

    for folder in folders:
        italian_name = folder.name

        if italian_name not in translation_map:
            if italian_name in english_names:
                # Left over from an earlier run: already translated.
                print(f"Skipping: {italian_name} (already in English)")
            else:
                print(f"Warning: No translation found for {italian_name}")
            skipped_count += 1
            continue

        english_name = translation_map[italian_name]
        if italian_name == english_name:
            print(f"Skipping: {italian_name} (already in English)")
            skipped_count += 1
            continue

        new_path = folder.parent / english_name
        if new_path.exists():
            # Renaming onto an existing folder either fails or, on some
            # platforms, silently replaces it; refuse consistently instead.
            print(
                f"Error renaming {italian_name}: "
                f"target folder {english_name} already exists"
            )
            skipped_count += 1
            continue

        try:
            folder.rename(new_path)
        except OSError as e:
            print(f"Error renaming {italian_name}: {e}")
            skipped_count += 1
        else:
            print(f"Renamed: {italian_name} → {english_name}")
            renamed_count += 1

    print("\nRenaming process completed!")
    print("Summary:")
    print(f"- Total folders processed: {len(folders)}")
    print(f"- Successfully renamed: {renamed_count}")
    print(f"- Skipped/Errors: {skipped_count}")

In [19]:
def download_models(models_dir=None) -> None:
    """
    Download pre-trained models from Kaggle and organize files correctly.

    The NER model is unpacked into ``<models_dir>/ner_model``; if the archive
    produced a ``final`` sub-folder, its contents are moved one level up and
    the empty ``final`` folder is removed. The animal classifier is unpacked
    directly into ``models_dir``.

    Any failure during download or reorganization is reported on stdout and
    not re-raised, so the function always returns ``None``.

    Args:
        models_dir: Directory (``Path`` or ``str``) that receives the models.
            Defaults to ``models`` relative to the current working directory,
            which is the location used when no argument is given.
    """
    models_dir = Path("models") if models_dir is None else Path(models_dir)
    models_dir.mkdir(parents=True, exist_ok=True)

    try:
        # Download NER model
        print("\nDownloading NER model...")
        ner_model_path = models_dir / "ner_model"
        ner_model_path.mkdir(exist_ok=True)

        api = KaggleApi()
        api.authenticate()

        api.dataset_download_files(
            "liumar/ner-model",
            path=str(ner_model_path),
            unzip=True
        )

        # Move files from 'final' folder to ner_model folder
        final_folder = ner_model_path / "final"
        if final_folder.exists():
            print("Moving files from 'final' folder to ner_model folder...")
            for item in final_folder.glob('*'):
                target = ner_model_path / item.name
                # A directory left behind by an earlier run would make
                # shutil.move place the new copy *inside* it; drop the stale
                # copy so re-running the cell yields the same layout.
                if target.is_dir() and not target.is_symlink():
                    shutil.rmtree(target)
                shutil.move(str(item), str(target))
            # Remove empty final folder
            final_folder.rmdir()
            print("Files reorganized successfully!")

        # Download Animal Classifier
        print("\nDownloading Animal Classifier...")
        api.dataset_download_files(
            "liumar/animal-classifier",
            path=str(models_dir),
            unzip=True
        )

        print("\nAll models downloaded and organized successfully!")

    except Exception as e:
        # Deliberately best-effort: report the problem and let the notebook continue.
        print(f"Error downloading models: {str(e)}")

In [3]:
# Step 1: fetch the raw dataset into the dataset folder
print("Step 1: Downloading dataset")
download_dataset(dataset_dir=DATASET_DIR)

Step 1: Downloading dataset
Downloading animal dataset...
Dataset URL: https://www.kaggle.com/datasets/alessiocorrado99/animals10
Dataset downloaded successfully!


In [4]:
# Step 2: translate the Italian class-folder names to English
print("Step 2: Renaming folders to English")
translation_map = load_translation_mapping()

# Show the planned renames in alphabetical order of the Italian name
print("\nTranslations to be applied:")
for italian in sorted(translation_map):
    english = translation_map[italian]
    print(f"- {italian} → {english}")

rename_folders(dataset_dir=DATASET_DIR, translation_map=translation_map)

Step 2: Renaming folders to English

Translations to be applied:
- cane → dog
- cavallo → horse
- elefante → elephant
- farfalla → butterfly
- gallina → chicken
- gatto → cat
- mucca → cow
- pecora → sheep
- ragno → spider
- scoiattolo → squirrel

Starting folder renaming process...
Found folders:
- cane
- cavallo
- elefante
- farfalla
- gallina
- gatto
- mucca
- pecora
- ragno
- scoiattolo
Renamed: cane → dog
Renamed: cavallo → horse
Renamed: elefante → elephant
Renamed: farfalla → butterfly
Renamed: gallina → chicken
Renamed: gatto → cat
Renamed: mucca → cow
Renamed: pecora → sheep
Renamed: ragno → spider
Renamed: scoiattolo → squirrel

Renaming process completed!
Summary:
- Total folders processed: 10
- Successfully renamed: 10
- Skipped/Errors: 0


In [20]:
# Step 3: Download pre-trained models
print("\nStep 3: Downloading pre-trained models")
download_models()

# "\n" (not "\D", which is an invalid escape sequence) puts a blank line
# before the closing message, matching the other step banners.
print("\nDownloading completed!")


Step 3: Downloading pre-trained models

Downloading NER model...
Dataset URL: https://www.kaggle.com/datasets/liumar/ner-model
Moving files from 'final' folder to ner_model folder...
Files reorganized successfully!

Downloading Animal Classifier...
Dataset URL: https://www.kaggle.com/datasets/liumar/animal-classifier

All models downloaded and organized successfully!

Setup completed!
