In [None]:
from huggingface_hub import HfApi, create_repo
import os
import shutil
import json

# Publish the locally prepared parquet shards as a dataset repository on the
# Hugging Face Hub: stage the files in a scratch directory, write the metadata
# and README next to them, upload everything, then remove the scratch directory.

# Initialize the API
api = HfApi()
repo_id = "mpg-ranch/horse-detection"

source_dir = "../data/hf"
staging_dir = "tmp_dataset"

# Start from a clean staging area: leftovers from an earlier, interrupted run
# would otherwise be uploaded together with the current files.
shutil.rmtree(staging_dir, ignore_errors=True)

try:
    # 1. Create temporary directory structure (makedirs also creates the parents)
    for split in ("train", "test"):
        os.makedirs(os.path.join(staging_dir, "data", split), exist_ok=True)

    # 2. Copy parquet files to the data directory
    # Only *.parquet files are staged, matching the "data/<split>/*.parquet"
    # globs declared in the metadata below. Sorted for a deterministic order.
    source_files = sorted(os.listdir(source_dir))
    train_files = [
        f for f in source_files
        if f.startswith("train_") and f.endswith(".parquet")
    ]
    test_files = [
        f for f in source_files
        if f.startswith("test_") and f.endswith(".parquet")
    ]

    # Fail before touching the Hub rather than publishing an empty split.
    for split, split_files in (("train", train_files), ("test", test_files)):
        if not split_files:
            raise FileNotFoundError(
                f"No {split}_*.parquet files found in {source_dir}"
            )

    for file in train_files:
        shutil.copy(
            os.path.join(source_dir, file),
            os.path.join(staging_dir, "data", "train", file),
        )

    for file in test_files:
        shutil.copy(
            os.path.join(source_dir, file),
            os.path.join(staging_dir, "data", "test", file),
        )

    # 3. Create metadata file
    # NOTE(review): the example counts are hard-coded; confirm they match the
    # parquet files being uploaded.
    # NOTE(review): the Hub appears to read split configuration from YAML front
    # matter in README.md rather than from a metadata.json; confirm that
    # something actually consumes this file.
    metadata = {
        "configs": [
            {
                "name": "default",
                "data_files": {
                    "train": "data/train/*.parquet",
                    "test": "data/test/*.parquet"
                },
                "splits": {
                    "train": {"num_examples": 12780},
                    "test": {"num_examples": 3120}
                }
            }
        ]
    }

    with open(os.path.join(staging_dir, "metadata.json"), "w", encoding="utf-8") as f:
        json.dump(metadata, f)

    # 4. Create README.md
    readme_content = """
# Horse Detection Dataset

Dataset for detecting horses in aerial imagery.

## Features

- `image`: The aerial image
- `Presence`: Binary indicator (1 = horse present, 0 = absent)
- `transform`: Geographic transformation parameters
- And more...

## Usage

```python
from datasets import load_dataset

dataset = load_dataset("mpg-ranch/horse-detection")
```
"""

    with open(os.path.join(staging_dir, "README.md"), "w", encoding="utf-8") as f:
        f.write(readme_content)

    # 5. Upload to Hub
    api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)

    # Upload the whole staging tree in a single commit instead of one commit
    # per file; repo paths are derived from the folder layout, so they stay
    # "/"-separated regardless of the local OS path separator.
    api.upload_folder(
        folder_path=staging_dir,
        repo_id=repo_id,
        repo_type="dataset",
    )

    # Report what was published, using repo-style ("/") relative paths.
    for root, _, files in os.walk(staging_dir):
        for file in files:
            hub_path = os.path.relpath(os.path.join(root, file), staging_dir)
            print(f"Uploaded {hub_path.replace(os.sep, '/')}")
finally:
    # Clean up, also when staging or the upload failed part-way.
    shutil.rmtree(staging_dir, ignore_errors=True)

print("Dataset uploaded to HF Hub successfully")