In [6]:
import os
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv

from datasets import Dataset
from huggingface_hub import login

In [None]:
# Load key/value pairs from a .env file into the process environment.
# override=True lets values from the file replace variables that are already set.
load_dotenv(override=True)

In [8]:
# Suffix that identifies the project root directory by name.
PROJECT_DIR_SUFFIX = "xlm-roberta-base-cls-depression"

# Walk upwards from the current working directory until the project root is found.
current_dir = Path().resolve()
while not current_dir.name.endswith(PROJECT_DIR_SUFFIX):
    # The filesystem root is its own parent; without this guard the loop
    # would spin forever when the notebook is run outside the project tree.
    if current_dir.parent == current_dir:
        raise FileNotFoundError(
            f"No directory ending with '{PROJECT_DIR_SUFFIX}' found at or above "
            f"{Path().resolve()}"
        )
    current_dir = current_dir.parent

# Make the project root the working directory so relative paths resolve from it.
os.chdir(current_dir)

input_mental_health_texts_data = current_dir / "data/raw/mental_health_texts.csv"

In [9]:
def upload_mental_health_dataset(
    csv_path: "str | os.PathLike[str]",
    repo_name: str = "mental-health-depression",
    repo_owner: str = "malexandersalazar"
) -> None:
    """
    Upload the mental health CSV to the Hugging Face Hub as a public dataset.

    The CSV is parsed before authenticating so that a missing or malformed
    file fails fast, without first prompting for credentials. No custom
    dataset card file is read or uploaded by this function.

    Args:
        csv_path: Path (str or path-like) to the pipe-delimited
            mental_health_texts.csv file
        repo_name: Name for the dataset repository
        repo_owner: Username/organization to upload to

    Raises:
        FileNotFoundError: If csv_path does not exist (raised by pandas).
    """
    # Read the pipe-delimited CSV file first (fail fast on bad input)
    df = pd.read_csv(csv_path, sep='|')

    # Convert DataFrame to Hugging Face Dataset
    dataset = Dataset.from_pandas(df)

    # Authenticate: use the HF_TOKEN environment variable when it is set so the
    # notebook can run unattended; otherwise fall back to the interactive prompt.
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        login(token=hf_token)
    else:
        login()

    # Prepare repository name
    full_repo_name = f"{repo_owner}/{repo_name}"

    # Push the dataset to the hub as a public repository
    dataset.push_to_hub(
        repo_id=full_repo_name,
        private=False
    )

In [None]:
if __name__ == "__main__":
    # Entry point: publish the raw mental health texts CSV located above.
    CSV_PATH = input_mental_health_texts_data
    upload_mental_health_dataset(csv_path=CSV_PATH)