# Data Collection

> Functions and tools for acquiring image-text pair data, primarily using existing datasets from Kaggle. Includes Colab setup.

In [None]:
#| default_exp data.collection

## Colab Setup (Run these cells if using Google Colab)

In [None]:
# Install necessary libraries
# Note: nbdev install might be needed if running nbdev commands
!pip install -q kaggle nbdev fastai==2.7.19 transformers timm imagehash tqdm zipfile36 wandb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.7/296.7 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.4/62.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m53.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m52.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Mount Google Drive (Optional, but recommended for persistent storage)
# Best-effort cell: it never raises, it only prints which of the three outcomes occurred.
try:
    # The import only succeeds inside Colab; elsewhere it raises ModuleNotFoundError (handled below).
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully.")
except ModuleNotFoundError:
    # Local / non-Colab run: nothing to mount.
    print("Not running in Colab, skipping Drive mount.")
except Exception as e:
    # Any other failure is reported but deliberately not re-raised, so later cells can still run.
    print(f"Error mounting Google Drive: {e}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted successfully.


In [None]:
# Kaggle API Setup: Upload your kaggle.json file
# Colab only. Installs the uploaded credentials at ~/.kaggle/kaggle.json with owner-only
# permissions and removes the copy that the upload widget leaves in the working directory.
try:
    from google.colab import files
    import os

    # Create .kaggle directory if it doesn't exist
    kaggle_dir = os.path.expanduser('~/.kaggle')
    if not os.path.exists(kaggle_dir):
        os.makedirs(kaggle_dir)
        print(f"Created directory: {kaggle_dir}")

    # Check if kaggle.json already exists
    kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')
    if not os.path.exists(kaggle_json_path):
        print("Please upload your kaggle.json file:")
        uploaded = files.upload() # This prompts the user to upload

        for fn in uploaded.keys():
            if fn == 'kaggle.json':
                print(f'User uploaded file "{fn}" with length {len(uploaded[fn])} bytes')
                # Write the uploaded bytes straight to the target location. Creating the file
                # with mode 0600 means the credentials are never readable by other users.
                fd = os.open(kaggle_json_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
                with os.fdopen(fd, 'wb') as f:
                    f.write(uploaded[fn])
                # Enforce the permissions explicitly in case the process umask interfered.
                os.chmod(kaggle_json_path, 0o600)
                # The upload widget also saved a copy in the working directory; do not leave
                # the secret lying around there (this makes the operation a move, not a copy).
                if os.path.exists(fn):
                    os.remove(fn)
                print("kaggle.json copied and permissions set.")
            else:
                print(f"Ignoring uploaded file: {fn}. Please upload 'kaggle.json'.")
    else:
        print("kaggle.json already exists.")

except ModuleNotFoundError:
    # google.colab is unavailable: rely on whatever Kaggle configuration exists locally.
    print("Not running in Colab, assuming local Kaggle setup.")
except Exception as e:
    print(f"An error occurred during Kaggle setup: {e}")

Please upload your kaggle.json file:


Saving kaggle.json to kaggle.json
User uploaded file "kaggle.json" with length 67 bytes
kaggle.json copied and permissions set.


---

In [None]:
#| hide
# Reload core module in case Drive mount changed PROJECT_ROOT
# This is fragile, better to define paths relative to notebook or pass explicitly
# Or ensure core is imported *after* potential drive mount
import importlib
from pathlib import Path
try:
    import indic_clip.core
    importlib.reload(indic_clip.core)
    print("Reloaded indic_clip.core")
except ModuleNotFoundError:
    print("indic_clip.core not found initially.")
    # Attempt to set sys.path if running in Colab and project cloned
    import sys
    if 'google.colab' in sys.modules:
        # sys.path must contain the directory that holds the `indic_clip` package, i.e. the
        # project root itself. The previous default ('/content') was one level too high, so
        # `import indic_clip` could never succeed for a clone at /content/Indic-Clip.
        project_root = '/content/Indic-Clip'
        if Path('/content/drive/MyDrive/Indic-Clip').exists():
            # Prefer the persistent copy on Google Drive when it is present.
            project_root = '/content/drive/MyDrive/Indic-Clip'
        if project_root not in sys.path:
            sys.path.insert(0, project_root)
            print(f"Added {project_root} to sys.path")
        try:
            import indic_clip.core
            print("Imported indic_clip.core after path adjustment.")
        except ModuleNotFoundError:
            print("ERROR: Still cannot find indic_clip.core. Ensure project structure is correct.")
            print("Expected: /content/Indic-Clip/indic_clip/core.py or similar in Drive")
            # raise # Stop execution if core components missing

indic_clip.core not found initially.
Added /content/drive/MyDrive/Indic-Clip to sys.path
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive detected, setting PROJECT_ROOT to /content/drive/MyDrive/Indic-Clip
Ensure your project files are located there.
Imported indic_clip.core after path adjustment.


In [None]:
#| export
import os
import time
import json
import logging
from pathlib import Path
import zipfile
import pandas as pd
from tqdm.notebook import tqdm
import sys

# Try importing core components
# On success the shared path constants and logging helpers come from indic_clip.core.
# On ModuleNotFoundError the same names are defined locally so the rest of this module still works.
try:
    from indic_clip.core import (PROJECT_ROOT, HINDI_RAW_PATH, SANSKRIT_RAW_PATH,
                               SYNTHETIC_RAW_PATH, get_logger, setup_logging, ensure_dir)
except ModuleNotFoundError as e:
    print(f"Error importing from indic_clip.core: {e}")
    print("Please ensure the indic_clip library is installed (pip install -e .) or the path is correct.")
    # Define fallbacks if running interactively without full setup
    if 'google.colab' in sys.modules:
        # Colab: default to a clone under /content, but prefer the Drive copy when it exists.
        PROJECT_ROOT=Path('/content/Indic-Clip')
        if Path('/content/drive/MyDrive/Indic-Clip').exists():
           PROJECT_ROOT=Path('/content/drive/MyDrive/Indic-Clip')
    else:
        # Outside Colab the current working directory is taken as the project root.
        PROJECT_ROOT=Path('.').resolve()
    print(f"Using fallback PROJECT_ROOT: {PROJECT_ROOT}")
    # Fallback directory layout: <PROJECT_ROOT>/data/raw/{hindi,sanskrit,synthetic}
    DATA_PATH = PROJECT_ROOT / 'data'
    RAW_DATA_PATH = DATA_PATH / 'raw'
    HINDI_RAW_PATH = RAW_DATA_PATH / 'hindi'
    SANSKRIT_RAW_PATH = RAW_DATA_PATH / 'sanskrit'
    SYNTHETIC_RAW_PATH = RAW_DATA_PATH / 'synthetic'
    # Define simple logging if setup fails
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    # NOTE: this assignment is superseded by the `logger = get_logger(__name__)` at the end of the cell.
    logger = logging.getLogger(__name__)
    # Minimal stand-ins for the helpers normally imported from indic_clip.core.
    def ensure_dir(path: Path): path.mkdir(parents=True, exist_ok=True)
    def setup_logging(): pass # No-op
    def get_logger(name): return logging.getLogger(name)

# Import the Kaggle client without aborting the module when it is unavailable.
# If either handler below fires, the name `kaggle` stays unbound; download_kaggle_dataset
# copes with that by catching NameError.
try:
    import kaggle
    print("Kaggle library imported.")
except OSError as e:
    # Presumably raised by the kaggle package when it cannot find credentials -- TODO confirm.
    print("Kaggle API Error: Ensure kaggle.json is uploaded/configured correctly in Colab or locally.")
    # raise e # Don't raise here, let download attempt fail later
except ImportError:
     print("ERROR: Kaggle library not installed. Run !pip install kaggle")

# Setup logging for this module
# `logger` is the module-level logger used by every function defined below.
setup_logging()
logger = get_logger(__name__)

Kaggle library imported.


## Helper Functions

In [None]:
#| export
def save_formatted_data(data: list, output_path: Path, filename: str):
    """Write image-text records to a JSON Lines file, one JSON object per line.

    The target file is overwritten on every call so reruns stay consistent.
    Nothing is written (and a warning is logged) when `data` is empty.
    Write failures are logged, not raised.

    Args:
        data: List of dictionaries, each describing one image-text pair
              (e.g., {'image_filename': 'name.jpg', 'caption': 'text', 'source': 'datasource'}).
        output_path: Directory (Path) that receives the file; created if missing.
        filename: Name of the output file (e.g., 'flickr8k_hindi_raw.jsonl').
    """
    if not data:
        logger.warning(f"No data provided to save for {filename}. Skipping.")
        return

    ensure_dir(output_path)
    filepath = output_path / filename

    try:
        with open(filepath, 'w', encoding='utf-8') as sink:
            # ensure_ascii=False keeps Devanagari text human-readable in the output file.
            sink.writelines(json.dumps(record, ensure_ascii=False) + '\n' for record in data)
        logger.info(f"Successfully wrote {len(data)} items to {filepath}")
    except OSError as write_error:
        logger.error(f"Error saving data to {filepath}: {write_error}")
    except Exception as unexpected_error:
        logger.error(f"An unexpected error occurred while saving data to {filepath}: {unexpected_error}")

In [None]:
#| export
def download_kaggle_dataset(dataset_slug: str, download_path: Path):
    """Fetch a Kaggle dataset archive through the official Kaggle API client.

    The archive is left zipped in `download_path`. Failures are logged, not raised.

    Args:
        dataset_slug: The Kaggle dataset slug (e.g., 'user/dataset-name').
        download_path: Directory (Path) the archive is downloaded into; created if missing.

    Returns:
        True when the download call completed, False when it failed for any reason.
    """
    logger.info(f"Attempting to download dataset '{dataset_slug}' to '{download_path}'...")
    ensure_dir(download_path)

    succeeded = False
    try:
        # Credentials come from ~/.kaggle/kaggle.json or environment variables.
        kaggle.api.authenticate()
        kaggle.api.dataset_download_files(dataset_slug, path=download_path, unzip=False, quiet=False)
        logger.info(f"Dataset '{dataset_slug}' downloaded successfully to '{download_path}'.")
        succeeded = True
    except NameError:
        # `kaggle` is unbound when the module-level import failed.
        logger.error("Kaggle library not imported correctly. Cannot download.")
    except Exception as download_error:
        logger.error(f"Failed to download dataset '{dataset_slug}': {download_error}")
        logger.error("Please ensure the Kaggle API is configured correctly (kaggle.json or env vars) and you accepted the dataset's terms on the Kaggle website if required.")
        # Deliberately not re-raised: callers decide what to do based on the return value.
    return succeeded

In [None]:
#| export
def unzip_file(zip_path: Path, extract_to: Path):
    """Unzips a file to a specified directory, member by member.

    Members whose resolved target would fall outside `extract_to` are skipped
    with a warning. A failure on one member is logged and does not stop the
    extraction of the remaining members.

    Args:
        zip_path: The Path object of the zip file.
        extract_to: The Path object of the directory to extract files into
            (created if missing).

    Returns:
        True if the archive was opened and no member failed to extract;
        False if the zip file is missing, invalid, or at least one member
        could not be extracted.
    """
    if not zip_path.exists():
        logger.error(f"Zip file not found at {zip_path}. Cannot unzip.")
        return False

    logger.info(f"Unzipping '{zip_path.name}' to '{extract_to}'...")
    ensure_dir(extract_to)
    extract_root = os.path.abspath(extract_to)
    failed_members = 0
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            for member in tqdm(zip_ref.infolist(), desc=f'Extracting {zip_path.name}'):
                try:
                    # Ensure extraction path is safe (within extract_to). A plain string-prefix
                    # test would wrongly accept siblings such as '<root>_evil/...', so compare
                    # whole path components instead.
                    target_path = os.path.abspath(os.path.join(extract_root, member.filename))
                    try:
                        is_inside = os.path.commonpath([extract_root, target_path]) == extract_root
                    except ValueError:
                        # Raised e.g. for paths on different drives: certainly not inside the root.
                        is_inside = False
                    if not is_inside:
                        logger.warning(f"Skipping potentially unsafe path in zip: {member.filename}")
                        continue
                    zip_ref.extract(member, extract_to)
                except zipfile.BadZipFile as e:
                    failed_members += 1
                    logger.error(f"Error extracting {member.filename} from {zip_path.name}: {e}")
                except Exception as e:
                    failed_members += 1
                    logger.error(f"Unexpected error extracting {member.filename}: {e}")
        if failed_members:
            # Do not report success when part of the archive is missing on disk.
            logger.error(f"Unzipped '{zip_path.name}' with {failed_members} member(s) that could not be extracted.")
            return False
        logger.info(f"Successfully unzipped '{zip_path.name}'.")
        # Optional: Remove the zip file after successful extraction
        # os.remove(zip_path)
        # logger.info(f"Removed zip file: '{zip_path.name}'")
        return True
    except zipfile.BadZipFile:
        logger.error(f"Error: '{zip_path.name}' is not a valid zip file or is corrupted.")
        return False
    except Exception as e:
        logger.error(f"An unexpected error occurred during unzipping '{zip_path.name}': {e}")
        return False

In [None]:
#| export
def load_hindi_captions(csv_path: Path) -> pd.DataFrame | None:
    """Read the Hindi captions CSV and check that it has the expected columns.

    The first row is treated as the header, and surrounding whitespace is
    stripped from column names before validation.

    Args:
        csv_path: Path object to the captions CSV file.

    Returns:
        The loaded pandas DataFrame, or None when the file is missing, empty,
        unreadable, or lacks the 'image' / 'caption' columns.
    """
    if not csv_path.exists():
        logger.error(f"Caption file not found: {csv_path}")
        return None

    logger.info(f"Loading captions from {csv_path}...")
    required_columns = ['image', 'caption']
    try:
        captions = pd.read_csv(csv_path, header=0)
        # Normalise header names so ' caption ' and 'caption' are treated alike.
        captions.columns = captions.columns.str.strip()

        absent = [name for name in required_columns if name not in captions.columns]
        if absent:
            logger.error(f"CSV file {csv_path} missing required columns. Expected: {required_columns}, Found: {captions.columns.tolist()}")
            return None

        logger.info(f"Successfully loaded {len(captions)} captions from {csv_path}.")
        return captions
    except pd.errors.EmptyDataError:
        logger.error(f"Error: Caption file {csv_path} is empty.")
        return None
    except Exception as load_error:
        logger.error(f"Error loading captions from {csv_path}: {load_error}")
        return None

## Sanskrit Data Collection Interface (Placeholder)

Define placeholder functions or an interface for acquiring Sanskrit image-text pairs (e.g., from digitized manuscripts). This acknowledges the difficulty and allows integration later. Comments highlight the manual/collaborative nature of this step.

In [None]:
#| export
def get_sanskrit_data_placeholder() -> list:
    """Stand-in for the (not yet implemented) Sanskrit data acquisition step.

    A real implementation would pull image-text pairs from APIs, databases,
    or parsed files derived from digitized manuscripts or similar sources.
    For now it only logs a warning and yields no records.

    Returns:
        A list of dictionaries (currently always empty), each expected to hold
        'image_filename', 'caption' (Sanskrit text), and 'source'.
    """
    logger.warning("Using placeholder function for Sanskrit data. No actual Sanskrit data loaded.")
    # TODO: Replace this with actual logic to load Sanskrit data
    # This might involve:
    # - Reading pre-processed files created manually or via collaboration
    # - Connecting to specific digital library APIs
    # - Processing OCR results linked to manuscript images
    #
    # Shape of one record, for reference:
    #   {
    #       'image_filename': 'manuscript_page_1_illustration_1.jpg',
    #       'caption': 'ॐ असतो मा सद्गमय । तमसो मा ज्योतिर्गमय । मृत्योर्मा अमृतं गमय ॥',
    #       'source': 'example_manuscript_archive'
    #   }
    records = []
    if len(records) > 0:
        logger.info(f"Loaded {len(records)} placeholder Sanskrit items.")
    return records

## Synthetic Data Integration Point (Placeholder)

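Define a function or placeholder to integrate synthetic data from the IndicTTI project. The expected input is a directory containing a `metadata.jsonl` file in which every line is a JSON object with at least an `image_filename` key and a `caption` key (Hindi or Sanskrit text); the images themselves are expected to be stored alongside it.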

In [None]:
#| export
def get_synthetic_data_placeholder(data_path: Path) -> list:
    """Placeholder function representing the synthetic data integration process.

    In a real scenario, this would read data generated by the IndicTTI project.
    The current implementation reads `<data_path>/metadata.jsonl`, one JSON
    object per line. Blank lines are ignored; malformed lines and records that
    lack the required keys are skipped with a warning, so one bad line does
    not discard the rest of the file.

    Args:
        data_path: Path to the directory containing `metadata.jsonl`.

    Returns:
        A list of dictionaries (or an empty list), each containing
        'image_filename', 'caption' (could be Hindi or Sanskrit), and 'source'
        (set to 'indic_tti_synthetic' by this function).
    """
    logger.warning("Using placeholder function for Synthetic data. No actual data loaded.")
    # TODO: Replace with actual logic to load synthetic data from IndicTTI
    # Example: Assume a metadata JSONL file exists at data_path
    metadata_file = data_path / 'metadata.jsonl'
    synthetic_data = []
    if not metadata_file.exists():
        logger.warning(f"Synthetic data metadata file not found at {metadata_file}")
        return synthetic_data

    try:
        with open(metadata_file, 'r', encoding='utf-8') as f:
            for line_number, line in enumerate(f, start=1):
                # A trailing newline or spacer line is not an error.
                if not line.strip():
                    continue
                try:
                    item = json.loads(line)
                except json.JSONDecodeError as e:
                    logger.warning(f"Skipping malformed JSON on line {line_number} of {metadata_file}: {e}")
                    continue
                # Only JSON objects can carry the required keys (and accept the 'source' tag).
                if isinstance(item, dict) and 'image_filename' in item and 'caption' in item:
                    item['source'] = 'indic_tti_synthetic'
                    synthetic_data.append(item)
                else:
                    logger.warning(f"Skipping synthetic item due to missing keys: {item}")
        logger.info(f"Loaded {len(synthetic_data)} items from synthetic source: {metadata_file}")
    except Exception as e:
        # I/O or decoding problems: keep whatever was read before the failure.
        logger.error(f"Error loading synthetic data from {metadata_file}: {e}")

    return synthetic_data

## Main Execution: Data Acquisition

In [None]:
# Sanity check: display the directory the Hindi downloads and extracted files will be written to.
HINDI_RAW_PATH

PosixPath('/content/drive/MyDrive/Indic-Clip/data/raw/hindi')

In [None]:
#| eval: false
# This block orchestrates the data acquisition process.
# Ensure you have run the Colab Setup cells above if applicable.

# Ensure core components are loaded after potential Colab setup / Drive mount
# It might be safer to put this entire block in a function called from outside
# or explicitly re-import core here if running interactively.

# End-to-end acquisition run: download the Flickr8k images and Hindi captions from Kaggle
# (skipping anything already on disk), unzip them, pair each caption with its image file,
# and write the resulting records as JSONL under the raw-data directories.
if __name__ == '__main__':
    logger.info("--- Running Data Acquisition Script ---")

    # --- Configuration ---
    # Kaggle dataset slugs
    FLICKR8K_IMAGES_SLUG = "adityajn105/flickr8k"
    FLICKR8K_HINDI_CAPTIONS_SLUG = "dsmeena/flickr8k-hindi-captions"

    # Define paths using variables from indic_clip.core
    # Assumes PROJECT_ROOT is correctly set for Colab or local
    # All four locations are the same directory: archives and extracted files live side by side.
    IMAGES_DOWNLOAD_PATH = HINDI_RAW_PATH
    CAPTIONS_DOWNLOAD_PATH = HINDI_RAW_PATH
    IMAGES_EXTRACT_PATH = HINDI_RAW_PATH
    CAPTIONS_EXTRACT_PATH = HINDI_RAW_PATH

    # Expected filenames after download/extraction (adjust if needed based on Kaggle dataset structure)
    IMAGES_ZIP_FILENAME = 'flickr8k.zip' # Default name from kaggle API might vary
    CAPTIONS_ZIP_FILENAME = 'flickr8k-hindi-captions.zip' # Default name
    # <<< Check the actual filename in the captions zip file >>>
    CAPTIONS_CSV_FILENAME = 'Clean-1Sentences_withComma.txt' # This needs verification after download!
    # Common variations: captions.csv, Hindi_Captions.csv, etc.
    # It's crucial this matches the actual extracted file name.

    # --- Download Datasets ---
    logger.info("Step 1: Downloading datasets from Kaggle...")
    # Define full paths to zip files
    images_zip_path = IMAGES_DOWNLOAD_PATH / IMAGES_ZIP_FILENAME
    captions_zip_path = CAPTIONS_DOWNLOAD_PATH / CAPTIONS_ZIP_FILENAME

    # Define expected output locations after extraction
    extracted_images_dir = IMAGES_EXTRACT_PATH / 'Images' # Flickr8k images are in 'Images' subdir
    extracted_captions_csv = CAPTIONS_EXTRACT_PATH / CAPTIONS_CSV_FILENAME

    # Work plan, filled in by the checks below; everything defaults to "nothing to do".
    download_images_flag = False
    unzip_images_flag = False
    download_captions_flag = False
    unzip_captions_flag = False

    # Decide whether to download/unzip images
    # Only the existence of the 'Images' directory is checked, not its contents.
    if not extracted_images_dir.exists():
        logger.info(f"Extracted images directory '{extracted_images_dir}' not found.")
        if not images_zip_path.exists():
            logger.info(f"Image zip file '{images_zip_path}' not found. Will attempt download.")
            download_images_flag = True
        else:
            logger.info(f"Image zip file already exists at {images_zip_path}. Skipping download.")
        unzip_images_flag = True # Need to unzip if extracted dir doesn't exist
    else:
        logger.info(f"Image directory '{extracted_images_dir}' already exists. Skipping image download and unzip.")

    # Decide whether to download/unzip captions
    if not extracted_captions_csv.exists():
        logger.info(f"Extracted captions CSV '{extracted_captions_csv}' not found.")
        if not captions_zip_path.exists():
             logger.info(f"Captions zip file '{captions_zip_path}' not found. Will attempt download.")
             download_captions_flag = True
        else:
            logger.info(f"Captions zip file already exists at {captions_zip_path}. Skipping download.")
        unzip_captions_flag = True # Need to unzip if extracted csv doesn't exist
    else:
        logger.info(f"Captions CSV file '{extracted_captions_csv}' already exists. Skipping captions download and unzip.")

    # Perform downloads
    if download_images_flag:
        if not download_kaggle_dataset(FLICKR8K_IMAGES_SLUG, IMAGES_DOWNLOAD_PATH):
             unzip_images_flag = False # Don't attempt unzip if download failed
    if download_captions_flag:
        if not download_kaggle_dataset(FLICKR8K_HINDI_CAPTIONS_SLUG, CAPTIONS_DOWNLOAD_PATH):
             unzip_captions_flag = False # Don't attempt unzip if download failed

    # --- Unzip Files ---
    logger.info("Step 2: Unzipping downloaded files (if necessary)...")
    # The extra exists() checks guard against the archive having a different name than expected.
    if unzip_images_flag and images_zip_path.exists():
        unzip_file(images_zip_path, IMAGES_EXTRACT_PATH)

    if unzip_captions_flag and captions_zip_path.exists():
        unzip_file(captions_zip_path, CAPTIONS_EXTRACT_PATH)
        # IMPORTANT: Verify CAPTIONS_CSV_FILENAME matches the extracted file now!
        if not extracted_captions_csv.exists():
             logger.error(f"Caption file '{CAPTIONS_CSV_FILENAME}' not found in {CAPTIONS_EXTRACT_PATH} after unzipping. Check the zip contents and update CAPTIONS_CSV_FILENAME.")

    # --- Load and Format Hindi Captions ---
    logger.info("Step 3: Loading and formatting Hindi captions...")
    hindi_captions_df = None
    if extracted_captions_csv.exists():
        hindi_captions_df = load_hindi_captions(extracted_captions_csv)
    else:
        logger.error(f"Cannot load captions, file not found: {extracted_captions_csv}")

    formatted_hindi_data = []
    if hindi_captions_df is not None:
        logger.info(f"Formatting {len(hindi_captions_df)} loaded captions...")
        # Construct relative path for images within the raw directory structure
        # Assumes images are extracted to HINDI_RAW_PATH / 'Images'
        image_subfolder = 'Images'

        for index, row in tqdm(hindi_captions_df.iterrows(), total=len(hindi_captions_df), desc="Formatting Hindi Captions"):
            # NOTE(review): assumes the 'image' column holds the ID without a file extension -- confirm against the CSV.
            image_id_base = row['image'] # Base ID like '1000268201_693b08cb0e'
            caption = row['caption']

            # Construct filename (assuming .jpg extension, common for Flickr8k)
            image_filename_only = f"{image_id_base}.jpg"
            # Store relative path within the source directory for later use
            image_relative_path = f"{image_subfolder}/{image_filename_only}"

            # Basic check: ensure image file actually exists after extraction
            image_full_path = IMAGES_EXTRACT_PATH / image_subfolder / image_filename_only
            if not image_full_path.exists():
                logger.warning(f"Image file not found: {image_full_path}. Skipping caption for {image_id_base}.")
                continue

            # Non-string values (e.g. missing captions) and whitespace-only captions are dropped.
            if isinstance(caption, str) and caption.strip(): # Basic validation
                formatted_hindi_data.append({
                    # Store relative path from the source's root (HINDI_RAW_PATH)
                    'image_path_relative': image_relative_path,
                    'caption': caption.strip(),
                    'source': 'flickr8k_hindi'
                })
            else:
                 logger.warning(f"Skipping row {index} for image {image_id_base} due to invalid caption: {caption}")
    else:
        logger.error("Failed to load Hindi captions DataFrame. Cannot format data.")

    # --- Load Sanskrit Data (Placeholder) ---
    logger.info("Step 4: Loading Sanskrit data (placeholder)...")
    formatted_sanskrit_data = get_sanskrit_data_placeholder()
    # Adjust 'image_path_relative' if real data is used
    # for item in formatted_sanskrit_data:
    #     item['image_path_relative'] = f"sanskrit_images/{item['image_filename']}" # Example

    # --- Load Synthetic Data (Placeholder) ---
    logger.info("Step 5: Loading synthetic data (placeholder)...")
    formatted_synthetic_data = get_synthetic_data_placeholder(SYNTHETIC_RAW_PATH)
    # Adjust 'image_path_relative' if real data is used
    # for item in formatted_synthetic_data:
    #     item['image_path_relative'] = f"synthetic_images/{item['image_filename']}" # Example

    # --- Combine and Save Data ---
    logger.info("Step 6: Saving formatted data...")
    # Save Hindi data
    # save_formatted_data itself logs a warning and writes nothing when the list is empty.
    save_formatted_data(formatted_hindi_data, HINDI_RAW_PATH, 'flickr8k_hindi_raw.jsonl')

    # Save Sanskrit data (if any)
    if formatted_sanskrit_data:
        save_formatted_data(formatted_sanskrit_data, SANSKRIT_RAW_PATH, 'sanskrit_raw.jsonl')
    else:
        logger.info("No Sanskrit data to save.")

    # Save Synthetic data (if any)
    if formatted_synthetic_data:
        save_formatted_data(formatted_synthetic_data, SYNTHETIC_RAW_PATH, 'synthetic_raw.jsonl')
    else:
        logger.info("No synthetic data to save.")

    logger.info("--- Data Acquisition Script Finished ---")

In [None]:
# Show the project root that every data path in this notebook is derived from.
# (The previous cell body referenced an undefined name and failed on a fresh kernel.)
print(PROJECT_ROOT)

/content/drive/MyDrive/Indic-Clip


In [None]:
#| hide
# Write the cells marked `#| export` out to the library module named by `default_exp`
# at the top of this notebook (data.collection).
import nbdev
nbdev.nbdev_export() # Runs the export from inside the notebook; `nbdev_export` in a terminal is the CLI alternative