# Update gridVeg Additional Species in BigQuery

This notebook appends new additional species records to the BigQuery table from a CSV file stored in GCS.

**Operation**: APPEND new rows (not replace entire table)

## Requirements
- Google Cloud credentials configured
- Configuration file: copy `config.example.yml` to `config.yml` and fill in your values (the notebook reads it from `../config.yml`, relative to the working directory)
- Required packages: google-cloud-bigquery, google-cloud-storage, pandas, pyyaml


In [None]:
# Notebook dependencies: standard library first, then third-party packages.
from datetime import datetime
from pathlib import Path

import pandas as pd
import yaml
from google.cloud import bigquery, storage

# Confirmation that every import above resolved.
print("Libraries imported successfully")


## Load Configuration

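**TODO**: Add a `gridveg_additional_species` section to config.yml for this table (required keys: `gcs.csv_url`, `bigquery.table_id`; optional keys: `gcs.backup_bucket`, `gcs.backup_prefix`, `bigquery.project`)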


In [None]:
# Load configuration from YAML file
# NOTE: this relative path is resolved against the current working directory.
config_path = Path("../config.yml")

if not config_path.exists():
    raise FileNotFoundError(
        f"Configuration file not found: {config_path}\n"
        "Please copy config.example.yml to config.yml and fill in your values."
    )

# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default text encoding.
with open(config_path, 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Extract configuration values for gridVeg additional species
# TODO: Update these config keys once added to config.yml
#
# An empty config.yml parses to None, and the section for this table may not
# have been added yet. Check the structure up front so the user gets an
# actionable message instead of a bare TypeError/KeyError.
_section = config.get('gridveg_additional_species') if isinstance(config, dict) else None
if not isinstance(_section, dict):
    raise ValueError(
        "Please add a gridveg_additional_species section "
        "(with gcs and bigquery sub-sections) to config.yml"
    )

# Missing or malformed sub-sections are treated as empty so that the
# per-key validation below reports exactly which value needs configuring.
_gcs = _section.get('gcs')
if not isinstance(_gcs, dict):
    _gcs = {}
_bq = _section.get('bigquery')
if not isinstance(_bq, dict):
    _bq = {}

GCS_CSV_URL = _gcs.get('csv_url')
BACKUP_BUCKET = _gcs.get('backup_bucket')
BACKUP_PREFIX = _gcs.get('backup_prefix', 'backups/gridveg_additional_species')
if BACKUP_PREFIX is None:
    # A key that is present but left blank in YAML loads as None; use the
    # same default as when the key is absent.
    BACKUP_PREFIX = 'backups/gridveg_additional_species'
BQ_TABLE_ID = _bq.get('table_id')
BQ_PROJECT = _bq.get('project')

# Verify required config values: they must be non-empty strings and must not
# still contain the placeholder text from the example configuration.
if not (isinstance(GCS_CSV_URL, str) and GCS_CSV_URL) or GCS_CSV_URL.startswith('gs://your-'):
    raise ValueError("Please configure gridveg_additional_species.gcs.csv_url in config.yml")
if not (isinstance(BQ_TABLE_ID, str) and BQ_TABLE_ID) or 'your-project' in BQ_TABLE_ID:
    raise ValueError("Please configure gridveg_additional_species.bigquery.table_id in config.yml")

print("✓ Configuration loaded successfully")
# Long URLs are truncated to their first 60 characters for display only.
print(f"  CSV URL: {GCS_CSV_URL[:60]}..." if len(GCS_CSV_URL) > 60 else f"  CSV URL: {GCS_CSV_URL}")
print(f"  Table ID: {BQ_TABLE_ID}")
print(f"  Backup: gs://{BACKUP_BUCKET}/{BACKUP_PREFIX}" if BACKUP_BUCKET else "  Backup: Not configured")
