Make sure the following installs have been done:

pip install --upgrade google-cloud-bigquery
pip install --upgrade google-cloud-storage


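Authenticate with Application Default Credentials, then set the quota project:

gcloud auth application-default login
# Generic form (replace YOUR_PROJECT_ID with your own project):
gcloud auth application-default set-quota-project YOUR_PROJECT_ID
# For this project:
gcloud auth application-default set-quota-project ff-dbt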

In [1]:
# import pandas as pd
# from pathlib import Path
# import pandas as pd
# from google.cloud import bigquery

# # --- CONFIGURE THESE ---
# folder_path = Path("/Users/mattshaw/Desktop/dbt/Data/BBM IV")
# project_id = "ff-dbt"
# dataset_id = "ff_dbt_data_rw"
# # ------------------------

# # Initialize BigQuery client
# client = bigquery.Client(project=project_id)

# # Loop through CSV files in the folder
# for file in folder_path.iterdir():
#     if file.is_file() and file.suffix.lower() == ".csv":
        
#         # Create table name: rw_<filename_without_ext>
#         table_name = f"rw_{file.stem}"
#         table_id = f"{project_id}.{dataset_id}.{table_name}"

#         # print(f"Loading {file.name} → {table_id}")

#         # Load CSV into DataFrame
#         df = pd.read_csv(file)

#         # Upload to BigQuery
#         job = client.load_table_from_dataframe(df, table_id)
#         job.result()  # Wait for job to complete

#         #print(f"✔ Loaded {file.name} into {table_id}")

# print('Done')

In [1]:
### IMPROVED VERSION

from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from google.cloud import bigquery
import pandas as pd

# --- CONFIGURE THESE ---
# BigQuery destination: the project and the dataset inside it
project_id = "ff-dbt"
dataset_id = "ff_dbt_data_rw"

# Local folder that holds the CSV files (uncomment the alternative to switch folders)
# folder_path = Path("/Users/mattshaw/Desktop/dbt/Data/FF Data Raw Files (FantasyData.com)")
folder_path = Path("/Users/mattshaw/Desktop/dbt/Data/BBM IV")

# Size of the thread pool used for parallel uploads (can be raised if needed)
max_workers = 8
# ------------------------

# BigQuery client bound to the configured project
client = bigquery.Client(project=project_id)


def load_csv_to_bigquery(file: Path):
    """Upload one CSV file to BigQuery and describe the outcome as a string.

    The destination table is ``<project_id>.<dataset_id>.rw_<stem>``, where
    ``<stem>`` is the file name without its extension and with every hyphen
    turned into an underscore. The file is parsed with ``pd.read_csv`` and
    sent through the module-level ``client``; the call blocks until the load
    job has finished.

    Args:
        file: Path of the CSV file to load.

    Returns:
        A "✔ Loaded ..." message naming the file and the destination table on
        success, or a "✖ Error loading ..." message carrying the exception
        text. Any ``Exception`` raised along the way is caught and reported
        in the returned string rather than propagated.
    """
    try:
        # Hyphens in the file stem become underscores in the table name
        safe_stem = "_".join(file.stem.split("-"))
        destination = f"{project_id}.{dataset_id}.rw_{safe_stem}"

        frame = pd.read_csv(file)

        # .result() waits for the load job to complete
        client.load_table_from_dataframe(frame, destination).result()
    except Exception as exc:
        return f"✖ Error loading {file.name}: {exc}"
    else:
        return f"✔ Loaded {file.name} → {destination}"


# Gather the CSV files: regular files in folder_path with a .csv suffix (any case)
csv_files = []
for entry in folder_path.iterdir():
    if not entry.is_file():
        continue
    if entry.suffix.lower() == ".csv":
        csv_files.append(entry)

# Submit one upload task per file and gather the status strings as tasks finish
results = []
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_file = {}
    for csv_path in csv_files:
        future_to_file[executor.submit(load_csv_to_bigquery, csv_path)] = csv_path

    # as_completed yields futures in completion order, not submission order
    results.extend(done.result() for done in as_completed(future_to_file))

# Report each file's outcome, one message per line
for message in results:
    print(message)

print("DONE!!!")

✔ Loaded bbm_iv_2023_r1_results.csv → ff-dbt.ff_dbt_data_rw.rw_bbm_iv_2023_r1_results
DONE!!!


In [8]:
from pathlib import Path

# Folder to inspect
folder_path = Path("/Users/mattshaw/Desktop/dbt/Data/FF Data Raw Files (FantasyData.com)")

# Names only (no directory part) of the regular files in the folder,
# in ascending string order
file_names = sorted(entry.name for entry in folder_path.iterdir() if entry.is_file())

# Bare expression so the notebook displays the list
file_names

['.DS_Store',
 'advanced-qb-efficiency-metrics_2017.csv',
 'advanced-qb-efficiency-metrics_2018.csv',
 'advanced-qb-efficiency-metrics_2019.csv',
 'advanced-qb-efficiency-metrics_2020.csv',
 'advanced-qb-efficiency-metrics_2021.csv',
 'advanced-qb-efficiency-metrics_2022.csv',
 'advanced-qb-metrics_2017.csv',
 'advanced-qb-metrics_2018.csv',
 'advanced-qb-metrics_2019.csv',
 'advanced-qb-metrics_2020.csv',
 'advanced-qb-metrics_2021.csv',
 'advanced-qb-metrics_2022.csv',
 'advanced-rb-efficiency-metrics_2017.csv',
 'advanced-rb-efficiency-metrics_2018.csv',
 'advanced-rb-efficiency-metrics_2019.csv',
 'advanced-rb-efficiency-metrics_2020.csv',
 'advanced-rb-efficiency-metrics_2021.csv',
 'advanced-rb-efficiency-metrics_2022.csv',
 'advanced-rb-metrics_2017.csv',
 'advanced-rb-metrics_2018.csv',
 'advanced-rb-metrics_2019.csv',
 'advanced-rb-metrics_2020.csv',
 'advanced-rb-metrics_2021.csv',
 'advanced-rb-metrics_2022.csv',
 'advanced-te-efficiency-metrics_2017.csv',
 'advanced-te-effi