# Import CSV Files into MongoDB

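This notebook imports all CSV files from the `data/` folder into MongoDB collections in the `brazilian-ecommerce` database (the database name can be overridden with the `DATABASE_NAME` environment variable). Each CSV file becomes a collection with the same name as the file, without the `.csv` extension; existing documents in that collection are deleted before the new records are inserted. The connection string is read from the `MONGO_URI` environment variable, which must be set before running the notebook.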

In [None]:
import os
import glob
import pandas as pd
from pymongo import MongoClient

# Connection configuration comes from the environment: MONGO_URI is mandatory,
# while DATABASE_NAME falls back to "brazilian-ecommerce" when it is unset.
MONGO_URI = os.getenv("MONGO_URI")
DATABASE_NAME = os.getenv("DATABASE_NAME", "brazilian-ecommerce")

# Fail early with a clear message when no connection string was provided
# (an unset variable and an empty one are treated the same way).
if not MONGO_URI:
    raise ValueError(
        "MONGO_URI environment variable is not set. "
        "Please set it before running this notebook."
    )

# `client` and `db` are module-level handles reused by the cells below.
client = MongoClient(MONGO_URI)
db = client[DATABASE_NAME]
print(f"Connected to MongoDB database: {DATABASE_NAME}")

In [None]:
# Import every CSV file in the data/ folder as a MongoDB collection named
# after the file (without the .csv extension).


def frame_to_records(df):
    """Convert a DataFrame into a list of dicts ready for ``insert_many``.

    Missing cells (NaN/NaT/None) are mapped to ``None`` so that they are
    stored as BSON null instead of a floating-point NaN, which would
    otherwise end up even in text and date columns.

    Args:
        df: The DataFrame to convert, typically the result of ``pd.read_csv``.

    Returns:
        One dict per row, keyed by column name; an empty list when the
        frame has no rows.
    """
    # Cast to object first: on numeric columns pandas would otherwise coerce
    # the None replacement straight back to NaN.
    cleaned = df.astype(object).where(df.notna(), None)
    return cleaned.to_dict(orient='records')


# Sorted so the import order (and the printed log) is the same on every run;
# glob.glob returns files in arbitrary order.
csv_files = sorted(glob.glob(os.path.join('data', '*.csv')))

if not csv_files:
    # Usually means the notebook was started from the wrong working directory.
    print("No CSV files found in the 'data' folder; nothing to import.")

for csv_path in csv_files:
    collection_name = os.path.splitext(os.path.basename(csv_path))[0]
    print(f"Importing {csv_path} into collection '{collection_name}'...")
    df = pd.read_csv(csv_path)
    records = frame_to_records(df)
    if not records:
        # Header-only file: leave the existing collection untouched.
        print(f"No records found in {csv_path}")
        continue
    collection = db[collection_name]
    # Clear existing data so re-running the notebook does not duplicate documents.
    collection.delete_many({})
    collection.insert_many(records)
    print(f"Inserted {len(records)} records into '{collection_name}'")