# Pipeline ETL - API → MongoDB Atlas

In [None]:
!pip install -q pymongo python-dotenv requests pandas dnspython

## 1️⃣ Configuração

In [None]:

import os
from dotenv import load_dotenv
from pymongo import MongoClient

# Load variables from a local .env file into the process environment.
load_dotenv()

# Connection string for the Atlas cluster, read from the environment.
ATLAS_URI = os.getenv("ATLAS_URI")
if not ATLAS_URI:
    raise RuntimeError("❌ ATLAS_URI não encontrada no .env")

# MongoClient connects lazily: building it succeeds even when the cluster is
# unreachable or the credentials are wrong.
client = MongoClient(ATLAS_URI)
# Force one round trip so connection problems surface here, before the
# success message, instead of later in the Load step.
client.admin.command("ping")

db = client["test"]
collection = db["users"]

print("✅ Conectado ao MongoDB Atlas")


## 2️⃣ Extract

In [None]:

import requests
import pandas as pd

# Endpoint that returns the user records as JSON.
API_URL = "https://users-api-etl.up.railway.app/users"

# Bounded wait so an unresponsive API cannot hang the notebook.
response = requests.get(url=API_URL, timeout=10)
# Turn 4xx/5xx responses into an exception instead of parsing an error body.
response.raise_for_status()

# Decode the payload, then flatten it into a table; nested objects become
# dot-separated column names.
data = response.json()
df = pd.json_normalize(data)

row_count = len(df.index)
print(f"✅ {row_count} registros extraídos")
# Last expression of the cell: the notebook renders the first rows.
df.head(n=5)


## 3️⃣ Transform

In [None]:

# Discard rows lacking an id or a name, then cast the id column to int,
# all in a single chained expression.
df = df.dropna(subset=["id", "name"]).astype({"id": int})

def _value_or_default(row, key, default):
    """Return ``row[key]``, or *default* when the key is absent or null."""
    value = row.get(key, default)
    # ``.get`` only falls back when the key is absent. A cell that exists but
    # holds NaN/None would otherwise reach ``int()``/``float()`` and raise,
    # or leak NaN into the stored document.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return default
    return value


def transform_user(row):
    """Convert one flat user record into the nested document to be stored.

    Args:
        row: A mapping-like record (e.g. a DataFrame row or a dict) that
            supports ``row[key]`` and ``row.get(key, default)``. ``id`` and
            ``name`` are required; every other field is optional.

    Returns:
        A dict with ``id``, ``name``, nested ``account`` and ``card``
        sub-documents, and empty ``features`` / ``news`` lists. Optional
        fields that are missing or null fall back to ``0``, ``0.0`` or ``""``.

    Raises:
        KeyError: If ``id`` or ``name`` is absent from ``row``.
        ValueError: If a numeric field holds a value that cannot be converted.
    """
    return {
        "id": int(row["id"]),
        "name": row["name"],
        "account": {
            "id": int(_value_or_default(row, "account_id", 0)),
            "number": _value_or_default(row, "account_number", ""),
            "agency": _value_or_default(row, "agency", ""),
            "balance": float(_value_or_default(row, "balance", 0.0)),
            "limit": float(_value_or_default(row, "account_limit", 0.0)),
        },
        "card": {
            "id": int(_value_or_default(row, "card_id", 0)),
            "number": _value_or_default(row, "card_number", ""),
            "limit": float(_value_or_default(row, "card_limit", 0.0)),
        },
        "features": [],
        "news": [],
    }

# Build one document per row. An explicit loop replaces
# ``df.apply(..., axis=1).tolist()`` because ``apply`` returns a DataFrame
# (which has no ``tolist``) when ``df`` has zero rows; the loop simply yields
# an empty list in that case.
users = [transform_user(row) for _, row in df.iterrows()]
print(f"✅ {len(users)} usuários transformados")


## 4️⃣ Load

In [None]:

from pymongo import UpdateOne

# Enforce one document per user id.
collection.create_index("id", unique=True)

# bulk_write only accepts PyMongo operation objects; mongo-shell style dicts
# ({"updateOne": {...}}) are rejected with TypeError. Each operation inserts
# the user only when no document with that id exists ($setOnInsert + upsert),
# so existing documents are left untouched.
operations = [
    UpdateOne({"id": user["id"]}, {"$setOnInsert": user}, upsert=True)
    for user in users
]

if operations:
    # ordered=False lets the server keep going past an individual failure.
    result = collection.bulk_write(operations, ordered=False)
    inserted = result.upserted_count
else:
    # bulk_write raises InvalidOperation on an empty list, so skip the call.
    result = None
    inserted = 0

print("Inseridos:", inserted)


## 5️⃣ Validação

In [None]:

from pprint import pprint
# Sample at most five stored documents, projecting out MongoDB's internal _id.
cursor = collection.find({}, {"_id": 0}).limit(5)
docs = list(cursor)
pprint(docs)
