In [20]:
import os

# Root of the generated project on the Colab filesystem.
base_dir = "/content/API_Aggregator"

# One sub-package per pipeline stage, plus shared helpers.
folders = ["extractor", "transformer", "loader", "utils"]

# exist_ok keeps this cell safe to re-run once the folders exist.
for folder in folders:
    os.makedirs(name=os.path.join(base_dir, folder), exist_ok=True)

In [21]:
# Mark every generated folder as an importable package.
# Append mode creates a missing __init__.py but leaves an existing one
# untouched, so re-running this cell never wipes package code; the context
# manager guarantees the file handle is closed.
for folder in folders:
    with open(os.path.join(base_dir, folder, "__init__.py"), "a"):
        pass

In [22]:
%%writefile /content/API_Aggregator/extractor/api_fetcher.py
import requests
import time

def fetch_data(url, retries=3, timeout=5):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    A failed attempt (any ``requests.exceptions.RequestException``, which
    includes the HTTP errors raised by ``raise_for_status``) is followed by
    a back-off of ``2 ** attempt`` seconds before the next attempt.

    Args:
        url: Address to request.
        retries: Maximum number of attempts.
        timeout: Timeout in seconds forwarded to ``requests.get``.

    Returns:
        The parsed JSON payload of the first successful response, or an
        empty list when every attempt failed (or ``retries`` is below 1).
    """
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException:
            # Back off only when another attempt will follow; sleeping after
            # the final failure would just delay the empty result.
            if attempt < retries:
                time.sleep(2 ** attempt)
    return []

Overwriting /content/API_Aggregator/extractor/api_fetcher.py


In [23]:
%%writefile /content/API_Aggregator/transformer/aggregator.py
import pandas as pd

def count_comments(posts, comments):
    """Attach a per-post comment total to the posts table.

    Args:
        posts: Post records (dicts carrying an ``"id"`` key), or any other
            input accepted by ``pd.DataFrame``.
        comments: Iterable of comment dicts; each one is tallied under its
            ``"postId"`` value.

    Returns:
        A DataFrame built from ``posts`` with an extra integer
        ``comment_count`` column. Posts without any comment get 0. When
        ``posts`` yields no rows, the result is an empty frame that still
        carries the ``comment_count`` column.
    """
    counts = {}
    for c in comments:
        pid = c.get("postId")
        counts[pid] = counts.get(pid, 0) + 1

    df = pd.DataFrame(posts)
    if len(df) == 0:
        # With no rows there may be no "id" column to look up, so build the
        # (empty) count column directly instead of failing with a KeyError.
        return df.assign(comment_count=pd.Series(dtype=int))

    # Ids absent from the tally map to NaN, which becomes a count of 0.
    df["comment_count"] = df["id"].map(counts).fillna(0).astype(int)
    return df

Overwriting /content/API_Aggregator/transformer/aggregator.py


In [24]:
%%writefile /content/API_Aggregator/loader/csv_loader.py
def save_csv(df, path):
    """Write ``df`` to ``path`` as CSV, leaving out the index column."""
    df.to_csv(path_or_buf=path, index=False)

Overwriting /content/API_Aggregator/loader/csv_loader.py


In [25]:
%%writefile /content/API_Aggregator/utils/file_utils.py
import os

def create_dir(path):
    """Create ``path`` and any missing parents; do nothing if it already exists."""
    os.makedirs(name=path, exist_ok=True)

Overwriting /content/API_Aggregator/utils/file_utils.py


In [26]:
import sys

# Make the generated packages importable from this notebook.
# Guarded so that re-running the cell does not pile up duplicate entries.
if "/content/API_Aggregator" not in sys.path:
    sys.path.append("/content/API_Aggregator")

In [27]:
from extractor.api_fetcher import fetch_data
from transformer.aggregator import count_comments
from loader.csv_loader import save_csv
from utils.file_utils import create_dir

# Output folder for the aggregated CSV.
DATA_DIR = "/content/API_Aggregator/data"
create_dir(DATA_DIR)

posts_url = "https://jsonplaceholder.typicode.com/posts"
comments_url = "https://jsonplaceholder.typicode.com/comments"

posts = fetch_data(posts_url)
comments = fetch_data(comments_url)

# fetch_data returns an empty list once all of its retries have failed.
# Stop with a clear message instead of aggregating and saving nothing.
if not posts:
    raise RuntimeError(f"No posts could be fetched from {posts_url}")
# Missing comments do not break the aggregation, but every count would be 0,
# so make that visible in the cell output.
if not comments:
    print(f"Warning: no comments fetched from {comments_url}; all counts will be 0")

df = count_comments(posts, comments)
save_csv(df, f"{DATA_DIR}/posts_comments.csv")

df.head()


Unnamed: 0,userId,id,title,body,comment_count
0,1,1,sunt aut facere repellat provident occaecati e...,quia et suscipit\nsuscipit recusandae consequu...,5
1,1,2,qui est esse,est rerum tempore vitae\nsequi sint nihil repr...,5
2,1,3,ea molestias quasi exercitationem repellat qui...,et iusto sed quo iure\nvoluptatem occaecati om...,5
3,1,4,eum et est occaecati,ullam et saepe reiciendis voluptatem adipisci\...,5
4,1,5,nesciunt quas odio,repudiandae veniam quaerat sunt sed\nalias aut...,5
