In [1]:
import os

# Root directory under which the generated project is laid out.
BASE_DIR = "/content/api_aggregator"

# Sub-directories to create directly beneath BASE_DIR.
folders = ["extractor", "transformer", "loader", "utils", "data"]

# exist_ok=True makes this cell safe to re-run: existing directories are kept.
for folder in folders:
    os.makedirs(name=os.path.join(BASE_DIR, folder), exist_ok=True)

print("Directories created")

Directories created


In [19]:
# Drop an __init__.py into each code directory so it can be imported as a
# package. Append mode ("a") creates the file when it is missing but, unlike
# "w", never truncates one that already has content, so re-running this cell
# is non-destructive. The `with` block guarantees the handle is closed.
for folder in ["extractor", "transformer", "loader", "utils"]:
    with open(os.path.join(BASE_DIR, folder, "__init__.py"), "a"):
        pass

print("__init__.py files added")

__init__.py files added


In [20]:
%%writefile /content/api_aggregator/extractor/api_extractor.py
import requests
import time

class APIExtractor:
    """Fetch JSON payloads with HTTP GET, retrying failed requests.

    Args:
        retries: Maximum number of GET attempts made by one ``extract`` call.
        timeout: Value forwarded as ``timeout`` to ``requests.get``.
    """

    def __init__(self, retries=3, timeout=5):
        self.retries = retries
        self.timeout = timeout

    def extract(self, url):
        """Return the decoded JSON body served at ``url``.

        A ``requests.exceptions.RequestException`` raised while sending the
        request, checking the status (``raise_for_status``) or decoding the
        body causes a retry, up to ``self.retries`` attempts in total. Between
        attempts the method sleeps ``2 ** attempt`` seconds (2, 4, ...).

        Args:
            url: Address to request.

        Returns:
            Whatever ``response.json()`` yields on the first successful
            attempt, or an empty list when every attempt failed. The failure
            itself is not reported, so callers must treat ``[]`` as
            "no data available".
        """
        for attempt in range(1, self.retries + 1):
            try:
                response = requests.get(url, timeout=self.timeout)
                response.raise_for_status()
                return response.json()
            except requests.exceptions.RequestException:
                # Back off only when another attempt will follow; sleeping
                # after the final failure would just delay the empty result.
                if attempt < self.retries:
                    time.sleep(2 ** attempt)
        return []

Overwriting /content/api_aggregator/extractor/api_extractor.py


In [21]:
%%writefile /content/api_aggregator/transformer/comment_transformer.py
import pandas as pd

class CommentTransformer:
    """Attach a per-post comment count to a collection of posts."""

    def transform(self, posts, comments):
        """Build a DataFrame of ``posts`` with an added ``comment_count`` column.

        Args:
            posts: Records accepted by ``pd.DataFrame``; each is expected to
                carry an ``"id"`` field.
            comments: Iterable of mappings; a comment is attributed to the
                post whose ``"id"`` equals the comment's ``"postId"``.

        Returns:
            A DataFrame with one row per post and an integer
            ``comment_count`` column (0 for posts without comments). When
            ``posts`` is empty the result is an empty frame that still has the
            ``comment_count`` column.
        """
        comment_count = {}

        for comment in comments:
            post_id = comment.get("postId")
            comment_count[post_id] = comment_count.get(post_id, 0) + 1

        df = pd.DataFrame(posts)

        if df.empty:
            # No posts (e.g. the extractor fell back to []): there is no "id"
            # column to map over, so emit the column empty rather than
            # failing with KeyError.
            df["comment_count"] = pd.Series([], index=df.index, dtype="int64")
            return df

        df["comment_count"] = df["id"].map(comment_count).fillna(0).astype(int)
        return df

Overwriting /content/api_aggregator/transformer/comment_transformer.py


In [22]:
%%writefile /content/api_aggregator/loader/csv_loader.py
class CSVLoader:
    """Writes a DataFrame to disk in CSV format."""

    def load(self, dataframe, file_path):
        """Save ``dataframe`` to ``file_path`` as CSV, omitting the index column.

        Args:
            dataframe: Object exposing a ``to_csv`` method.
            file_path: Destination handed to ``to_csv``.
        """
        csv_options = {"index": False}
        dataframe.to_csv(file_path, **csv_options)

Overwriting /content/api_aggregator/loader/csv_loader.py


In [23]:
%%writefile /content/api_aggregator/utils/file_utils.py
import os

class FileUtils:
    """Filesystem helpers for the aggregator."""

    @staticmethod
    def create_directory(path):
        """Create ``path`` and any missing parent directories.

        Does nothing when the directory already exists.
        """
        os.makedirs(name=path, exist_ok=True)

Overwriting /content/api_aggregator/utils/file_utils.py


In [24]:
%%writefile /content/api_aggregator/aggregator.py
class APIAggregator:
    """Runs an extract -> transform -> load pipeline from injected components.

    Args:
        extractor: Object with ``extract(url)``.
        transformer: Object with ``transform(posts, comments)``.
        loader: Object with ``load(dataframe, path)``.
    """

    def __init__(self, extractor, transformer, loader):
        self.extractor, self.transformer, self.loader = extractor, transformer, loader

    def run(self, posts_url, comments_url, output_path):
        """Execute the pipeline once and return the transformed result.

        Posts are extracted first, then comments; both are passed to the
        transformer, and its result is handed to the loader together with
        ``output_path`` before being returned.
        """
        # Generator unpacking keeps the original call order: posts, then comments.
        posts_payload, comments_payload = (
            self.extractor.extract(source) for source in (posts_url, comments_url)
        )

        combined = self.transformer.transform(posts_payload, comments_payload)
        self.loader.load(combined, output_path)
        return combined

Overwriting /content/api_aggregator/aggregator.py


In [25]:
import sys

# Make the generated project packages importable from the notebook. The
# membership check keeps the cell idempotent: re-running it no longer appends
# a duplicate entry to sys.path each time.
if "/content/api_aggregator" not in sys.path:
    sys.path.append("/content/api_aggregator")

In [26]:

from extractor.api_extractor import APIExtractor
from transformer.comment_transformer import CommentTransformer
from loader.csv_loader import CSVLoader
from utils.file_utils import FileUtils
from aggregator import APIAggregator
# Directory that receives the pipeline's CSV output.
DATA_DIR = "/content/api_aggregator/data"
FileUtils.create_directory(DATA_DIR)

# Source endpoints for posts and their comments.
POSTS_URL = "https://jsonplaceholder.typicode.com/posts"
COMMENTS_URL = "https://jsonplaceholder.typicode.com/comments"

# Assemble the pipeline from its three stages.
aggregator = APIAggregator(
    extractor=APIExtractor(),
    transformer=CommentTransformer(),
    loader=CSVLoader(),
)

# Fetch, aggregate and persist; the resulting frame is kept for inspection.
df = aggregator.run(
    posts_url=POSTS_URL,
    comments_url=COMMENTS_URL,
    output_path=f"{DATA_DIR}/posts_comments.csv",
)

df.head()

Unnamed: 0,userId,id,title,body,comment_count
0,1,1,sunt aut facere repellat provident occaecati e...,quia et suscipit\nsuscipit recusandae consequu...,5
1,1,2,qui est esse,est rerum tempore vitae\nsequi sint nihil repr...,5
2,1,3,ea molestias quasi exercitationem repellat qui...,et iusto sed quo iure\nvoluptatem occaecati om...,5
3,1,4,eum et est occaecati,ullam et saepe reiciendis voluptatem adipisci\...,5
4,1,5,nesciunt quas odio,repudiandae veniam quaerat sunt sed\nalias aut...,5
