The JSONPlaceholder API offers another URL which responds with a list of 'TODO' tasks: https://jsonplaceholder.typicode.com/todos

Your goal for this exercise is to extract the TODOs and users data from the API, calculate the number of completed tasks for each user, and load the result into a new table in the local PostgreSQL database.

In [185]:
# Step 1: Extract the data

import requests
from psycopg2 import connect, sql
import pandas as pd

import os

# libpq connection string used by every database helper below.
# NOTE(review): the fallback value embeds a plaintext password. Set the
# ETL_BITES_DSN environment variable to supply real credentials without
# committing them to the notebook; the literal default is kept only so the
# exercise keeps running unchanged on the original local setup.
conn_string = os.environ.get(
    "ETL_BITES_DSN",
    "dbname=etl_bites user=olikelly password=i_am_a_password host=localhost port='5432'",
)

def fetch_data_from_api(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address of the JSON endpoint to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The parsed JSON payload (whatever ``response.json()`` yields).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page as data.
    response.raise_for_status()
    return response.json()

# JSONPlaceholder endpoints used by this exercise.
todos_url, users_url = (
    'https://jsonplaceholder.typicode.com/todos',
    'https://jsonplaceholder.typicode.com/users',
)

# Download both payloads: TODO items first, then the users.
todo_data = fetch_data_from_api(url=todos_url)
user_data = fetch_data_from_api(url=users_url)



In [186]:
# Step 2: Transform the data (combine user's name with their to-do items)

def join_task_and_user(todo_data, user_data):
    """Attach the owning user's name to every TODO record.

    Each row of *todo_data* whose ``'userId'`` equals the ``'id'`` of a row in
    *user_data* gets a ``'name'`` key holding that user's ``'name'``. Rows
    without a matching user are left untouched (no ``'name'`` key is added).

    Args:
        todo_data: List of dicts, each with a ``'userId'`` key.
        user_data: Iterable of dicts, each with ``'id'`` and ``'name'`` keys.

    Returns:
        The same *todo_data* list; its rows are modified in place.

    Raises:
        KeyError: If a row is missing one of the keys listed above.
    """
    # Build the id -> name lookup once instead of rescanning user_data for
    # every TODO. If an id is repeated, the later user wins.
    names_by_user_id = {
        user_row['id']: user_row['name'] for user_row in user_data
    }
    for todo_row in todo_data:
        user_id = todo_row['userId']
        if user_id in names_by_user_id:
            todo_row['name'] = names_by_user_id[user_id]
    return todo_data


# join_task_and_user returns the list it was given, so combined_data and
# todo_data refer to the same (now name-annotated) rows.
combined_data = join_task_and_user(todo_data=todo_data, user_data=user_data)



In [None]:
# Step 3: Load the data into our database for analysis -- this isn't strictly needed for the given task, but I wanted more practice with ETL!

# Step 3.1: Create the task table

def execute_query_postgresql(conn_string, query):
    """Run a single SQL statement against PostgreSQL and commit it.

    Args:
        conn_string: libpq connection string passed to ``psycopg2.connect``.
        query: SQL statement to execute (no parameters are bound).

    Raises:
        psycopg2.Error: If connecting or executing the statement fails; the
            transaction is rolled back before the error propagates.
    """
    conn = connect(conn_string)
    try:
        # In psycopg2, `with conn` commits on success and rolls back on an
        # exception, but does NOT close the connection.
        with conn:
            with conn.cursor() as cur:
                cur.execute(query)
    finally:
        # Close explicitly so each call does not leak a server connection.
        conn.close()
            


# DDL for the table that stores one row per TODO item together with the
# owning user's name.
create_task_data_table = '''
CREATE TABLE IF NOT EXISTS task_data (
user_id INTEGER NOT NULL,
id INTEGER NOT NULL UNIQUE,
title TEXT NOT NULL,
completed BOOL NOT NULL,
name TEXT NOT NULL 
);
'''

# Create the table BEFORE truncating it: on a fresh database a TRUNCATE issued
# first fails because the relation does not exist yet. Truncating afterwards
# still empties the table on every re-run of the cell.
execute_query_postgresql(conn_string, create_task_data_table)
execute_query_postgresql(conn_string, 'TRUNCATE TABLE task_data RESTART IDENTITY;')


# Step 3.2: Insert task data into table

table_name = "task_data"

def insert_data_to_postgresql(conn_string, table_name, data):
    """Insert name-annotated TODO rows into *table_name*.

    Rows that conflict with an existing row (e.g. a duplicate ``id``) are
    skipped via ``ON CONFLICT DO NOTHING``. All rows are written in a single
    transaction: either every row is committed or, on error, none are.

    Args:
        conn_string: libpq connection string passed to ``psycopg2.connect``.
        table_name: Target table; quoted safely as an SQL identifier.
        data: Iterable of dicts with the keys ``'userId'``, ``'id'``,
            ``'title'``, ``'completed'`` and ``'name'``.

    Raises:
        KeyError: If a row is missing one of the required keys (raised before
            any database work is done).
        psycopg2.Error: If connecting or inserting fails.
    """
    # The statement is identical for every row, so compose it once.
    query = sql.SQL(
        "INSERT INTO {} (user_id, id, title, completed, name) "
        "VALUES (%s, %s, %s, %s, %s) ON CONFLICT DO NOTHING"
    ).format(sql.Identifier(table_name))
    rows = [
        (item['userId'], item['id'], item['title'], item['completed'], item['name'])
        for item in data
    ]

    conn = connect(conn_string)
    try:
        # `with conn` commits once on success / rolls back on error.
        with conn:
            with conn.cursor() as cur:
                cur.executemany(query, rows)
    finally:
        # psycopg2's connection context manager does not close the connection.
        conn.close()

# Persist the name-annotated TODO rows.
insert_data_to_postgresql(
    conn_string=conn_string,
    table_name=table_name,
    data=combined_data,
)



In [188]:
# Step 4: Calculate the number of completed tasks for each user, and load the result into a new table in the local PostgreSQL database.


# Number of completed tasks per user name. Only users with at least one
# completed task get an entry; keys appear in first-seen order.
data_for_new_table = {}

for task in combined_data:
    if task['completed']:
        name = task['name']
        data_for_new_table[name] = data_for_new_table.get(name, 0) + 1


# Reshape the tally into one dict per row, ready for insertion.
final_table = [{'name': name, 'completed_task_count': count} for name, count in data_for_new_table.items()]



# Load the per-user completed-task counts (final_table) into a new table


# DDL for the summary table: one row per user name with that user's number of
# completed tasks.
completed_task_data_table = '''
CREATE TABLE IF NOT EXISTS completed_task_data (
name TEXT NOT NULL UNIQUE,
completed_task_count INTEGER NOT NULL
);
'''


# Create the table BEFORE truncating it: on a fresh database a TRUNCATE issued
# first fails because the relation does not exist yet. Truncating afterwards
# still clears stale counts on every re-run of the cell.
execute_query_postgresql(conn_string, completed_task_data_table)
execute_query_postgresql(conn_string, 'TRUNCATE TABLE completed_task_data RESTART IDENTITY;')


def insert_task_data_to_postgresql(conn_string, table_name, data):
    """Insert per-user completed-task counts into *table_name*.

    Rows that conflict with an existing row (e.g. a duplicate ``name``) are
    skipped via ``ON CONFLICT DO NOTHING``. All rows are written in a single
    transaction: either every row is committed or, on error, none are.

    Args:
        conn_string: libpq connection string passed to ``psycopg2.connect``.
        table_name: Target table; quoted safely as an SQL identifier.
        data: Iterable of dicts with the keys ``'name'`` and
            ``'completed_task_count'``.

    Raises:
        KeyError: If a row is missing one of the required keys (raised before
            any database work is done).
        psycopg2.Error: If connecting or inserting fails.
    """
    # The statement is identical for every row, so compose it once.
    query = sql.SQL(
        "INSERT INTO {} (name, completed_task_count) "
        "VALUES (%s, %s) ON CONFLICT DO NOTHING"
    ).format(sql.Identifier(table_name))
    rows = [(item['name'], item['completed_task_count']) for item in data]

    conn = connect(conn_string)
    try:
        # `with conn` commits once on success / rolls back on error.
        with conn:
            with conn.cursor() as cur:
                cur.executemany(query, rows)
    finally:
        # psycopg2's connection context manager does not close the connection.
        conn.close()

# Persist the per-user completed-task counts.
insert_task_data_to_postgresql(
    conn_string=conn_string,
    table_name='completed_task_data',
    data=final_table,
)







