In [15]:
def make_api_call(resource):
    """Fetch every record of ``resource`` from the dummyjson API, page by page.

    Each request passes ``skip`` set to the number of records collected so
    far. Paging stops once that count reaches the ``total`` reported by the
    API, or as soon as a page comes back without any records.

    Args:
        resource: Path segment appended to the endpoint (e.g. ``"users"``).
            The same string is the key under which the response JSON holds
            the list of records.

    Returns:
        list: All records collected across the pages, in the order received.

    Raises:
        Exception: If a response has a status code other than 200; the
            message is the response body text.
    """
    ENDPOINT = "https://dummyjson.com/"
    results_picked = 0
    total_results = None  # unknown until the first response reports it
    all_data = []
    while total_results is None or results_picked < total_results:
        response = requests.get(f"{ENDPOINT}{resource}", params={"skip": results_picked})
        if response.status_code != 200:
            raise Exception(response.text)
        data = response.json()
        rows = data.get(resource) or []
        if not rows:
            # An empty (or missing) page cannot advance `results_picked`, so
            # continuing would repeat the same request forever.
            break
        all_data.extend(rows)
        results_picked += len(rows)  # offset to skip in the next call
        total_results = data.get("total")
    return all_data

In [16]:
# Collect every user record; make_api_call pages through the endpoint internally.
users_data = make_api_call("users")
# Number of user records retrieved.
print(len(users_data))

100


In [17]:
# Collect every cart record; make_api_call pages through the endpoint internally.
carts_data = make_api_call("carts")
# Number of cart records retrieved.
print(len(carts_data))

20


We will use Python's built-in `json` module to write the data to a file, one JSON object per line.

In [18]:
import json    

def download_json(data, resource_name):
    """Write ``data`` to ``<resource_name>.json`` with one JSON document per line.

    Args:
        data: Iterable of JSON-serializable records.
        resource_name: File name without extension; ``.json`` is appended.
            A relative name resolves against the current working directory.

    Returns:
        None. The file is created, or overwritten if it already exists. Lines
        are separated by a newline and there is no newline after the last one.
    """
    serialized_rows = map(json.dumps, data)
    with open(f"{resource_name}.json", "w") as handle:
        handle.write("\n".join(serialized_rows))

# Write each dataset to <name>.json (relative path, so in the current working directory).
download_json(carts_data, "carts")
download_json(users_data, "users")

In [19]:
# Print the first line of carts.json, i.e. the first serialized cart record.
!head -1 carts.json

{"id": 1, "products": [{"id": 59, "title": "Spring and summershoes", "price": 20, "quantity": 3, "total": 60, "discountPercentage": 8.71, "discountedPrice": 55}, {"id": 88, "title": "TC Reusable Silicone Magic Washing Gloves", "price": 29, "quantity": 2, "total": 58, "discountPercentage": 3.19, "discountedPrice": 56}, {"id": 18, "title": "Oil Free Moisturizer 100ml", "price": 40, "quantity": 2, "total": 80, "discountPercentage": 13.1, "discountedPrice": 70}, {"id": 95, "title": "Wholesale cargo lashing Belt", "price": 930, "quantity": 1, "total": 930, "discountPercentage": 17.67, "discountedPrice": 766}, {"id": 39, "title": "Women Sweaters Wool", "price": 600, "quantity": 2, "total": 1200, "discountPercentage": 17.2, "discountedPrice": 994}], "total": 2328, "discountedTotal": 1941, "userId": 97, "totalProducts": 5, "totalQuantity": 10}


In [20]:
# Print the first line of users.json, i.e. the first serialized user record.
!head -1 users.json

{"id": 1, "firstName": "Terry", "lastName": "Medhurst", "maidenName": "Smitham", "age": 50, "gender": "male", "email": "atuny0@sohu.com", "phone": "+63 791 675 8914", "username": "atuny0", "password": "9uQFF1Lh", "birthDate": "2000-12-25", "image": "https://robohash.org/hicveldicta.png", "bloodGroup": "A\u2212", "height": 189, "weight": 75.4, "eyeColor": "Green", "hair": {"color": "Black", "type": "Strands"}, "domain": "slashdot.org", "ip": "117.29.86.254", "address": {"address": "1745 T Street Southeast", "city": "Washington", "coordinates": {"lat": 38.867033, "lng": -76.979235}, "postalCode": "20020", "state": "DC"}, "macAddress": "13:69:BA:56:A3:74", "university": "Capitol University", "bank": {"cardExpire": "06/22", "cardNumber": "50380955204220685", "cardType": "maestro", "currency": "Peso", "iban": "NO17 0695 2754 967"}, "company": {"address": {"address": "629 Debbie Drive", "city": "Nashville", "coordinates": {"lat": 36.208114, "lng": -86.58621199999999}, "postalCode": "37076", 

##### Exercise Idea - Build an ELT pipeline by loading this JSON into a data warehouse and performing joins

In [31]:
import boto3
import json

def get_secret(secret_name, region_name="us-east-1"):
    """Read a secret from AWS Secrets Manager and return it parsed from JSON.

    Args:
        secret_name: Identifier passed to Secrets Manager as ``SecretId``.
        region_name: AWS region of the Secrets Manager client.

    Returns:
        The value obtained by JSON-decoding the response's ``SecretString``
        field.
    """
    secrets_client = boto3.session.Session().client(
        service_name='secretsmanager',
        region_name=region_name,
    )
    raw_response = secrets_client.get_secret_value(SecretId=secret_name)
    return json.loads(raw_response['SecretString'])

creds = get_secret("wysde")
USERNAME = creds["RDS_POSTGRES_USERNAME"]
PASSWORD = creds["RDS_POSTGRES_PASSWORD"]
HOST = creds["RDS_POSTGRES_HOST"]
DATABASE = 'sparsh'

conn_str = 'postgresql://{0}:{1}@{2}/{3}'.format(USERNAME, PASSWORD, HOST, DATABASE)

%config SqlMagic.autopandas=True
%config SqlMagic.displaycon=False
%config SqlMagic.feedback=False
%config SqlMagic.displaylimit=5
%reload_ext sql
%sql {conn_str}

In [21]:
import pandas as pd
import json

In [25]:
# lines=True: the file holds one JSON object per line rather than a single JSON document.
carts_df = pd.read_json('carts.json', lines=True)
carts_df

Unnamed: 0,id,products,total,discountedTotal,userId,totalProducts,totalQuantity
0,1,"[{'id': 59, 'title': 'Spring and summershoes',...",2328,1941,97,5,10
1,2,"[{'id': 96, 'title': 'lighting ceiling kitchen...",3023,2625,30,5,10
2,3,"[{'id': 37, 'title': 'ank Tops for Womens/Girl...",460,403,63,5,10
3,4,"[{'id': 36, 'title': 'Sleeve Shirt Womens', 'p...",553,493,83,5,10
4,5,"[{'id': 23, 'title': 'Orange Essence Food Flav...",844,745,58,5,10
5,6,"[{'id': 53, 'title': 'printed high quality T s...",1454,1276,26,5,12
6,7,"[{'id': 61, 'title': 'Leather Straps Wristwatc...",588,519,56,5,10
7,8,"[{'id': 45, 'title': 'Malai Maxi Dress', 'pric...",1129,952,1,5,9
8,9,"[{'id': 74, 'title': 'Leather Hand Bag', 'pric...",3608,3371,91,5,10
9,10,"[{'id': 75, 'title': 'Seven Pocket Women Bag',...",9064,8205,13,5,9


In [27]:
# Serialize each cart's list of product dicts to a JSON string (presumably so
# the column can be loaded into the database as text). Values that are already
# strings are left alone, so re-running this cell does not double-encode them.
carts_df['products'] = carts_df['products'].apply(
    lambda products: products if isinstance(products, str) else json.dumps(products)
)
carts_df

Unnamed: 0,id,products,total,discountedTotal,userId,totalProducts,totalQuantity
0,1,"[{""id"": 59, ""title"": ""Spring and summershoes"",...",2328,1941,97,5,10
1,2,"[{""id"": 96, ""title"": ""lighting ceiling kitchen...",3023,2625,30,5,10
2,3,"[{""id"": 37, ""title"": ""ank Tops for Womens/Girl...",460,403,63,5,10
3,4,"[{""id"": 36, ""title"": ""Sleeve Shirt Womens"", ""p...",553,493,83,5,10
4,5,"[{""id"": 23, ""title"": ""Orange Essence Food Flav...",844,745,58,5,10
5,6,"[{""id"": 53, ""title"": ""printed high quality T s...",1454,1276,26,5,12
6,7,"[{""id"": 61, ""title"": ""Leather Straps Wristwatc...",588,519,56,5,10
7,8,"[{""id"": 45, ""title"": ""Malai Maxi Dress"", ""pric...",1129,952,1,5,9
8,9,"[{""id"": 74, ""title"": ""Leather Hand Bag"", ""pric...",3608,3371,91,5,10
9,10,"[{""id"": 75, ""title"": ""Seven Pocket Women Bag"",...",9064,8205,13,5,9


In [29]:
# Load the carts into the warehouse. if_exists='replace' rebuilds the table on
# every run; the default ('fail') raises as soon as the cell is executed twice.
carts_df.to_sql('carts_elt', con=conn_str, index=False, if_exists='replace')

In [32]:
%%sql
-- Preview ten rows of the loaded carts table. Without an ORDER BY the database is free to return any ten rows.
SELECT * FROM carts_elt LIMIT 10

Unnamed: 0,id,products,total,discountedTotal,userId,totalProducts,totalQuantity
0,1,"[{""id"": 59, ""title"": ""Spring and summershoes"",...",2328,1941,97,5,10
1,2,"[{""id"": 96, ""title"": ""lighting ceiling kitchen...",3023,2625,30,5,10
2,3,"[{""id"": 37, ""title"": ""ank Tops for Womens/Girl...",460,403,63,5,10
3,4,"[{""id"": 36, ""title"": ""Sleeve Shirt Womens"", ""p...",553,493,83,5,10
4,5,"[{""id"": 23, ""title"": ""Orange Essence Food Flav...",844,745,58,5,10
5,6,"[{""id"": 53, ""title"": ""printed high quality T s...",1454,1276,26,5,12
6,7,"[{""id"": 61, ""title"": ""Leather Straps Wristwatc...",588,519,56,5,10
7,8,"[{""id"": 45, ""title"": ""Malai Maxi Dress"", ""pric...",1129,952,1,5,9
8,9,"[{""id"": 74, ""title"": ""Leather Hand Bag"", ""pric...",3608,3371,91,5,10
9,10,"[{""id"": 75, ""title"": ""Seven Pocket Women Bag"",...",9064,8205,13,5,9


In [37]:
# lines=True: the file holds one JSON object per line rather than a single JSON document.
users_df = pd.read_json('users.json', lines=True)
# Transpose a two-row sample so the many columns are listed vertically.
users_df.head(2).T

Unnamed: 0,0,1
id,1,2
firstName,Terry,Sheldon
lastName,Medhurst,Quigley
maidenName,Smitham,Cole
age,50,28
gender,male,male
email,atuny0@sohu.com,hbingley1@plala.or.jp
phone,+63 791 675 8914,+7 813 117 7139
username,atuny0,hbingley1
password,9uQFF1Lh,CQutx25i8r


In [38]:
# These columns hold nested objects; serialize each one to a JSON string
# (presumably so it can be loaded into the database as text). Values that are
# already strings are left alone, so re-running this cell does not
# double-encode them.
for nested_column in ('hair', 'address', 'bank', 'company'):
    users_df[nested_column] = users_df[nested_column].apply(
        lambda value: value if isinstance(value, str) else json.dumps(value)
    )
users_df.head(2).T

Unnamed: 0,0,1
id,1,2
firstName,Terry,Sheldon
lastName,Medhurst,Quigley
maidenName,Smitham,Cole
age,50,28
gender,male,male
email,atuny0@sohu.com,hbingley1@plala.or.jp
phone,+63 791 675 8914,+7 813 117 7139
username,atuny0,hbingley1
password,9uQFF1Lh,CQutx25i8r


In [39]:
# Load the users into the warehouse. if_exists='replace' rebuilds the table on
# every run; the default ('fail') raises as soon as the cell is executed twice.
users_df.to_sql('users_elt', con=conn_str, index=False, if_exists='replace')

In [40]:
%%sql
-- Preview ten rows of the loaded users table. Without an ORDER BY the database is free to return any ten rows.
SELECT * FROM users_elt LIMIT 10

Unnamed: 0,id,firstName,lastName,maidenName,age,gender,email,phone,username,password,...,domain,ip,address,macAddress,university,bank,company,ein,ssn,userAgent
0,1,Terry,Medhurst,Smitham,50,male,atuny0@sohu.com,+63 791 675 8914,atuny0,9uQFF1Lh,...,slashdot.org,117.29.86.254,"{""address"": ""1745 T Street Southeast"", ""city"":...",13:69:BA:56:A3:74,Capitol University,"{""cardExpire"": ""06/22"", ""cardNumber"": ""5038095...","{""address"": {""address"": ""629 Debbie Drive"", ""c...",20-9487066,661-64-2976,Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.2...
1,2,Sheldon,Quigley,Cole,28,male,hbingley1@plala.or.jp,+7 813 117 7139,hbingley1,CQutx25i8r,...,51.la,253.240.20.181,"{""address"": ""6007 Applegate Lane"", ""city"": ""Lo...",13:F1:00:DA:A4:12,Stavropol State Technical University,"{""cardExpire"": ""10/23"", ""cardNumber"": ""5355920...","{""address"": {""address"": ""8821 West Myrtle Aven...",52-5262907,447-08-9217,Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...
2,3,Terrill,Hills,Hoeger,38,male,rshawe2@51.la,+63 739 292 7942,rshawe2,OWsTbMUgFc,...,earthlink.net,205.226.160.3,"{""address"": ""560 Penstock Drive"", ""city"": ""Gra...",F2:88:58:64:F7:76,University of Cagayan Valley,"{""cardExpire"": ""10/23"", ""cardNumber"": ""3586082...","{""address"": {""address"": ""18 Densmore Drive"", ""...",48-3951994,633-89-1926,Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21...
3,4,Miles,Cummerata,Maggio,49,male,yraigatt3@nature.com,+86 461 145 4186,yraigatt3,sRQxjPfdS,...,homestead.com,243.20.78.113,"{""address"": ""150 Carter Street"", ""city"": ""Manc...",03:45:58:59:5A:7B,Shenyang Pharmaceutical University,"{""cardExpire"": ""07/24"", ""cardNumber"": ""3580047...","{""address"": {""address"": ""210 Green Road"", ""cit...",71-3644334,487-28-6642,Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKi...
4,5,Mavis,Schultz,Yundt,38,male,kmeus4@upenn.edu,+372 285 771 1911,kmeus4,aUTdmmmbH,...,columbia.edu,103.72.86.183,"{""address"": ""2721 Lindsay Avenue"", ""city"": ""Lo...",F8:04:9E:ED:C0:68,Estonian University of Life Sciences,"{""cardExpire"": ""01/24"", ""cardNumber"": ""4917245...","{""address"": {""address"": ""8398 West Denton Lane...",18-7178563,667-98-5357,Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1...
5,6,Alison,Reichert,Franecki,21,female,jtreleven5@nhs.uk,+351 527 735 3642,jtreleven5,zY1nE46Zm,...,bandcamp.com,49.201.206.36,"{""address"": ""18 Densmore Drive"", ""city"": ""Esse...",6C:34:D0:4B:4E:81,Universidade da Beira Interior,"{""cardExpire"": ""03/22"", ""cardNumber"": ""3456758...","{""address"": {""address"": ""6231 North 67th Avenu...",78-3192791,158-68-0184,Mozilla/5.0 (Windows; U; Windows NT 6.0; nb-NO...
6,7,Oleta,Abbott,Wyman,31,female,dpettegre6@columbia.edu,+62 640 802 7111,dpettegre6,YVmhktgYVS,...,ovh.net,25.207.107.146,"{""address"": ""637 Britannia Drive"", ""city"": ""Va...",48:2D:A0:67:19:E0,Institut Sains dan Teknologi Al Kamal,"{""cardExpire"": ""10/23"", ""cardNumber"": ""3589640...","{""address"": {""address"": ""1407 Walden Court"", ""...",29-1568401,478-11-2206,Mozilla/5.0 (Windows; U; Windows NT 5.1; ru-RU...
7,15,Jeanne,Halvorson,Cummerata,26,female,kminchelle@qq.com,+86 581 108 7855,kminchelle,0lelplR,...,google.co.uk,78.43.74.226,"{""address"": ""4 Old Colony Way"", ""city"": ""Yarmo...",D9:DB:D9:5A:01:09,"Donghua University, Shanghai","{""cardExpire"": ""10/23"", ""cardNumber"": ""3588859...","{""address"": {""address"": ""22572 Toreador Drive""...",62-0561095,855-43-8639,Mozilla/5.0 (X11; Linux i686) AppleWebKit/534....
8,8,Ewell,Mueller,Durgan,29,male,ggude7@chron.com,+86 946 297 2275,ggude7,MWwlaeWcOoF6,...,homestead.com,91.200.56.127,"{""address"": ""5601 West Crocus Drive"", ""city"": ...",72:DA:1B:D7:30:E9,Wenzhou Medical College,"{""cardExpire"": ""09/23"", ""cardNumber"": ""3054992...","{""address"": {""address"": ""81 Seaton Place North...",88-4396827,238-41-5528,Mozilla/5.0 (X11; Linux amd64) AppleWebKit/534...
9,9,Demetrius,Corkery,Gleason,22,male,nloiterton8@aol.com,+86 356 590 9727,nloiterton8,HTQxxXV9Bq4,...,goodreads.com,78.170.185.120,"{""address"": ""5403 Illinois Avenue"", ""city"": ""N...",98:EE:94:A2:91:C4,Nanjing University of Economics,"{""cardExpire"": ""02/24"", ""cardNumber"": ""5372664...","{""address"": {""address"": ""12245 West 71st Place...",14-1066382,717-26-3759,Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_4_1...


In [56]:
%%sql
-- Total cart value per user, largest first. Only users with a non-null cart total are returned.
-- The ORDER BY sits on the outermost query because sorting inside a derived table
-- does not guarantee the order of the final result.
SELECT u.id AS user_id,
    u."firstName" AS user_first_name,
    u."lastName" AS user_last_name,
    SUM(c.total) AS total_spent
FROM users_elt u
    INNER JOIN carts_elt c ON u.id = c."userId"
GROUP BY u.id,
    u."firstName",
    u."lastName"
HAVING SUM(c.total) IS NOT NULL
ORDER BY total_spent DESC

Unnamed: 0,user_id,user_first_name,user_last_name,total_spent
0,13,Trace,Douglas,9064
1,47,Coralie,Boyle,4339
2,15,Jeanne,Halvorson,4040
3,91,Arne,Jacobs,3608
4,30,Maurine,Stracke,3023
5,5,Mavis,Schultz,2492
6,42,Tiara,Rolfson,2476
7,97,Allene,Harber,2328
8,76,Deon,Gutkowski,2121
9,26,Griffin,Braun,1454


It looks like Trace Douglas is our top spender! Let's persist this result as a table so that it becomes part of our ELT pipeline.

In [57]:
%%sql
-- Remove the table left by any earlier run so that this cell can be executed again.
DROP TABLE IF EXISTS top_spenders;
-- Materialize the total cart value per user, keeping only users with a non-null cart total.
-- The ORDER BY only influences the order in which rows are inserted, so queries
-- against this table should still sort explicitly.
CREATE TABLE top_spenders AS
SELECT u.id AS user_id,
    u."firstName" AS user_first_name,
    u."lastName" AS user_last_name,
    SUM(c.total) AS total_spent
FROM users_elt u
    INNER JOIN carts_elt c ON u.id = c."userId"
GROUP BY u.id,
    u."firstName",
    u."lastName"
HAVING SUM(c.total) IS NOT NULL
ORDER BY total_spent DESC

In [58]:
%%sql
-- Show the ten largest spenders. The explicit ORDER BY is required because rows
-- read from a table come back in no guaranteed order.
SELECT *
FROM top_spenders
ORDER BY total_spent DESC
LIMIT 10

Unnamed: 0,user_id,user_first_name,user_last_name,total_spent
0,13,Trace,Douglas,9064
1,47,Coralie,Boyle,4339
2,15,Jeanne,Halvorson,4040
3,91,Arne,Jacobs,3608
4,30,Maurine,Stracke,3023
5,5,Mavis,Schultz,2492
6,42,Tiara,Rolfson,2476
7,97,Allene,Harber,2328
8,76,Deon,Gutkowski,2121
9,26,Griffin,Braun,1454
