# Use this notbook for your 3 choices of APIs

In [172]:
from bs4 import BeautifulSoup as bs4
import requests
import pandas as pd
from pandas import json_normalize
import numpy as np

In [12]:
url = "https://countrycode.org/"
response = requests.get(url)
contents = response.content
soup = bs4(contents, "html.parser")

In [21]:
def getjson(row):
    base = "https://date.nager.at/api/v3/publicholidays/2022/"
    return base+row["code_2"]

In [25]:
table = soup.find("table",{"class":"table table-hover table-striped main-table"}).find("tbody")
countries = []
codes = []
populations = []
areas = []
gdps = []
results = table.find_all("tr")
for i in results:
    country = i.find_all("td")[0].find("a").text
    code = i.find_all("td")[2].text
    pop = i.find_all("td")[3].text
    area = i.find_all("td")[4].text
    gdp = i.find_all("td")[5].text
    countries.append(country)
    codes.append(code)
    populations.append(pop)
    areas.append(area)
    gdps.append(gdp)
data = pd.DataFrame({"country":countries, "code":codes,"population":populations,"area":areas,"gdp":gdps})
data[["code_2","code_3"]] = data["code"].str.split(" / ", 1, expand = True)
data["json"] = data.apply(getjson, axis = 1)
data.head()

Unnamed: 0,country,code,population,area,gdp,code_2,code_3,json
0,Afghanistan,AF / AFG,29121286,647500,20.65 Billion,AF,AFG,https://date.nager.at/api/v3/publicholidays/20...
1,Albania,AL / ALB,2986952,28748,12.8 Billion,AL,ALB,https://date.nager.at/api/v3/publicholidays/20...
2,Algeria,DZ / DZA,34586184,2381740,215.7 Billion,DZ,DZA,https://date.nager.at/api/v3/publicholidays/20...
3,American Samoa,AS / ASM,57881,199,462.2 Million,AS,ASM,https://date.nager.at/api/v3/publicholidays/20...
4,Andorra,AD / AND,84000,468,4.8 Billion,AD,AND,https://date.nager.at/api/v3/publicholidays/20...


In [26]:
data.loc[0,"json"]

'https://date.nager.at/api/v3/publicholidays/2022/AF'

In [54]:
holidays = []
for i in data["json"]:
    try:
        datax = requests.get(i).json()
        data2 = json_normalize(datax)
        data2 = data2.drop_duplicates(subset = "date")
        holidays.append(data2)
    except:
        pass


In [57]:
holidays[9]

Unnamed: 0,date,localName,name,countryCode,fixed,global,counties,launchYear,types
0,2022-01-03,New Year's Day,New Year's Day,BZ,False,True,,,[Public]
1,2022-03-07,Baron Bliss Day,Baron Bliss Day,BZ,False,True,,,[Public]
2,2022-04-15,Good Friday,Good Friday,BZ,False,True,,,[Public]
3,2022-04-16,Holy Saturday,Holy Saturday,BZ,False,True,,,[Public]
4,2022-04-17,Easter Sunday,Easter Sunday,BZ,False,True,,,[Public]
5,2022-04-18,Easter Monday,Easter Monday,BZ,False,True,,,[Public]
6,2022-05-01,Labour Day,Labour Day,BZ,False,True,,,[Public]
7,2022-05-23,Commonwealth Day,Commonwealth Day,BZ,False,True,,,[Public]
8,2022-09-12,Saint George's Caye Day,Saint George's Caye Day,BZ,False,True,,,[Public]
9,2022-09-21,Independence Day,Independence Day,BZ,False,True,,,[Public]


In [62]:
pivots = []
for i in holidays:
    i = i.pivot_table(index = ["countryCode"], values = "date", aggfunc = "count").reset_index()
    pivots.append(i)
pivots[9]

Unnamed: 0,countryCode,date
0,BZ,14


In [63]:
holiday = pd.concat(pivots)
holiday

Unnamed: 0,countryCode,date
0,AL,13
0,AD,14
0,AR,16
0,AU,18
0,AT,13
...,...,...
0,UY,15
0,VA,15
0,VE,38
0,VN,4


In [137]:
df = pd.merge(data, holiday, left_on = "code_2", right_on = "countryCode")
df = df.drop(["code","code_2","code_3","json","countryCode"], axis = 1)
df = df.rename(columns = {"gdp":"income_in_mio","date":"num_holidays"})
df.population = df.population.str.replace(",","")
df.area = df.area.str.replace(",","")
df[["income","mio"]] = df["income_in_mio"].str.split(" ", 1, expand = True)
df.income = df.income.str.replace(" ","")
df = df.drop(labels = [68,96,105], axis = 0)
df.income = df.income.astype(float)
df["mio"].loc[df["mio"] == "Million"] = 1
df["mio"].loc[df["mio"] == "Billion"] = 1000
df["mio"].loc[df["mio"] == "Trillion"] = 1000000
df["income"] = df["income"] * df["mio"]
df = df.drop(["income_in_mio","mio"], axis = 1)
df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["mio"].loc[df["mio"] == "Million"] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["mio"].loc[df["mio"] == "Billion"] = 1000
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["mio"].loc[df["mio"] == "Trillion"] = 1000000


Unnamed: 0,country,population,area,num_holidays,income
0,Albania,2986952,28748,13,12800.0
1,Andorra,84000,468,14,4800.0
2,Argentina,41343201,2766890,16,484600.0
3,Australia,21515754,7686850,18,1488000.0
4,Austria,8205000,83858,13,417900.0


In [None]:
# next: scrape https://en.wikipedia.org/wiki/List_of_minimum_annual_leave_by_country

##### ======== this marks the end of the 1st JSON ========

In [188]:
# api = ""
# req = requests.get(api).json()
# data2 = json_normalize(req)
