In [4]:
# request from curl -X GET "https://api-cepalstat.cepal.org/cepalstat/api/v1/thematic-tree?lang=en&format=json" -H  "accept: application/json"

import requests
import json

def get_cepalstat_data():
	url = "https://api-cepalstat.cepal.org/cepalstat/api/v1/thematic-tree?lang=en&format=json"
	headers = {
		"accept": "application/json"
	}

	response = requests.get(url, headers=headers)

	if response.status_code == 200:
		data = response.json()
		return data
	else:
		print(f"Error: {response.status_code}")
		return None

In [5]:
data = get_cepalstat_data()

In [6]:
subjects = data["body"]["children"]
subjects_names = [subj["name"] for subj in subjects]

In [7]:
subjects_children = [subject["children"] for subject in subjects]
subjects_children_flatten = [item for sublist in subjects_children for item in sublist]
subjects_children_names = []
for subject_children in subjects_children:
	for subjects_child in subject_children:
		subjects_children_names.append(subjects_child["name"])

In [8]:
subjects_children_children = [subjects_child["children"] for subjects_child in subjects_children_flatten]
subjects_children_children_flatten = [item for sublist in subjects_children_children for item in sublist]
subjects_children_children_names = []
for subjects_children_child in subjects_children_children:
	for subjects_children_child in subjects_children_child:
		subjects_children_children_names.append(subjects_children_child["name"])
subjects_children_children_names

['Population',
 'Fertility',
 'Mortality',
 'Migration',
 'Motherhood',
 'Spatial distribution and urbanization',
 'Internal migration',
 'Poverty',
 'Income distribution',
 'Labour',
 'Education',
 'Health',
 'Housing and basic services',
 'Household structure',
 'Perceptions and Expectations of Well-being',
 'Governance, justice and security',
 'Social public expenditure',
 'Annual',
 'Quarterly',
 'Consumer prices',
 'Producer prices',
 'International commodity prices',
 'Fuels price',
 'Purchasing power parity',
 'Productivity and labor costs',
 'Sectoral statistics',
 'Balance of payments',
 'External trade of goods',
 'Derivate indicators of balance of payments',
 'Indicators of external sector',
 'Financing and external debt',
 'Exchange rates',
 'Public spending by function',
 'Public and private social spending (SOCX methodology)',
 'Public revenues',
 'Government operations',
 'External public debt',
 'Financial indicators',
 'Monetary indicators',
 'Atmosphere, climate and w

In [9]:
subjects_children_children_children = [subjects_children_child["children"] for subjects_children_child in subjects_children_children_flatten]
subjects_children_children_children_flatten = [item for sublist in subjects_children_children_children for item in sublist]
subjects_children_children_children_flatten = [subjects_children_children_children_flatten[i] for i in range(len(subjects_children_children_children_flatten)) if "indicator_id" in subjects_children_children_children_flatten[i]]
subjects_children_children_children_names = [subjects_children_children_child["name"] for subjects_children_children_child in subjects_children_children_children_flatten]
subjects_children_children_children_ids = [subjects_children_children_child["indicator_id"] for subjects_children_children_child in subjects_children_children_children_flatten]

In [10]:
subjects_children_children_children_names

['Total population, by sex',
 'Population, by age group, by sex',
 'Demographic dependency ratio, by dependent groups and sex',
 'Structure of the total population by sex and age group',
 'Annual growth rate of the total population, by age group',
 'Crude birth rate',
 'Total fertility rate',
 'Adolescent birth rate (per 1,000 women aged 15-19 and 10-14 years) SP_DYN_ADKL',
 'Life expectancy at birth, by sex',
 'Crude death rate',
 'Infant mortality rate, by sex',
 'Under-five mortality rate, by sex',
 'Migration rate',
 'Relative distribution of mothers by single age (10 to 50 and over) and number of children (1-20 and over). Percentage.',
 'School dropout due to parenting duties',
 'Proportion of women aged 15-19 years who are mothers.',
 'Percentage of live births in adolescence and adolescence that were unplanned.',
 'Children ever born by years of schooling (grouped) and single age 10 - 50 and over (absolute number) (Missing motherhood assigned to zero child)\r\n',
 'Population gr

In [11]:
# fint the one that has no indicator_id
for subjects_children_children_child in subjects_children_children_flatten:
	if "indicator_id" not in subjects_children_children_child:
		print(subjects_children_children_child)
		break

{'name': 'Population', 'order': 200, 'area_id': 2427, 'children': [{'name': 'Total population, by sex', 'order': 10, 'indicator_id': 4788}, {'name': 'Population, by age group, by sex', 'order': 20, 'indicator_id': 4789}, {'name': 'Demographic dependency ratio, by dependent groups and sex', 'order': 30, 'indicator_id': 4792}, {'name': 'Structure of the total population by sex and age group', 'order': 40, 'indicator_id': 4793}, {'name': 'Annual growth rate of the total population, by age group', 'order': 50, 'indicator_id': 4795}]}


In [12]:
indicator_id = 4788
request = f"https://api-cepalstat.cepal.org/cepalstat/api/v1/indicator/{indicator_id}/dimensions"
headers = {
	"accept": "application/json",
}
response = requests.get(request, headers=headers)
response.raise_for_status()
# get the dimensions
dimensions_raw = response.json()
dimensions = dimensions_raw["body"]["dimensions"]
dimensions_names = [dimension["name"] for dimension in dimensions]
dimensions_ids = [dimension["id"] for dimension in dimensions]
dimensions_names, dimensions_ids

(['Country__ESTANDAR', 'Years__ESTANDAR', 'Sex'], [208, 29117, 88622])

In [13]:
possible_values = {}
for dimension in dimensions:
	dimension_name = dimension["name"]
	members = dimension["members"]
	members_names = [member["name"] for member in members]
	members_ids = [member["id"] for member in members]
	possible_values[dimension_name] = dict(zip(members_names, members_ids))
possible_values

{'Country__ESTANDAR': {'Andean Community': 31807,
  'Andorra': 20376,
  'Angola': 43437,
  'Anguilla': 213,
  'Antigua and Barbuda': 214,
  'Argentina': 216,
  'Aruba': 217,
  'Australia': 43438,
  'Austria': 31770,
  'Bahamas': 218,
  'Barbados': 219,
  'Belgium': 31771,
  'Belize': 220,
  'Bermudas': 31799,
  'Bolivia (Plurinational State of)': 221,
  'Bonaire': 46571,
  'Brazil': 222,
  'British Virgin Islands': 243,
  'Bulgaria': 31772,
  'Canada': 43440,
  'Cape Verde': 43439,
  'Caribbean': 223,
  'Caribbean Netherlands': 43403,
  'Cayman Islands': 20442,
  'CELAC': 85583,
  'Central America': 209,
  'Central American Common Market (CACM)': 31808,
  'Chile': 224,
  'China': 43441,
  'Colombia': 225,
  'Common market of the south (MERCOSUR)': 31809,
  'Common market of the south (MERCOSUR), Bolivia (Plurinational State of) and Chile': 46573,
  'Costa Rica': 226,
  'Croatia': 43442,
  'Cuba': 249,
  'Curaçao': 43404,
  'Cyprus': 31773,
  'Czech Republic': 31791,
  'Democratic Peopl

In [14]:
possible_values_prompt = {key: list(value.keys()) for key, value in possible_values.items()}
possible_values_prompt

{'Country__ESTANDAR': ['Andean Community',
  'Andorra',
  'Angola',
  'Anguilla',
  'Antigua and Barbuda',
  'Argentina',
  'Aruba',
  'Australia',
  'Austria',
  'Bahamas',
  'Barbados',
  'Belgium',
  'Belize',
  'Bermudas',
  'Bolivia (Plurinational State of)',
  'Bonaire',
  'Brazil',
  'British Virgin Islands',
  'Bulgaria',
  'Canada',
  'Cape Verde',
  'Caribbean',
  'Caribbean Netherlands',
  'Cayman Islands',
  'CELAC',
  'Central America',
  'Central American Common Market (CACM)',
  'Chile',
  'China',
  'Colombia',
  'Common market of the south (MERCOSUR)',
  'Common market of the south (MERCOSUR), Bolivia (Plurinational State of) and Chile',
  'Costa Rica',
  'Croatia',
  'Cuba',
  'Curaçao',
  'Cyprus',
  'Czech Republic',
  'Democratic Peoples Republic of Korea (North Korea)',
  'Denmark',
  'Dominica',
  'Dominican Republic',
  'Ecuador',
  'Egypt',
  'El Salvador',
  'England',
  'Estonia',
  'Europe',
  'European Union',
  'European Union (15 countries)',
  'European 

In [15]:
dimensions_values = {"Country__ESTANDAR": ["Bolivia (Plurinational State of)", "Brazil"], "Years__ESTANDAR": ["2020"], "Sex": ["Men"]}

In [16]:
# dimensions_values_ids = {key: possible_values[key][dimensions_values[key]] for key in dimensions_names if dimensions_values[key] is not None}.values()
dimensions_values_ids = []
for key in dimensions_names:
	if dimensions_values[key] is not None:
		ks = dimensions_values[key]
		for k in ks:
			dimensions_values_ids.append(str(possible_values[key][k]))
members = ",".join(map(str,dimensions_values_ids))
# members = "216,88626,29195"
members, dimensions_values_ids

('221,222,29190,88626', ['221', '222', '29190', '88626'])

In [17]:
def make_request(url, headers=None, params=None):
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()  # Raise an error for bad responses
    return response.json()

indicator_id = 4788

response = make_request(f"https://api-cepalstat.cepal.org/cepalstat/api/v1/indicator/{indicator_id}/data", 
                        params={"members": members})
response["body"]["data"]

[{'value': '5940.1',
  'source_id': 6547,
  'notes_ids': '',
  'iso3': 'BOL',
  'dim_208': 221,
  'dim_88622': 88626,
  'dim_29117': 29190},
 {'value': '102844.5',
  'source_id': 6547,
  'notes_ids': '',
  'iso3': 'BRA',
  'dim_208': 222,
  'dim_88622': 88626,
  'dim_29117': 29190}]

In [18]:
def get_key(possible_values, id):
	for area in possible_values:
		for key, value in possible_values[area].items():
			if id == value:
				return key
get_key(possible_values, 29190)

'2020'

In [19]:
indicator_name = "Total population, by sex"
prompt = f"{indicator_name}:\n"
for point in response["body"]["data"]:
	value = point["value"]
	ids = [val for key,val in point.items() if key not in ("value", "source_id", "notes_ids", "iso3")]
	dims = [get_key(possible_values, id) for id in ids]
	dims_str = ", ".join(dims)
	prompt += f"{dims_str}: {value}\n"
print(prompt)

Total population, by sex:
Bolivia (Plurinational State of), Men, 2020: 5940.1
Brazil, Men, 2020: 102844.5



In [20]:
import Levenshtein
import json

def get_closer_string(query: str, possible_values: list) -> str:
	min_distance = float("inf")
	closest_string = ""
	for value in possible_values:
		distance = Levenshtein.distance(query, value)
		if distance < min_distance:
			min_distance = distance
			closest_string = value
	return closest_string

none_values = {'Years__ESTANDAR': None}
response2 = {
  "Years__ESTANDAR": ["2023", "2024", "2025"]
}
for key in none_values.keys():
	if key in response2.keys() and response2[key] is not None:
		response_values = response2[key]
		if isinstance(response2[key], str):
			response_values = [response_values]
		response_values = [get_closer_string(value, possible_values[key]) for value in response_values]
		dimensions_values[key] = response_values
	else:
		dimensions_values[key] = None

In [24]:
dimensions_values = {'Years__ESTANDAR': ['2024', '2025'], 'City (Country)': 'Argentina'}
dimensions_names = ['Years__ESTANDAR', 'City (Country)']
possible_values = {'Years__ESTANDAR': {'1900': 68109, '1901': 68110, '1902': 68111, '1903': 68112, '1904': 68113, '1905': 68114, '1906': 68115, '1907': 68116, '1908': 68117, '1909': 68118, '1910': 68119, '1911': 68120, '1912': 68121, '1913': 68122, '1914': 68123, '1915': 68124, '1916': 68125, '1917': 68126, '1918': 68127, '1919': 68128, '1920': 68129, '1921': 68130, '1922': 68131, '1923': 68132, '1924': 68133, '1925': 68134, '1926': 68135, '1927': 68136, '1928': 68137, '1929': 68138, '1930': 68139, '1931': 68140, '1932': 68141, '1933': 68142, '1934': 68143, '1935': 68144, '1936': 68145, '1937': 68146, '1938': 68147, '1939': 68148, '1940': 68149, '1941': 68150, '1942': 68151, '1943': 68152, '1944': 68153, '1945': 68154, '1946': 68155, '1947': 68156, '1948': 68157, '1949': 68158, '1950': 29119, '1951': 29118, '1952': 29120, '1953': 29121, '1954': 29122, '1955': 29123, '1956': 29124, '1957': 29125, '1958': 29126, '1959': 29127, '1960': 29128, '1961': 29129, '1962': 29130, '1963': 29131, '1964': 29132, '1965': 29133, '1966': 29134, '1967': 29135, '1968': 29136, '1969': 29137, '1970': 29138, '1971': 29139, '1972': 29140, '1973': 29141, '1974': 29142, '1975': 29143, '1976': 29144, '1977': 29145, '1978': 29146, '1979': 29147, '1980': 29150, '1981': 29151, '1982': 29152, '1983': 29153, '1984': 29154, '1985': 29155, '1986': 29156, '1987': 29157, '1988': 29158, '1989': 29159, '1990': 29160, '1991': 29161, '1992': 29162, '1993': 29163, '1994': 29164, '1995': 29165, '1996': 29166, '1997': 29167, '1998': 29168, '1999': 29169, '2000': 29170, '2001': 29171, '2002': 29172, '2003': 29173, '2004': 29174, '2005': 29175, '2006': 29176, '2007': 29177, '2008': 29178, '2009': 29179, '2010': 29180, '2011': 29181, '2012': 29182, '2013': 29183, '2014': 29184, '2015': 29185, '2016': 29186, '2017': 29187, '2018': 29188, '2019': 29189, '2020': 29190, '2021': 29191, '2022': 29192, '2023': 29193, '2024': 29194, '2025': 29195, '2026': 29196, '2027': 29197, '2028': 29198, '2029': 29199, '2030': 29200, '2031': 29201, '2032': 29202, '2033': 29203, '2034': 29204, '2035': 29205, '2036': 29206, '2037': 29207, '2038': 29208, '2039': 29209, '2040': 29210, '2041': 29211, '2042': 29212, '2043': 29213, '2044': 29214, '2045': 29215, '2046': 29216, '2047': 29217, '2048': 29218, '2049': 29219, '2050': 29220, '2051': 32096, '2052': 32097, '2053': 32098, '2054': 32099, '2055': 32100, '2056': 32101, '2057': 32102, '2058': 32103, '2059': 32104, '2060': 32105, '2061': 32106, '2062': 32107, '2063': 32108, '2064': 32109, '2065': 32110, '2066': 32111, '2067': 32112, '2068': 32113, '2069': 32114, '2070': 32115, '2071': 32116, '2072': 32117, '2073': 32118, '2074': 32119, '2075': 32120, '2076': 32121, '2077': 32122, '2078': 32123, '2079': 32124, '2080': 32125, '2081': 32126, '2082': 32127, '2083': 32128, '2084': 32129, '2085': 32130, '2086': 32131, '2087': 32132, '2088': 32133, '2089': 32134, '2090': 32135, '2091': 32136, '2092': 32137, '2093': 32138, '2094': 32139, '2095': 32140, '2096': 32141, '2097': 32142, '2098': 32143, '2099': 32144, '2100': 32145}, 'City (Country)': {'BOGOTA D.C. (COLOMBIA)': 56132, 'MEXICO CITY (MEXICO)': 56135, 'MONTEVIDEO - DEPARTMENT (URUGUAY)': 56133, 'MUNICIPALITY OF SAO PAULO (BRASIL)': 56136, 'SANTIAGO METROPOLITAN REGION (CHILE)': 56134}}

In [26]:
dimensions_values_ids = []
for key in dimensions_names:
	if dimensions_values[key] is not None:
		ks = dimensions_values[key]
		for k in ks:
			print(key, k)
			dimensions_values_ids.append(str(possible_values[key][k]))
members = ",".join(map(str,dimensions_values_ids))

Years__ESTANDAR 2024
Years__ESTANDAR 2025
City (Country) A


KeyError: 'A'