**Imports and Setup**

In [None]:
import os

import numpy as np
import pandas as pd
import requests
import dotenv
import json

import duckdb

# setup
# Read the Mindat API token stored under the "api_min" key of the local .env file.
API_KEY = dotenv.get_key(".env", "api_min")
if not API_KEY:
  # get_key yields None when the file or the key is missing; stop here with a
  # clear message instead of a TypeError on the string concatenation below.
  raise RuntimeError('Mindat API key not found: add "api_min=<token>" to the .env file')

# API root entry point
MINDAT_API_URL = "https://api.mindat.org"

# authorization header that must be included with each request.
headers = {'Authorization': 'Token ' + API_KEY}

# directories
DATA_DIR = os.path.join(os.path.abspath('./'), 'data')
QUERIES_DIR = os.path.join(os.path.abspath('./'), 'queries')
# Later cells write CSV files into DATA_DIR, so make sure it exists up front.
os.makedirs(DATA_DIR, exist_ok=True)

# fixing pandas analyzer: raise DuckDB's pandas_analyze_sample setting to 100_000
duckdb.execute("SET GLOBAL pandas_analyze_sample = 100_000")

**Making a simple request**

In [None]:

# Fetch a single page of geomaterials and preview a few columns of interest.

end_point = "/geomaterials/"
filter_dict = {
  'page':1,
  'page_size':1000
}

df_all = pd.DataFrame()
interest_cols = ['id','guid','name','colour','entrytype','mindat_formula','occurrence']

endpoint = MINDAT_API_URL+end_point
print("endpoint", endpoint)
# A timeout keeps the cell from hanging indefinitely on an unresponsive server.
response = requests.get(endpoint,params=filter_dict,headers=headers,timeout=5)
# Surface HTTP errors (bad token, rate limit, ...) here rather than as a
# KeyError on 'results' further down.
response.raise_for_status()

# The payload keeps the records under the 'results' key.
mineral_list = response.json()['results']
df_page = pd.DataFrame(mineral_list)
df_page[interest_cols].head(10)

**EXTRACT: Making paginated requests**

In [None]:
# Download several result pages from /geomaterials/ and stack them into df_all.

endpoint = MINDAT_API_URL + "/geomaterials/"

# Collect one frame per page and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop copies all previous rows every time.
page_frames = []
for page in range(1,3):
  filter_dict = {
    'page': page,
    'page_size':1000
  }

  try:
    # The request lives inside the try so that a timeout or connection error
    # on one page is reported and skipped instead of aborting the whole loop.
    response = requests.get(endpoint,params=filter_dict,headers=headers,timeout=5)
    response.raise_for_status()
    mineral_list = response.json()['results']
    print('page', page, response)

    # data frame handling
    page_frames.append(pd.DataFrame(mineral_list))

  except Exception as e:
    # Best-effort: keep going, but show what actually went wrong.
    print('Erro ao tentar criar dataframe!', 'pagina', page, repr(e))

# pd.concat raises on an empty list, so fall back to an empty frame when
# every page failed.
df_all = pd.concat(page_frames, ignore_index=True) if page_frames else pd.DataFrame()
df_all

**LOAD**
* Loading the extracted data into a CSV file using DuckDB

In [None]:
# Wrap the pandas frame in a DuckDB relation and dump it as the bronze CSV.
bronze_csv_path = os.path.join(DATA_DIR, "bronze_geomaterials.csv")
df_duck = duckdb.from_df(df_all)
df_duck.write_csv(bronze_csv_path, overwrite=True)

**bronze_geomaterials**

In [11]:
# Read the bronze CSV back as a DuckDB relation and display it.
# NOTE(review): the name `bronze_geomaterials` is presumably referenced from
# queries/silver_geomaterials.sql — confirm before renaming.
bronze_source = os.path.join(DATA_DIR, 'bronze_geomaterials.csv')
bronze_geomaterials = duckdb.read_csv(bronze_source)
bronze_geomaterials

┌───────┬─────────────┬──────────────────────┬──────────────────────┬───┬───────────────┬────────┬────────┬───────────┐
│  id   │   longid    │         guid         │         name         │ … │ shortcode_ima │ rimin  │ rimax  │ weighting │
│ int64 │   varchar   │       varchar        │       varchar        │   │    varchar    │ double │ double │   int64   │
├───────┼─────────────┼──────────────────────┼──────────────────────┼───┼───────────────┼────────┼────────┼───────────┤
│     1 │ 1:1:1:5     │ 464e5cfa-be77-4568…  │ Abelsonite           │ … │ Abl           │   NULL │   NULL │        13 │
│     2 │ 1:1:2:4     │ fb40f3e1-f058-46fe…  │ Abenakiite-(Ce)      │ … │ Abk-Ce        │   NULL │   NULL │        19 │
│     3 │ 1:1:3:3     │ 7e270e8f-93b1-419c…  │ Abernathyite         │ … │ Abn           │  1.608 │  1.608 │       103 │
│     4 │ 1:1:4:2     │ 7b925f9e-322d-4ad2…  │ Abhurite             │ … │ Abh           │   NULL │   NULL │       134 │
│     5 │ 1:1:5:1     │ 8c40e602-73b0-49

**silver_geomaterials**

In [32]:
# Load the SQL text for the silver layer and run it through DuckDB.
# The `with` block closes the file on exit, so no explicit close() is needed;
# the encoding is pinned so the query reads the same on every platform.
with open(os.path.join(QUERIES_DIR,'silver_geomaterials.sql'), encoding='utf-8') as f:
  query = f.read()

# NOTE(review): the query presumably selects from the `bronze_geomaterials`
# relation defined in an earlier cell — confirm against the .sql file.
silver_geomaterials = duckdb.sql(query)

In [33]:
# Persist the silver relation as a CSV inside DATA_DIR, replacing any previous file.
silver_csv_path = os.path.join(DATA_DIR, "silver_geomaterials.csv")
silver_geomaterials.write_csv(silver_csv_path, overwrite=True)

**TRASH**

In [None]:
# Scratch cell: fetch one small page and inspect the first raw record.

endpoint = MINDAT_API_URL + "/geomaterials/"

df_teste = pd.DataFrame()
# Start from an empty list so a failed request cannot leave `mineral_list`
# pointing at data from a previously executed cell.
mineral_list = []

filter_dict = {
  'page': 49,
  'page_size':10
}

try:
  # Request inside the try (with a timeout) so network failures are reported
  # by the handler below instead of escaping it.
  response = requests.get(endpoint,params=filter_dict,headers=headers,timeout=5)
  response.raise_for_status()
  mineral_list = response.json()['results']
  print('page', filter_dict['page'], response)

  # data frame handling
  print(type(mineral_list))
  df_teste = pd.DataFrame(mineral_list)

except Exception as e:
  # Best-effort: report the page and the underlying error.
  print('Erro ao tentar criar dataframe!', 'pagina', filter_dict['page'], repr(e))

df_teste = df_teste.reset_index(drop=True)
# Show the first raw record, or None when nothing was fetched.
mineral_list[0] if mineral_list else None