**Imports and Setup**

In [1]:
import os

import numpy as np
import pandas as pd
import requests
import dotenv
import json

import duckdb

# setup
API_KEY = dotenv.get_key(".env", "api_min")

# API root entry point
MINDAT_API_URL = "https://api.mindat.org"

# authorization header that must be included with each request.
headers = {'Authorization': 'Token ' + API_KEY}

# directories
DATA_DIR = os.path.join(os.path.abspath('./'), 'data')
QUERIES_DIR = os.path.join(os.path.abspath('./'), 'queries')


**Making a simple request**

In [68]:

# making a request

end_point = "/geomaterials/"
filter_dict = {
  'page':1,
  'page_size':1000
}

df_all = pd.DataFrame()
interest_cols = ['id','guid','name','colour','entrytype','mindat_formula','occurrence']

endpoint = MINDAT_API_URL+end_point
print("endpoint", endpoint)
response = requests.get(endpoint,params=filter_dict,headers=headers)

mineral_list = response.json()['results']
df_page = pd.DataFrame.from_dict(mineral_list)
df_page[interest_cols][:10]

endpoint https://api.mindat.org/geomaterials/


Unnamed: 0,id,guid,name,colour,entrytype,mindat_formula,occurrence
0,1,464e5cfa-be77-4568-a724-62137f35df18,Abelsonite,"Pink-purple, dark greyish purple, pale purplis...",0,Ni(C<sub>31</sub>H<sub>32</sub>N<sub>4</sub>),Mahogany Zone oil shale; found in six stratigr...
1,2,fb40f3e1-f058-46fe-9008-e0036aae3ec7,Abenakiite-(Ce),Pale brown,0,Na<sub>26</sub>Ce<sub>6</sub>(Si<sub>6</sub>O<...,In a xenolith of sodalite syenite. A late-stag...
2,3,7e270e8f-93b1-419c-b49e-217d547d3df7,Abernathyite,yellow,0,K(UO<sub>2</sub>)(AsO<sub>4</sub>)&middot;3H<s...,Colorado Plateau uranium-vanadium deposit
3,4,7b925f9e-322d-4ad2-9689-03849f7ea289,Abhurite,Colourless,0,Sn<sub>21</sub>Cl<sub>16</sub>(OH)<sub>14</sub...,On the surface of a tin ingot recovered from a...
4,5,8c40e602-73b0-49c8-b514-475c99ea5831,Ablykite,,0,,
5,6,1d46f167-d1c9-494e-b46b-bfe43acc7596,Abraum Salts,,0,,
6,7,ba449d79-63ad-4cb6-ba7b-59a5a6385e66,Abrazite,,0,,
7,8,06e59140-bc5c-4b73-90dd-7e400da0a89b,Absite,,2,Ti<sub>7</sub>U<sub>2</sub>ThO<sub>20</sub>&mi...,
8,9,befbce36-2949-4a16-a3df-07b5844bb1e2,Abswurmbachite,Black,0,CuMn<sup>3+</sup><sub>6</sub>(SiO<sub>4</sub>)...,"In very low-grade, high pressure metamorphic q..."
9,10,42460b0a-a0f4-4a29-8046-035a65bb60c0,Acanthite,"lead grey, black",0,Ag<sub>2</sub>S,


**EXTRACT: Making page requests**

In [55]:
# making a request

endpoint = MINDAT_API_URL + "/geomaterials/"
# interest_cols = ['id','guid','name','colour','entrytype','mindat_formula','occurrence','crystal_system']

df_all = pd.DataFrame()
for page in range(1,60):
  filter_dict = {
    'page': page,
    'page_size':1000
  }
  
  response = requests.get(endpoint,params=filter_dict,headers=headers,timeout=5)

  try:
    mineral_list = response.json()['results']
    print('page', page, response)

    # data frame handling
    df_page = pd.DataFrame.from_dict(mineral_list)
    df_all = pd.concat([df_all, df_page])

  except Exception as e:
    print('Erro ao tentar criar dataframe!', 'pagina', page)

df_all.reset_index(inplace=True, drop=True)
df_all

page 1 <Response [200]>
page 2 <Response [200]>
page 3 <Response [200]>
page 4 <Response [200]>
page 5 <Response [200]>
page 6 <Response [200]>
page 7 <Response [200]>
page 8 <Response [200]>
page 9 <Response [200]>
page 10 <Response [200]>
page 11 <Response [200]>
page 12 <Response [200]>
page 13 <Response [200]>
page 14 <Response [200]>
page 15 <Response [200]>
Erro ao tentar criar dataframe! pagina 16
page 17 <Response [200]>
page 18 <Response [200]>
page 19 <Response [200]>
page 20 <Response [200]>
page 21 <Response [200]>
page 22 <Response [200]>
page 23 <Response [200]>
page 24 <Response [200]>
page 25 <Response [200]>
page 26 <Response [200]>
page 27 <Response [200]>
page 28 <Response [200]>
page 29 <Response [200]>
page 30 <Response [200]>
page 31 <Response [200]>
page 32 <Response [200]>
page 33 <Response [200]>
page 34 <Response [200]>
page 35 <Response [200]>
page 36 <Response [200]>
page 37 <Response [200]>
page 38 <Response [200]>
page 39 <Response [200]>
page 40 <Response

PermissionError: [Errno 13] Permission denied: 'c:\\Users\\lucas\\Desktop\\code\\mindapp\\data'

**LOAD**
* loading data into csv file using duckdb

In [None]:
# creating a duckdb object
df_duck = duckdb.from_df(df_all)
df_duck.write_csv(os.path.join(DATA_DIR, "mindapp.csv"), overwrite=True)

In [69]:
df_duck.columns

['id',
 'longid',
 'guid',
 'name',
 'updttime',
 'mindat_formula',
 'mindat_formula_note',
 'ima_formula',
 'ima_status',
 'ima_notes',
 'varietyof',
 'synid',
 'polytypeof',
 'groupid',
 'entrytype',
 'entrytype_text',
 'description_short',
 'impurities',
 'elements',
 'sigelements',
 'tlform',
 'cim',
 'occurrence',
 'otheroccurrence',
 'industrial',
 'discovery_year',
 'diapheny',
 'cleavage',
 'parting',
 'tenacity',
 'colour',
 'csmetamict',
 'opticalextinction',
 'hmin',
 'hardtype',
 'hmax',
 'vhnmin',
 'vhnmax',
 'vhnerror',
 'vhng',
 'vhns',
 'luminescence',
 'lustre',
 'lustretype',
 'aboutname',
 'other',
 'streak',
 'csystem',
 'cclass',
 'spacegroup',
 'a',
 'b',
 'c',
 'alpha',
 'beta',
 'gamma',
 'aerror',
 'berror',
 'cerror',
 'alphaerror',
 'betaerror',
 'gammaerror',
 'va3',
 'z',
 'dmeas',
 'dmeas2',
 'dcalc',
 'dmeaserror',
 'dcalcerror',
 'cleavagetype',
 'fracturetype',
 'morphology',
 'twinning',
 'epitaxidescription',
 'opticaltype',
 'opticalsign',
 'opticala

In [66]:
with open(os.path.join(QUERIES_DIR,'mineral_silver.sql')) as f:
  query = f.read()
  f.close()

duckdb.sql(query)

┌───────────┬──────────────────────┬──────────────────────┬───┬───────────────┬─────────────┬─────────────────────┐
│ idMineral │       descName       │      descColour      │ … │ descImaStatus │ idVarietyOf │     dtUpdatedAt     │
│   int64   │       varchar        │       varchar        │   │    varchar    │   varchar   │       varchar       │
├───────────┼──────────────────────┼──────────────────────┼───┼───────────────┼─────────────┼─────────────────────┤
│         1 │ abelsonite           │ pink-purple, dark …  │ … │ approved      │ 0           │ 2022-08-03 21:27:09 │
│         2 │ abenakiite-(ce)      │ pale brown           │ … │ approved      │ 0           │ 2023-09-11 22:14:49 │
│         3 │ abernathyite         │ yellow               │ … │ approved      │ 0           │ 2024-01-11 10:28:12 │
│         4 │ abhurite             │ colourless           │ … │ approved      │ 0           │ 2023-08-07 12:59:27 │
│         5 │ ablykite             │                      │ … │ NULL    

**TRASH**

In [42]:
# making a request

endpoint = MINDAT_API_URL + "/geomaterials/"
# interest_cols = ['id','guid','name','colour','entrytype','mindat_formula','occurrence','crystal_system']

df_teste = pd.DataFrame()

filter_dict = {
  'page': 49,
  'page_size':10
}

response = requests.get(endpoint,params=filter_dict,headers=headers)

try:
  mineral_list = response.json()['results']
  print('page', filter_dict['page'], response)
  
  # data frame handling
  print(type(mineral_list))
  df_teste = pd.DataFrame.from_dict(mineral_list)

except Exception as e:
  print('Erro ao tentar criar dataframe!', 'pagina', filter_dict['page'])

df_teste.reset_index(inplace=True, drop=True)
mineral_list[0]

page 49 <Response [200]>
<class 'list'>


{'id': 483,
 'longid': '1:1:483:5',
 'guid': 'bbd041d5-d635-49be-8cc0-387e5f0315d6',
 'name': 'Baghdadite',
 'updttime': '2022-08-02 10:20:20',
 'mindat_formula': 'Ca<sub>6</sub>Zr<sub>2</sub>(Si<sub>2</sub>O<sub>7</sub>)<sub>2</sub>O<sub>4</sub>',
 'mindat_formula_note': '',
 'ima_formula': 'Ca<sub>6</sub>Zr<sub>2</sub>(Si<sub>2</sub>O<sub>7</sub>)<sub>2</sub>O<sub>4</sub>',
 'ima_status': ['APPROVED'],
 'ima_notes': [],
 'varietyof': 0,
 'synid': 0,
 'polytypeof': 0,
 'groupid': 35647,
 'entrytype': 0,
 'entrytype_text': 'mineral',
 'description_short': '',
 'impurities': ',Al,Fe,Mg,Na,',
 'elements': ['Ca', 'Si', 'Zr', 'O'],
 'sigelements': ['Ca', 'Si', 'Zr', 'O'],
 'tlform': 'As four grains observed in polished thin sections.',
 'cim': '14.10.25',
 'occurrence': 'In a melilite sskarn in contact with a banded diorite.',
 'otheroccurrence': '',
 'industrial': '',
 'discovery_year': '',
 'diapheny': 'Transparent',
 'cleavage': '',
 'parting': '',
 'tenacity': '',
 'colour': 'Colourles