# Set up

Load libraries

In [1]:
import os
import sys, getopt
from pathlib import Path
import xml.etree.cElementTree as ET
import pandas as pd

Libraries for database access

In [2]:
from configparser import ConfigParser
import psycopg2
from psycopg2.extras import DictCursor
from psycopg2.extensions import AsIs

## Database access

In [3]:
filename = Path(os.path.expanduser('~')) / ".database.ini"
section = 'iucnecoadm'

parser = ConfigParser()
parser.read(filename)
db = {}
if parser.has_section(section):
    params = parser.items(section)
    for param in params:
        db[param[0]] = param[1]
else:
    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

In [4]:
conn = psycopg2.connect(**db)
cur = conn.cursor(cursor_factory=DictCursor)

## Database query

In [5]:
qry = """
SELECT eco_id,eco_name,eco_name_orig,external_code,efg_code,level,membership,assigned_by 
FROM rle.assessment_get_xwalk
LEFT JOIN rle.assessment_units USING(eco_id)
"""
cur.execute(qry)
xwalks = cur.fetchall()
df_from_sql=pd.DataFrame(xwalks,
                columns=['eco_id','eco_name','eco_name_orig','external_code','efg_code','level','membership','assigned_by'])

Close database connection


In [6]:
cur.close()
        
if conn is not None:
    conn.close()
    print('Database connection closed.')

Database connection closed.


# Read XML database

Define function to read selected nodes from XML

In [7]:
def extract_IUCN_GET_classification(file):
    records=list()
    tree = ET.parse(file)
    root = tree.getroot()
    for CS in root:
        record={'id':CS.attrib['id'],'name':CS.attrib['name']}
        for AT in CS.findall('Assessment-Target'):
            record['AT-id']=AT.find('AT-id').text

            for elem in AT.findall("Classifications/Classification-system[@id='IUCN Global Typology']"):
                record['version']=elem.attrib['version']
                record['assigned-by']=elem.attrib['assigned-by']
                for subelem in elem:
                    record['level' + subelem.attrib['level']]=subelem.text
            records.append(record)
    return(records)

Path to the XML collection

In [8]:
xmldb = Path(os.path.expanduser('~')) / 'proyectos' / 'IUCN-RLE' / 'RLE_DB' 

Iterate through folders to read all xml files:

In [9]:
records=list()
for pub_status in ['3.Published','2.Not published' ]:
    for asm_type in ['Strategic','Systematic']:
        xmldir = xmldb / pub_status / asm_type
        for xmlfile in os.listdir(xmldir):
            new_records=extract_IUCN_GET_classification(xmldir / xmlfile)
            records.extend(new_records)
            

In [10]:
print(xmlfile)
len(records)

Pliscoff_RLE_Chile_2015.xml


638

In [11]:
records[10]

{'id': 'Oliveira_TepuiShrublands_2013_1',
 'name': 'Tepui shrublands, Southern Venezuela, LC',
 'AT-id': 'VEN_Tepui_Shrublands_1',
 'version': 'draft',
 'assigned-by': 'RLE team',
 'level1': 'Terrestrial',
 'level2': 'T3. Shrublands and shrubby woodlands',
 'level3': 'T3.1 Seasonally dry tropical shrublands'}

In [12]:
df_from_xml=pd.DataFrame(records)
df_from_xml.tail()

Unnamed: 0,id,name,AT-id,version,assigned-by,level1,level2,level3
633,MAC_RLE_Chile_2015_123,123. Temperate Mediterranean steppe of Festuca...,CHL_MAC_RLE_Chile_2015_123,,,,,
634,MAC_RLE_Chile_2015_124,124. Eastern temperate Mediterranean steppe of...,CHL_MAC_RLE_Chile_2015_124,,,,,
635,MAC_RLE_Chile_2015_125,125. Temperate eastern steppe of Festuca graci...,CHL_MAC_RLE_Chile_2015_125,,,,,
636,MAC_RLE_Chile_2015_126,126. Temperate eastern steppe of Festuca graci...,CHL_MAC_RLE_Chile_2015_126,,,,,
637,MAC_RLE_Chile_2015_127,127. Eastern temperate steppe of Festuca graci...,CHL_MAC_RLE_Chile_2015_127,,,,,


In [13]:
df_from_xml['assigned-by'].unique()
df_from_xml['version'].unique()

array(['draft', 'Draft', nan], dtype=object)

In [16]:
list1=df_from_sql['eco_id'].unique()
list2=df_from_xml['id'].unique()

In [19]:
for j in list1:
    if j in list2:
        print(j)

Etter_RLE_Colombia_2015_44
Etter_RLE_Colombia_2015_72


In [34]:
df_from_sql[df_from_sql['eco_id']=='Ghoraba_Egypt_MarineBar_2021:1']

Unnamed: 0,eco_id,eco_name,eco_name_orig,external_code,efg_code,level,membership,assigned_by
290,Ghoraba_Egypt_MarineBar_2021:1,,Sand plain ecosystem,,MT2.1,6.0,1.0,JRFEP


In [35]:
df_from_xml[df_from_xml['id']=='Ghoraba_Egypt_MarineBar_2021_1']

Unnamed: 0,id,name,AT-id,version,assigned-by,level1,level2,level3


In [33]:
df_from_sql[df_from_sql['eco_id']!=df_from_sql['external_code']]

Unnamed: 0,eco_id,eco_name,eco_name_orig,external_code,efg_code,level,membership,assigned_by
288,IUCN_FR_Mangroves_Mayotte_2017:2,,Mangroves mésohalines et mangroves estuariennes,,MFT1.2,6.0,1.0,JRFEP
289,IUCN_FR_Mangroves_Mayotte_2017:3,,"Tannes, prés-salés, mangroves et forêts supral...",,MFT1.2,6.0,1.0,JRFEP
290,Ghoraba_Egypt_MarineBar_2021:1,,Sand plain ecosystem,,MT2.1,6.0,1.0,JRFEP
291,Ghoraba_Egypt_MarineBar_2021:2,,Salt marshes,,MFT1.3,6.0,1.0,JRFEP
292,Uribe_Marine_Colombia_2020:1,,Manglares Caribe oceánico,,MFT1.2,6.0,1.0,JRFEP
293,Uribe_Marine_Colombia_2020:5,,Manglares Pacífico continental norte,,MFT1.2,6.0,1.0,JRFEP
294,Uribe_Marine_Colombia_2020:9,,Manglares Pacífico continental sur,,MFT1.2,6.0,1.0,JRFEP
295,Uribe_Marine_Colombia_2020:10,,Manglares Caribe continental,,MFT1.2,6.0,1.0,JRFEP
296,CaseStudies_RLE_Venezuela_2010:1,,Laguna de Tacarigua,,MFT1.2,6.0,1.0,JRFEP
297,CaseStudies_RLE_Venezuela_2010:2,,Ciénaga de Los Olivitos,,MFT1.2,6.0,1.0,JRFEP
