# Update profiles for functional groups of the IUCN Global Ecosystem typology

Scripts by José R. Ferrer-Paris

The scripts described in this document are used to:

- Read data from database
- Write static markdown pages for a Jekyll site

## Set-up
Load all the libraries we will need in this script:

In [2]:
import os
from pathlib import Path
import re
from datetime import datetime
from configparser import ConfigParser
import psycopg2
from psycopg2.extras import DictCursor
from psycopg2.extensions import AsIs
import shutil
import yaml

In [10]:
# Base folder under which the repositories used in this notebook are located.
repodir = Path(os.path.expanduser('~')).joinpath('proyectos', 'typology-website')


Read configuration parameters for the connection to the current version of the database:

In [3]:
# Ini file with the connection settings, and the section to read from it.
filename = Path(os.path.expanduser('~')).joinpath(".database.ini")
section = 'psqlaws'

parser = ConfigParser()
parser.read(filename)

# Stop right away when the requested section is not available.
if not parser.has_section(section):
    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

# The key/value pairs of the section become the connection keyword arguments.
db = dict(parser.items(section))
params = db

## Connect to database

In [4]:
# Open the connection with the parameters read from the configuration file.
conn = psycopg2.connect(**params)
# Rows from this cursor are accessed both by column name and by position in the cells below.
cur = conn.cursor(cursor_factory=DictCursor)


## Copy DAM between repos
Assuming `repo one` has been updated recently, copy files to `repo two`:

In [4]:
# Folder the diagrams are read from, and folder they are copied to.
repo_one = repodir.joinpath('typology-map-content', 'assets', 'uploads')
repo_two = repodir.joinpath('Ecosystem-profiles-comments', 'assets', 'diagrams')

# Keep the entries whose name contains 'diagram' somewhere after the first character.
DAMfiles = [entry for entry in os.listdir(repo_one) if entry.find('diagram') > 0]

In [43]:
# Matches a run of the letters m/f/t/s followed by two single digits, each
# preceded by an underscore (e.g. 't1_2'), so it can be rewritten as 't1.2'.
code_pattern = re.compile(r'([mfts]+)_([0-9])_([0-9])')

for infile in DAMfiles:
    source = repo_one / infile
    # copy only files
    if not os.path.isfile(source):
        continue
    stem = infile.replace('-diagram.png','')
    outfile = code_pattern.sub(r'\1\2.\3', stem).upper() + '.png'
    destination = repo_two / outfile
    shutil.copy(source, destination)
    

## Add references to data folder

In [6]:
# Export the markdown text of every reference cited by a map or by a functional group.
outfile =  repodir / 'Ecosystem-profiles-comments' / '_data' / 'references.yaml'
qry = """
SELECT ref_code,ref_markdown
FROM ref_list 
WHERE ref_code IN (SELECT distinct ref_code from map_references) 
OR ref_code IN (SELECT distinct ref_code from efg_references);
"""
cur.execute(qry)
references = cur.fetchall()
# Map each reference code to its markdown-formatted citation.
records = {item['ref_code']: item['ref_markdown'] for item in references}

# allow_unicode=True makes the dump keep non-ASCII characters as they are, so the
# file is opened explicitly as UTF-8 rather than with the platform's default encoding.
with open(outfile, 'w', encoding='utf-8') as file:
    yaml.dump(records, file, allow_unicode=True, encoding = 'utf-8')


In [7]:
# Spot check: display the markdown stored under one reference code.
records['Abell et al. 2008']

'Abell R, Thieme ML, Revenga C, Bryer M, Kottelat M, Bogutskaya N, Coad B, Mandrak N, Contreras Balderas S, Bussing W, Stiassny MLJ, Skelton P, Allen GR, Unmack P, Naseka A, Ng R, Sindorf N, Robertson J, Armijo E, Higgins JV, Heibel TJ, Wikramanayake E, Olson D, López HL, Reis RE, Lundberg JG, Sabaj Pérez MH, Petry P (2008) *Freshwater ecoregions of the world: A new map of biogeographic units for freshwater biodiversity conservation*, **BioScience** 58: 403–414. DOI:[10.1641/B580507](https://doi.org/10.1641/B580507)'

## Add map information to data folder

In [211]:
# Export one entry per map version, keyed as "<map_code>_<map_version>".
outfile = repodir.joinpath('Ecosystem-profiles-comments', '_data', 'mapinfo.yaml')
qry="""
    SELECT map_code, map_version, code, map_source,contributors,status 
    FROM map_metadata WHERE status IN ('valid','replaced','superceded','superceeded') 
    ORDER BY map_version DESC;
    """
cur.execute(qry)
mapinfo = cur.fetchall()

# Placeholder entry stored under the 'MISSING' key.
records = {
    'MISSING': {
        'code': 'MISSING',
        'version': 'MISSING',
        'description': 'Map information is missing',
        'contributors': 'MISSING',
    },
}
for item in mapinfo:
    mapcode = "%s_%s" % (item['map_code'], item['map_version'])
    records[mapcode] = {
        'code': item['map_code'],
        'efg': item['code'],
        'version': item['map_version'],
        'description': item['map_source'],
        'contributors': ", ".join(item['contributors']),
        # every status other than 'valid' is exported as 'replaced'
        'status': 'valid' if item['status'] == 'valid' else 'replaced',
    }

with open(outfile, 'w') as file:
    yaml.dump(records, file)


In [229]:
# Export, for each map version, the aggregated reference codes and datasets.
outfile = repodir.joinpath('Ecosystem-profiles-comments', '_data', 'maprefs.yaml')
qry="""
    SELECT map_code,map_version,ARRAY_AGG(ref_code) as refs, ARRAY_AGG(dataset) as datasets
    FROM map_references 
    GROUP BY map_code, map_version;
    """
cur.execute(qry)
refs = cur.fetchall()

# Start from the placeholder entry, then add one entry per "<map_code>_<map_version>".
records = {'MISSING': {'references': 'Missing references'}}
records.update(
    (
        "%s_%s" % (item['map_code'], item['map_version']),
        {'references': item['refs'], 'datasets': item['datasets']},
    )
    for item in refs
)

with open(outfile, 'w') as file:
    yaml.dump(records, file)

In [230]:
# Export the aggregated reference codes of each functional group, keyed by group code.
outfile =  repodir / 'Ecosystem-profiles-comments' / '_data' / 'efgrefs.yaml'
qry="""
    SELECT code,ARRAY_AGG(ref_code) as refs 
    FROM efg_references 
    GROUP BY CODE
    """
cur.execute(qry)
refs = cur.fetchall()

# The placeholder is a list rather than a tuple: PyYAML writes tuples with the
# Python-specific `!!python/tuple` tag, whereas the aggregated reference codes
# of the other entries are dumped as plain YAML sequences.
records = {'MISSING': ['Missing references']}
for item in refs:
    records[item['code']] = item['refs']

with open(outfile, 'w') as file:
    yaml.dump(records, file)

## Update EFG content (short descriptions)

In [6]:
# Fetch every functional group together with the name of its biome, ordered by code.
# The column aliases (biome, biome_name, ...) are the placeholder names used by the
# page template defined further down.
qry="""
SELECT code, f.biome_code as biome, f.name, f.shortname, b.name as biome_name,
    realms, f.update as original_date, shortdesc, keyfeatures, distdesc 
FROM functional_groups f
LEFT JOIN biomes b
    ON b.biome_code=f.biome_code
ORDER BY code
;"""
cur.execute(qry)
# Keep all rows in memory; later cells iterate over `efgs`.
efgs = cur.fetchall()


In [7]:
# Only the value of the last expression of a cell is displayed, so the result of
# len() is computed but not shown.
len(efgs)
# First row of the query result, as a sample of the record structure.
efgs[0]

['F1.1',
 'F1',
 'F1.1 Permanent upland streams',
 'F1.1 Perm upland streams',
 'F1. Rivers and streams biome',
 ['Freshwater'],
 datetime.datetime(2020, 6, 24, 9, 42, 1),
 'These small rivers or streams in mountainous or hilly areas are characterised by steep gradients and fast flow. They flow all year, increasing in wet periods, in humid tropical and temperate zones. Stones are common along their rapids and pools, turning over and oxygenating the water. Dependent organisms are specialised for these high flow-velocity environments, with resources for food webs derived mainly from the stream and inputs from adjacent and upstream vegetation.',
 'High-medium velocity, low-medium volume perennial flows with abundant benthic filter feeders, algal biofilms & small fish',
 'Global uplands with wet climates']

In [8]:
# Page template: a header delimited by `---` lines followed by the page body.
# The {placeholders} are filled with str.format() from one row of `efgs`, so their
# names must match the column aliases of the functional groups query.
template = """---
name: {name}
shortname: {shortname}
biome: {biome_name}
realm: {realms}
code: {code}
biomecode: {biome}
---

{shortdesc}

### Key Features

{keyfeatures}.

### Overview of distribution

{distdesc}.
"""

In [11]:
# Write one markdown page per functional group, named after the group's code.
for record in efgs:
    outfile =  repodir / 'Ecosystem-profiles-comments' / '_EFGs' / (record['code'] + '.md')
    outtext=template.format(**record)
    # Write UTF-8 explicitly instead of the platform's default encoding, so any
    # non-ASCII character in the descriptions is stored the same way everywhere.
    with open(outfile,'w', encoding='utf-8') as f:
        f.write(outtext)

## Content versions and contributors

In [25]:
# For every functional group, collect the contributors, update date and
# "current" flag of each content version found in the three section tables.
sections = ("Ecological Traits","Key Ecological Drivers","Distribution")
version = 'v2.1'
records = {}
for efg in efgs:
    record = {}
    for section in sections:
        # Section title -> table name suffix, e.g. "Ecological Traits" -> ecological_traits
        table_suffix = AsIs(section.lower().replace(' ','_'))
        qry ="""
        SELECT description,contributors,version,update 
        FROM efg_%s 
        WHERE code = %s AND language = 'en'
        ORDER BY update DESC;
        """
        cur.execute(qry, (table_suffix, efg['code']))

        for row in cur.fetchall():
            entry = {'contributors': row['contributors']}
            if isinstance(row['update'], datetime):
                entry['update'] = row['update'].date()
            if row['version'] == version:
                entry['current'] = True
            # A later row for the same version replaces the earlier entry.
            record[row['version']] = entry

    records[efg['code']] = record

In [26]:
# Inspect whatever `record` currently holds, i.e. the value left by the most recently run loop.
record

{'v2.01': {'contributors': None},
 'v2.1': {'contributors': ['DA Keith',
   'RT Kingsford',
   'F Essl',
   'LJ Jackson',
   'M Kelly-Quinn',
   'KR Young',
   'T Tahvanainen'],
  'update': datetime.date(2022, 4, 6),
  'current': True},
 'v2.0': {'contributors': ['DA Keith',
   'RT Kingsford',
   'F Essl',
   'LJ Jackson',
   'M Kelly-Quinn',
   'KR Young',
   'T Tahvanainen'],
  'update': datetime.date(2020, 6, 3)},
 'v1.0': {'contributors': ['DA Keith',
   'RT Kingsford',
   'F Essl',
   'LJ Jackson',
   'T Tahvanainen'],
  'update': datetime.date(2020, 1, 20)}}

In [27]:
# Save the per-group version information as a data file.
outfile =  repodir / 'Ecosystem-profiles-comments' / '_data' / 'efgversions.yaml'
# allow_unicode=True makes the dump keep non-ASCII characters as they are, so the
# file is opened explicitly as UTF-8 rather than with the platform's default encoding.
with open(outfile, 'w', encoding='utf-8') as file:
    yaml.dump(records, file, allow_unicode=True, encoding = 'utf-8')


In [170]:
# At this point `records` is a dict keyed by functional group code, so iterating
# it yields plain strings and `record['code']` raises a TypeError; its values do
# not carry the fields the page template needs either. The rows in `efgs` are
# the records that match the template, so the pages are rendered from them.
for efg_row in efgs:
    outfile =  repodir / 'Ecosystem-profiles-comments' / '_EFGs' / (efg_row['code'] + '.md')
    outtext=template.format(**efg_row)
    # Write UTF-8 explicitly instead of the platform's default encoding.
    with open(outfile,'w', encoding='utf-8') as f:
        f.write(outtext)

In [197]:
sections = ("Ecological Traits","Key Ecological Drivers","Distribution")
version = 'v2.1'
records=list()
for efg in efgs:
    qry="""
    SELECT code,f.biome_code,f.name,b.name as biome_name,realms,f.update as original_date 
    FROM functional_groups f
    LEFT JOIN biomes b
        ON b.biome_code=f.biome_code
    WHERE code = %s
    ;"""
    cur.execute(qry,(efg[0],))

    efginfo = cur.fetchone()
    record=dict()
    
    record['name']=efginfo['name']
    record['biome']=efginfo['biome_name']
    record['realm']=efginfo['realms']
    record['code']=efginfo['code']
    record['biomecode']=efginfo['biome_code']
    record['contributors']=list()
    
    for section in sections:
        qry ="""
        SELECT description,contributors,version,update 
        FROM efg_%s 
        WHERE code = %s AND language = 'en' AND version=%s
        ORDER BY update DESC;
        """
        cur.execute(qry,(AsIs(section.lower().replace(' ','_')),efginfo['code'],version))
        sectinfo = cur.fetchone()
        record[section]=sectinfo['description']
        for author in sectinfo['contributors']:
            if author not in record['contributors']:
                record['contributors'].append(author)
        record['version']="%s (%s)" % (sectinfo['version'],sectinfo['update'].date())

    reflist1=list()
    reflist2=list()

    
    qry="""
    SELECT map_code, map_version 
    FROM map_metadata WHERE code = %s AND status='valid' AND map_type='Indicative Map' 
    ORDER BY map_version DESC;
    """
    cur.execute(qry,(efg[0],))
    mapinfo = cur.fetchone()
    if mapinfo is not None:
        record['mapcode']="%s_%s" % (mapinfo['map_code'],mapinfo['map_version'])
    else:
        record['mapcode']="MISSING" 


    record['realmstr']=", ".join(record['realm'])
    record['contributorstr']=", ".join(record['contributors'])
    record['DAMtext']='{% include DAM.html %}'
    
    qry="""
    SELECT ref_cite 
    FROM efg_references as e 
    LEFT JOIN ref_list as l ON e.ref_code=l.ref_code 
    WHERE code = %s
    ORDER BY ref_cite;
    """
    cur.execute(qry,(efg[0],))
    refs = cur.fetchall()
    
    for ref in refs:
        if ref[0] is not None:
            reflist1.append("* "+ref[0])
    
    
    #record['maprefstr']='{% for ref in map.contributors %}\n* {{ref}}\n{% endfor %}'
    
    #record['mainrefstr']="\n".join(reflist1)
    
    records.append(record)

## Add maps with version codes

## Add map descriptions

## Close database connection

In [227]:

# Release the cursor first, then the connection it belongs to.
cur.close()
        
if conn is not None:
    conn.close()
    print('Database connection closed.')

Database connection closed.
