In [1]:
import os
from pathlib import Path
import re
from datetime import date,datetime
from configparser import ConfigParser
import psycopg2
from psycopg2.extensions import AsIs
import pandas as pd
import pyprojroot

In [2]:
# Resolve the project root as the directory matched by the has_dir(".git")
# criterion; it is used further down to build the path of the exported workbook.
repodir = pyprojroot.find_root(pyprojroot.has_dir(".git"))

In [3]:
# Load the database connection settings from the ini file in the user's
# home directory.
filename = Path(os.path.expanduser('~')) / ".database.ini"
section = 'iucnecoadm'

parser = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that list so a missing file is reported
# as a missing file rather than as a missing section.
if not parser.read(filename):
    raise FileNotFoundError('Configuration file {0} not found or not readable'.format(filename))
if not parser.has_section(section):
    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

# Every option of the section becomes one entry of the settings dict.
db = dict(parser.items(section))

# Second name bound to the same dict.
params = db

In [4]:
# Open the database connection, passing every entry of `db` as a keyword
# argument, and create the cursor used by the query cells below.
conn = psycopg2.connect(**db)
cur = conn.cursor()

In [5]:
# Query 1: the short descriptive fields of every row in functional_groups,
# ordered by biome code. All rows are pulled into memory with fetchall().
qry = """
SELECT code, biome_code, name, shortname, update, shortdesc, keyfeatures, distdesc 
FROM functional_groups 
ORDER BY biome_code ;"""
cur.execute(qry )
short_table = cur.fetchall()

In [6]:
# Query 2: the long profile text. Starting from the key-ecological-drivers
# table, left-join the ecological-traits and distribution tables on
# (code, language, version, contributors), then left-join functional_groups on
# code to pick up the name. Only English ('en') content of version 'v2.1' is
# kept. The contributors array is flattened to a ' :: '-separated string and
# the last column is a composed "Content version ... updated ..." label.
qry = """
SELECT code, name,
et.description as traits, k.description as key_drivers, d.description as distribution, 
array_to_string(contributors,' :: '), 'Content version ' || version || ' updated ' || date(k.update)
FROM efg_key_ecological_drivers as k
LEFT JOIN efg_ecological_traits as et
    USING (code,language,version,contributors)
LEFT JOIN efg_distribution as d
    USING (code,language,version,contributors)
LEFT JOIN functional_groups
    USING(code)
WHERE language = 'en' AND version = 'v2.1';"""
cur.execute(qry )
long_table = cur.fetchall()

In [7]:
# Query 3: the references attached to each code. efg_references is left-joined
# to ref_list on ref_code, so a row of efg_references is kept even when
# ref_list has no matching entry. Rows are ordered by code.
qry = """
SELECT code, ref_code, ref_cite, author_list,date,title,post_title,doi
FROM efg_references 
LEFT JOIN ref_list 
    USING (ref_code)
ORDER BY code;
"""
cur.execute(qry )
references = cur.fetchall()

In [8]:
# Release the database resources; the query results were already fetched into
# Python lists by the cells above.
# try/finally guarantees the connection is closed even if closing the cursor
# or committing raises. No `None` check is needed: `conn` has necessarily been
# used by the time the connection is closed.
try:
    cur.close()
    conn.commit()
finally:
    conn.close()
    print('Database connection closed.')

Database connection closed.


In [9]:
# Tabulate the rows of the short-description query; the labels are given in
# the same order as the columns of that SELECT.
short_table_df = pd.DataFrame(
    data=short_table,
    columns=[
        "code",
        "biome code",
        "name",
        "short name",
        "update",
        "short description",
        "key features",
        "distribution summary",
    ],
)

In [10]:
# Tabulate the rows of the profile-text query; the labels are given in the
# same order as the columns of that SELECT.
long_table_df = pd.DataFrame(
    data=long_table,
    columns=[
        "code",
        "name",
        "ecosystem properties",
        "ecological drivers",
        "distribution",
        "contributors",
        "update",
    ],
)

In [11]:
# Tabulate the rows of the references query; the labels are given in the same
# order as the columns of that SELECT.
refs_df = pd.DataFrame(
    data=references,
    columns=[
        "code",
        "citation",
        "full reference",
        "author list",
        "date",
        "title",
        "post title",
        "doi",
    ],
)

In [12]:
# Preview the first rows of the references frame as a quick sanity check.
refs_df.head()

Unnamed: 0,code,citation,full reference,author list,date,title,post title,doi
0,F1.1,"Meyer JL, Strayer DL, Wallace JB, Eggert SL, H...","Meyer JL, Strayer DL, Wallace JB, Eggert SL, H...","Meyer JL, Strayer DL, Wallace JB, Eggert SL, H...",2007,The contribution of headwater streams to biodi...,43: 86–103,10.1111/j.1752-1688.2007.00008.x
1,F1.1,"Meyer JL, Wallace JB 2001","Meyer JL, Wallace JB (2001) **Lost linkages an...","Meyer JL, Wallace JB",2001,,"Blackwell, Oxford",
2,F1.1,"Giller PS., Giller P., Malmqvist B 1998","Giller PS., Giller P., Malmqvist B (1998) **Th...","Giller PS., Giller P., Malmqvist B",1998,,"Oxford University Press, Oxford",
3,F1.2,"Tockner K, Malard, F, Ward JV 2000","Tockner K, Malard, F, Ward JV (2000) An extens...","Tockner K, Malard, F, Ward JV",2000,An extension of the flood pulse concept,14: 2861-2883,10.1002/1099-1085(200011/12)14:16/17<2861::aid...
4,F1.3,Olsson TI 1981,Olsson TI (1981) Overwintering of benthic macr...,Olsson TI,1981,Overwintering of benthic macroinvertebrates in...,4: 161-166,10.1111/j.1600-0587.1981.tb00993.x


In [13]:
# Name of the exported workbook, stamped with today's date (ISO format,
# YYYY-MM-DD).
outfile = "IUCN-GET-profiles-exported-%s.xlsx" % (date.today().isoformat(),)

In [14]:
# Write the three frames to a single workbook under <repo>/data, one sheet
# each. Each entry is (frame, sheet name, freeze_panes); the index is never
# written.
sheets = [
    (short_table_df, 'Short description', (1, 1)),
    (long_table_df, 'Profile text', (1, 1)),
    (refs_df, 'List of References', (1, 2)),
]
with pd.ExcelWriter(repodir / 'data' / outfile) as writer:
    for frame, sheet_name, panes in sheets:
        frame.to_excel(writer, sheet_name=sheet_name, index=False, freeze_panes=panes)