In [7]:
import numpy as np
import pandas as pd
from ydata_profiling import ProfileReport
import chardet
import glob
import os
import shutil
import seaborn as sns
import matplotlib.pyplot as plt
import unicodedata
import sys
import cfunits

# import custom modules
sys.path.append('../scripts')
import funcs

---
---
## 1. Load the data <a class="anchor" id="load-data"></a>
NOTE: If a file contains no non-ASCII characters, the encoding detector will report it as ASCII even if I've explicitly saved it as utf-8-sig, and that's fine. UTF-8-SIG just ensures that the .csv opens correctly in Excel on both a Mac and a Windows computer.

In [8]:
# import LCE site data
# `filename` is the dataset folder under ../data/input
filename = 'anderegg_2018'

# raw input files: summary data (01), raw curve data (02), and the metadata file
d1_path = f'../data/input/{filename}/anderegg_2018.csv'
d2_path = f'../data/input/{filename}/anderegg_2018_supp1.csv'
metadata_path = f'../data/input/{filename}/anderegg_2018_supp2.csv'

# ensure a readable encoding for each file; keep the returned path, since
# (per the messages printed below) a non-utf-8-sig file is re-saved as a
# separate *_utf8sig.csv copy while an already-converted file is left alone
utf8_d1_path = funcs.convert_to_utf8sig(d1_path)
utf8_d2_path = funcs.convert_to_utf8sig(d2_path)
utf8_metadata_path = funcs.convert_to_utf8sig(metadata_path)

Encoding ascii was detected for ../data/input/anderegg_2018/anderegg_2018.csv.
Converted ../data/input/anderegg_2018/anderegg_2018.csv to utf-8-sig and saved as ../data/input/anderegg_2018/anderegg_2018_utf8sig.csv
Encoding UTF-8-SIG was detected for ../data/input/anderegg_2018/anderegg_2018_supp1.csv.
../data/input/anderegg_2018/anderegg_2018_supp1.csv is already in utf-8-sig encoding.
Encoding UTF-8-SIG was detected for ../data/input/anderegg_2018/anderegg_2018_supp2.csv.
../data/input/anderegg_2018/anderegg_2018_supp2.csv is already in utf-8-sig encoding.


In [57]:
# load both re-encoded tables into DataFrames
# d1 uses the first CSV column as its row index; d2 keeps the default integer index
d1 = pd.read_csv(filepath_or_buffer=utf8_d1_path, index_col=0)
d2 = pd.read_csv(filepath_or_buffer=utf8_d2_path)

In [58]:
# d1 and d2 are joined on species name, but the two files spell names differently,
# so d1's labels are normalised first.
# Map of d1 labels (already lower-cased and stripped) to the full species name:
replacements = {
    # abbreviated genus
    'a. bidwillii': 'austromyrtus bidwillii',
    'a. excelsa': 'alphitonia excelsa',
    'b. australe': 'brachychiton australis',
    'c. gillivraei': 'cochlospermum gillivraei',
    # short codes
    'annha': 'annona hayesii',
    'astgr': 'astronium graveolens',
    'bursi': 'bursera simaruba',
    'cavpl': 'cavanillesia platanifolia',
    'cojru': 'cojoba rufescens',
    'genam': 'genipa americana',
    # labels carrying a suffix after the species name
    'quercus ilex_stpaul_les mages': 'quercus ilex',
    'quercus ilex_stpaul_puechabon': 'quercus ilex',
    'quercus ilex_stpaul_vic la gardiole': 'quercus ilex',
    # misspellings
    'eucualypus globulus': 'eucalyptus globulus',
    'populus balsimifora': 'populus balsamifera',
}

# lower-case, trim surrounding whitespace, then swap whole-value matches from the map
d1['Species'] = (
    d1['Species']
    .str.lower()
    .str.strip()
    .replace(replacements)
)

In [59]:
# normalise d2 species names the same way: lower-case, trim surrounding whitespace,
# then correct the one misspelled name
d2['Species name'] = (
    d2['Species name']
    .str.lower()
    .str.strip()
    .replace({'populus balsamifora':'populus balsamifera'})
)

In [61]:
# Left-join d2 onto d1 by the cleaned species names.
# how='left' keeps every d1 row; a d1 row whose species is absent from d2
# gets NaN in all of the d2 columns.
data = d1.merge(d2, left_on='Species', right_on='Species name', how='left')

# A left join never complains about unmatched keys, so a spelling that the
# cleanup above missed would otherwise only show up as silent NaNs.
# List any d1 species with no counterpart in d2 (report only; nothing is dropped).
unmatched_species = sorted(set(d1['Species'].dropna()) - set(d2['Species name'].dropna()))
if unmatched_species:
    print(f'{len(unmatched_species)} species in d1 have no match in d2: {unmatched_species}')

data

Unnamed: 0,Pathway,Type,Plantform,Leafspan,Tregion,Wregion,Wregion2,opt,Date,Time,...,Photo,VPD,Ci,Species name,Biome,Wb c,Wb d,N,Vcmax,Reference_y
0,C3,angiosperm,tree,evergreen,temperate,humid,0.977,opt,20/07/2009,7:03:51,...,7.037894,1.830000,165.275368,quercus ilex,BET,4.2,1.4,110,V,St. Paul et al. 2012
1,C3,angiosperm,tree,evergreen,temperate,humid,0.977,opt,28/04/2009,7:27:24,...,13.168188,1.360000,225.796292,quercus ilex,BET,4.2,1.4,110,V,St. Paul et al. 2012
2,C3,angiosperm,tree,evergreen,temperate,humid,0.977,opt,27/11/2009,8:52:30,...,14.126326,1.310000,251.358395,quercus ilex,BET,4.2,1.4,110,V,St. Paul et al. 2012
3,C3,angiosperm,tree,evergreen,temperate,humid,0.977,opt,21/06/2009,11:12:15,...,6.721385,1.910000,215.880258,quercus ilex,BET,4.2,1.4,110,V,St. Paul et al. 2012
4,C3,angiosperm,tree,evergreen,temperate,humid,0.977,opt,20/07/2009,11:30:24,...,3.526875,4.370000,236.524749,quercus ilex,BET,4.2,1.4,110,V,St. Paul et al. 2012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3693,,,,,,,,,,,...,20.327120,2.135457,227.486152,eucalyptus globulus,BET,1.6,1.4,73,81.7,Hernandez et al. 2016
3694,,,,,,,,,,,...,19.286368,2.480808,191.230231,eucalyptus globulus,BET,1.6,1.4,73,81.7,Hernandez et al. 2016
3695,,,,,,,,,,,...,18.756451,2.241162,196.043212,eucalyptus globulus,BET,1.6,1.4,73,81.7,Hernandez et al. 2016
3696,,,,,,,,,,,...,20.703066,2.046599,210.016550,eucalyptus globulus,BET,1.6,1.4,73,81.7,Hernandez et al. 2016
