In [1]:
import os
import glob
import numpy as np
import pandas as pd

In [2]:
src_dir = 'D:/Ameri_meteo_data/'
meteo_dir = src_dir + 'Merged/'
output_dir = src_dir + 'output/'

In [3]:
not_exist_site = src_dir + 'samples_out_of_range.csv'
site_mapping = src_dir + 'mapping.csv'
unique_site = src_dir + 'uniqueSite.csv'

In [4]:
not_exist_data = pd.read_csv(not_exist_site)
not_exist_list = not_exist_data['Site_ID'].to_list()

site_mapping_data = pd.read_csv(site_mapping)
#site_mapping_data

unique_site_data = pd.read_csv(unique_site)
unique_site_list = unique_site_data['Site_ID'].to_list()

In [5]:
meteo_files = glob.glob(meteo_dir+'*.csv')

In [6]:
ameri_sites_data = pd.read_csv('AmeriFlux_siteinfo.csv')
ameri_sites_list = ameri_sites_data['site_id'].to_list()
#ameri_sites_list

In [7]:
#split rows per years
# return year list and start and end position for each year
def split_records(src_data: pd.DataFrame) -> (list, list, list):
    years = []
    start_pos = []
    end_pos = []

    start_year = ''

    rows = src_data['Year'].count()

    for idx in range(rows):
        if start_year == '':
            start_year = src_data.iloc[idx]['Year']
            years.append(start_year)
            start_pos.append(idx)
        else:
            if start_year == src_data.iloc[idx]['Year']:
                _end = idx
            else:
                end_pos.append(_end)
                start_year = src_data.iloc[idx]['Year']
                years.append(start_year)
                start_pos.append(idx)

    end_pos.append(_end)
    
    return years, start_pos, end_pos

In [8]:
def create_vpd(row):
    sh = row['spfh']
    ta = row['tair']
    pa = row['psurf']/1000.0

    es = 0.6108 * np.exp((17.27*ta)/(ta+237.3)) * 1000
    ea = sh * (pa*1000) / (0.378*sh + 0.622)    # [Pa]
    if ea > es:
        ea = es
    vpd = (es - ea) /100.0

    return round(vpd,4)

In [9]:
selected_cols = ['tair', 'psurf','VPD_era', 'wind','precip','swdown','lwdown']

In [10]:
new_col_names = {'tair':'Ta_era', 'psurf':'Pa_era', 'wind':'WS_era','precip':'Precip_era', 'swdown':'Rg_era',
                 'lwdown':'LWin_era'}

In [11]:
for site_name in ameri_sites_list:
    if site_name in not_exist_list:
        continue
        
    unique_id = site_mapping_data[site_mapping_data['Site_ID'] == site_name]['Mapping_to_unique'].tolist()[0]
    if unique_id not in unique_site_list:
        print("An error site {} in mapping, please check".format(unique_id))
        
    meteo_file = meteo_dir + unique_id + '.csv'
    if not os.path.exists(meteo_file):
        continue
        
    dst_dir = output_dir + site_name + '/06_meteo_era/'
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
    
    print('Process site {} and file {}........'.format(site_name, meteo_file))
    df = pd.read_csv(meteo_file,dtype={'Year':int,'DOY':int,'Month':int,'Day':int,'Hour':int})
    #df['VPD_era'] = 0.0
    years, start_pos, end_pos = split_records(df)

    #site_name = 'US-Ne2'
    for year, start, end in zip(years, start_pos, end_pos):
        file_name = dst_dir + site_name + '_' + str(year) + '.csv'
        _data = df.iloc[start: end+1]
        _data['VPD_era'] = _data.apply (lambda row: create_vpd(row) , axis=1)
        _data = _data[selected_cols].rename(columns=new_col_names)
        _data['LWincalc_era'] = _data['LWin_era']
        _data = _data.interpolate()
        _data.to_csv(file_name,index=False)
        print("    Output {}".format(file_name))
        
    

Process site CA-ARB and file D:/Ameri_meteo_data/Merged/CA-ARB.csv........
Output file D:/Ameri_meteo_data/output/CA-ARB/06_meteo_era/CA-ARB_1980.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [12]:
tmp = df.iloc[100:200]
tmp

Unnamed: 0,Site_ID,Year,DOY,Month,Day,Hour,tair,precip,swdown,spfh,spRH,wind,lwdown,psurf
100,CA-ARB,1980,5,1,5,4,-20.36,0.0070,0.0,0.000645,86.195724,3.714970,207.21,1012.9832
101,CA-ARB,1980,5,1,5,5,-21.83,0.0070,0.0,0.000567,86.246930,3.481738,207.21,1014.3072
102,CA-ARB,1980,5,1,5,6,-23.30,0.0070,0.0,0.000489,84.782530,3.240988,156.70,1015.6378
103,CA-ARB,1980,5,1,5,7,-25.42,0.0000,0.0,0.000413,86.530390,3.308927,156.70,1015.5370
104,CA-ARB,1980,5,1,5,8,-27.55,0.0000,0.0,0.000336,85.597450,3.412243,156.70,1015.4360
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,CA-ARB,1980,9,1,9,3,-21.83,0.0447,0.0,0.000531,79.536550,5.258897,211.98,999.1003
196,CA-ARB,1980,9,1,9,4,-21.95,0.0106,0.0,0.000525,79.458350,5.355689,211.98,998.7671
197,CA-ARB,1980,9,1,9,5,-22.07,0.0106,0.0,0.000519,79.310540,5.441415,211.98,998.4404
198,CA-ARB,1980,9,1,9,6,-22.20,0.0105,0.0,0.000513,79.221280,5.531302,201.03,998.1072


In [46]:
a_row = df.iloc[111:222]
a_row['tair']

111   -32.53
112   -30.71
113   -28.88
114   -27.06
115   -26.56
       ...  
217   -22.85
218   -22.83
219   -22.81
220   -24.63
221   -26.44
Name: tair, Length: 111, dtype: float64

In [14]:
tmp['VPD_era'] = tmp.apply (lambda row: create_vpd(row) , axis=1)
_tmp_data = tmp[selected_cols].rename(columns=new_col_names)
_tmp_data['LWincalc_era'] = _tmp_data['LWin_era']

_tmp_data = _tmp_data.interpolate()
_tmp_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Ta_era,Pa_era,VPD_era,WS_era,Precip_era,Rg_era,LWin_era,LWincalc_era
100,-20.36,1012.9832,1.1973,3.714970,0.0070,0.0,207.21,207.21
101,-21.83,1014.3072,1.0525,3.481738,0.0070,0.0,207.21,207.21
102,-23.30,1015.6378,0.9237,3.240988,0.0070,0.0,156.70,156.70
103,-25.42,1015.5370,0.7625,3.308927,0.0000,0.0,156.70,156.70
104,-27.55,1015.4360,0.6266,3.412243,0.0000,0.0,156.70,156.70
...,...,...,...,...,...,...,...,...
195,-21.83,999.1003,1.0532,5.258897,0.0447,0.0,211.98,211.98
196,-21.95,998.7671,1.0421,5.355689,0.0106,0.0,211.98,211.98
197,-22.07,998.4404,1.0311,5.441415,0.0106,0.0,211.98,211.98
198,-22.20,998.1072,1.0193,5.531302,0.0105,0.0,201.03,201.03
