In [132]:
# Analysis and plotting stack; `calendar` supplies month abbreviations for plot labels.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import calendar
# Select the interactive widget backend, then turn on matplotlib's interactive mode.
%matplotlib widget
plt.ion()
# Default figure size (width, height) applied through seaborn's rc settings.
sns.set(rc={'figure.figsize': (11, 4)})
import warnings
# NOTE(review): this silences every warning for the whole session, including pandas'
# SettingWithCopyWarning; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')
# Raise pandas' display limits: up to 500 rows / 500 columns, 1000 characters wide.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [114]:
# Read the combined CSV into `ds`; the path is relative to the notebook's working directory.
DATA_CSV = "../data/ddf_combined_mlflx.csv"
ds = pd.read_csv(DATA_CSV)

In [115]:
# Distinct site identifiers, in order of first appearance in `ds`.
sites = ds["sitename"].unique()

In [123]:
def impute_site(ds, site, rng=None):
    """Fill missing daily GPP values of one site with month-wise random draws.

    For the selected site, the mean and standard deviation of
    ``GPP_NT_VUT_REF`` are computed per calendar month (pooled over all years
    present for that site). Every missing daily value is then replaced by a
    draw from ``normal(mean, sd)`` of its calendar month, so the imputed
    series follows the site's seasonal cycle.

    Usage::

        ds.loc[ds['sitename'] == site, "GPP_NT_VUT_REF"] = impute_site(ds, site).values

    Parameters
    ----------
    ds : pandas.DataFrame
        Table with at least the columns ``sitename``, ``date`` (strings in
        ``%Y-%m-%d`` format) and ``GPP_NT_VUT_REF``. It is not modified.
    site : str
        Value of ``sitename`` selecting the rows to impute.
    rng : optional
        Object exposing ``normal(loc, scale)``, e.g. ``numpy.random.default_rng(seed)``.
        When ``None`` the global ``numpy.random`` state is used, so
        ``numpy.random.seed`` controls reproducibility.

    Returns
    -------
    pandas.Series
        ``GPP_NT_VUT_REF`` for the site, indexed by date, in the same row order
        as the site's rows in ``ds``. Observed values are returned unchanged.
        A missing value stays NaN only when its calendar month has no
        observation at all for this site.
    """
    target = "GPP_NT_VUT_REF"

    # Copy the slice so adding the parsed dates never writes through to `ds`.
    site_df = ds.loc[ds['sitename'] == site, ["date", target]].copy()
    site_df['date'] = pd.to_datetime(site_df['date'], format="%Y-%m-%d")
    observed = site_df.set_index("date")[target]

    # Broadcast each month's statistics back onto the daily rows. Aligning by
    # month label (instead of positional `iloc[month - 1]`) stays correct when
    # a site has no rows for some calendar months.
    by_month = observed.groupby(observed.index.month)
    month_mean = by_month.transform("mean").to_numpy(dtype=float)
    # A month with a single observation has an undefined sd; fall back to 0 so
    # the gap is filled with the monthly mean rather than NaN.
    month_sd = by_month.transform("std").fillna(0.0).to_numpy(dtype=float)

    filled = observed.to_numpy(dtype=float, copy=True)
    fill_mask = np.isnan(filled) & ~np.isnan(month_mean)
    if fill_mask.any():
        sampler = np.random if rng is None else rng
        # One vectorised draw per missing day, written into our own array
        # (writing to rows yielded by iterrows() is not guaranteed to stick).
        filled[fill_mask] = sampler.normal(month_mean[fill_mask], month_sd[fill_mask])

    return pd.Series(filled, index=observed.index, name=target)
    

In [129]:
# Impute the missing GPP values site by site and write them back into `ds`.
# impute_site draws from NumPy's global random state, so seed it first to make
# the filled values reproducible under Restart & Run All.
IMPUTE_SEED = 42
np.random.seed(IMPUTE_SEED)

n_sites = len(sites)
for i, site in enumerate(sites, start=1):
    print(f"Site: {i}/{n_sites} - {site}")
    site_rows = ds['sitename'] == site
    # `.values` drops the date index so the assignment is positional over the
    # site's rows (impute_site returns them in the same order as in `ds`).
    ds.loc[site_rows, "GPP_NT_VUT_REF"] = impute_site(ds, site).values

Site: 1/71 - AR-Vir
Site: 2/71 - AU-Ade
Site: 3/71 - AU-ASM
Site: 4/71 - AU-DaP
Site: 5/71 - AU-DaS
Site: 6/71 - AU-Dry
Site: 7/71 - AU-Fog
Site: 8/71 - AU-Gin
Site: 9/71 - AU-How
Site: 10/71 - AU-Stp
Site: 11/71 - AU-Whr
Site: 12/71 - AU-Wom
Site: 13/71 - BE-Bra
Site: 14/71 - BE-Vie
Site: 15/71 - CH-Fru
Site: 16/71 - CH-Lae
Site: 17/71 - CH-Oe1
Site: 18/71 - CN-Cng
Site: 19/71 - CN-Qia
Site: 20/71 - CZ-wet
Site: 21/71 - DE-Akm
Site: 22/71 - DE-Geb
Site: 23/71 - DE-Gri
Site: 24/71 - DE-Hai
Site: 25/71 - DE-Kli
Site: 26/71 - DE-Obe
Site: 27/71 - DE-RuR
Site: 28/71 - DE-Spw
Site: 29/71 - DE-Tha
Site: 30/71 - DK-NuF
Site: 31/71 - DK-Sor
Site: 32/71 - FI-Hyy
Site: 33/71 - FI-Sod
Site: 34/71 - FR-Fon
Site: 35/71 - FR-LBr
Site: 36/71 - FR-Pue
Site: 37/71 - IT-Col
Site: 38/71 - IT-Cp2
Site: 39/71 - IT-Cpz
Site: 40/71 - IT-Isp
Site: 41/71 - IT-Lav
Site: 42/71 - IT-MBo
Site: 43/71 - IT-Noe
Site: 44/71 - IT-PT1
Site: 45/71 - IT-Ren
Site: 46/71 - IT-Ro1
Site: 47/71 - IT-SR2
Site: 48/71 - IT-SRo
S

In [133]:
def plot_target_by_months(ds, site, year):
    """Draw monthly box plots of daily ``GPP_NT_VUT_REF`` for one site and year.

    Parameters
    ----------
    ds : pandas.DataFrame
        Table with at least the columns ``sitename``, ``date`` (strings in
        ``%Y-%m-%d`` format) and ``GPP_NT_VUT_REF``. It is not modified.
    site : str
        Value of ``sitename`` to plot.
    year : int
        Calendar year to plot.

    Returns
    -------
    None
        The plot is drawn on a newly created figure, titled
        ``"<site> in <year>"``. Each call opens a new figure; close old ones
        with ``plt.close`` when calling this in a long loop.
    """
    target = "GPP_NT_VUT_REF"

    # Copy the slice so adding the parsed dates never writes through to `ds`.
    site_df = ds.loc[ds['sitename'] == site, ["date", target]].copy()
    site_df['date'] = pd.to_datetime(site_df['date'], format="%Y-%m-%d")
    site_df = site_df.set_index("date")

    year_df = site_df[site_df.index.year == year]
    month_numbers = year_df.index.month
    # assign() returns a new frame instead of setting a column on a filtered slice.
    year_df = year_df.assign(month=[calendar.month_abbr[m] for m in month_numbers])
    # Fix the x-axis to calendar order regardless of the row order in `ds`.
    month_order = [calendar.month_abbr[m] for m in sorted(set(month_numbers))]

    # A dedicated figure per call replaces re-running the backend magic inside
    # the function, which was needed only to avoid drawing on the previous axes.
    fig, ax = plt.subplots()
    sns.boxplot(data=year_df, x='month', y=target, order=month_order or None, ax=ax)
    ax.set_title(site + " in " + str(year))

In [134]:
# Monthly GPP distribution for site AR-Vir in 2009.
plot_target_by_months(ds, site="AR-Vir", year=2009)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [137]:
# Monthly GPP distribution for site AR-Vir in 2012.
plot_target_by_months(ds, site="AR-Vir", year=2012)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …