# Notebook to compute the Kd 490 anomaly 

In [1]:
# load libraries
import pandas as pd
import numpy as np

In [2]:
# Load the processed data stored in HDF5 format (.h5) into the DataFrame 'df_Kd_490'
## NOTE: This step can be skipped if the DataFrame already exists in memory ##
# The store handle is named 'store' (not 'input') so the built-in input() is not shadowed in the kernel.
with pd.HDFStore('df_Kd_490.h5', 'r') as store:
    df_Kd_490 = store.get('df_Kd_490')

In [7]:
# Preview the first five rows of the loaded frame
df_Kd_490.head(5)

Unnamed: 0,Kd_490,days,month,site_name,week,year
0,,1,1,IES,0,2003
1,,1,1,MAR,0,2003
2,,1,1,LRC,0,2003
3,,1,1,ELP,0,2003
4,,1,1,ESS,0,2003


In [8]:
def arrange_df(df):
    """Reorder, sort and gap-fill the Kd_490 records, then add weekly/monthly statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Expected to contain the columns 'site_name', 'year', 'month', 'week',
        'days' and 'Kd_490'. Any other column is dropped by the reindex.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified), sorted by site and date keys,
        with these columns appended:
        'Kd_W'         - mean Kd_490 per site/year/month/week
        'Kd_M'         - mean Kd_490 per site/year/month
        'Kd_Mclim'     - monthly climatology: mean Kd_490 per site/month over all years
        'Kd_Wclim'     - weekly climatology: mean Kd_490 per site/week over all years
        'Kd_W_Anomaly' - 'Kd_W' minus 'Kd_Wclim'
    """
    key_cols = ['site_name', 'year', 'month', 'week', 'days']

    # Put the key columns first (starting with 'site_name') and sort ascending.
    df = df.reindex(columns=key_cols + ['Kd_490'])
    df = df.sort_values(by=key_cols)

    # Linearly interpolate missing Kd_490 values *within each site*. Interpolating the
    # whole sorted frame would fill gaps at the start of one site from the last
    # values of the previous site, and would also touch the non-numeric columns.
    df['Kd_490'] = df.groupby('site_name')['Kd_490'].transform(lambda s: s.interpolate())

    # Weekly and monthly means for each individual year.
    df['Kd_W'] = df.groupby(['site_name', 'year', 'month', 'week'])['Kd_490'].transform('mean')
    df['Kd_M'] = df.groupby(['site_name', 'year', 'month'])['Kd_490'].transform('mean')

    # Monthly and weekly climatology (means across all years).
    df['Kd_Mclim'] = df.groupby(['site_name', 'month'])['Kd_490'].transform('mean')
    df['Kd_Wclim'] = df.groupby(['site_name', 'week'])['Kd_490'].transform('mean')

    # Weekly anomaly: departure of each weekly mean from its climatological value.
    df['Kd_W_Anomaly'] = df['Kd_W'] - df['Kd_Wclim']
    return df

In [9]:
# Rebind df_Kd_490 to the frame returned by arrange_df, which carries the added Kd_* columns
df_Kd_490 = arrange_df(df_Kd_490)

In [11]:
## Round every Kd column to 3 decimal places
for kd_column in ('Kd_490', 'Kd_M', 'Kd_W', 'Kd_Mclim', 'Kd_Wclim', 'Kd_W_Anomaly'):
    df_Kd_490[kd_column] = df_Kd_490[kd_column].round(3)

In [None]:
# Write the final frame to 'Kd_490.csv' (to_csv also writes the index as the first column by default)
df_Kd_490.to_csv('Kd_490.csv')