In [1]:
import pandas as pd

In [2]:
# Read data/full_data/result.csv into `data`; the next cell filters this frame.
data = pd.read_csv('data/full_data/result.csv')

In [3]:
# Show the AB1 rows for 2019 at the 'Indicator_normed' aggregation level,
# discarding any row that still contains a missing value.
data.loc[
    (data['Variable'] == "AB1")
    & (data['Year'] == 2019)
    & (data['Aggregation'] == 'Indicator_normed')
].dropna()

Unnamed: 0,ISO,Variable,Value,Aggregation,Year,Country,Continent,UNregion,IncomeLevel,Region
2820,ALB,AB1,52.38,Indicator_normed,2019.0,Albania,Europe,Southern Europe,Upper middle income,Europe & Central Asia
4308,DZA,AB1,13.02,Indicator_normed,2019.0,Algeria,Africa,Northern Africa,Lower middle income,Middle East & North Africa
5781,ASM,AB1,7.61,Indicator_normed,2019.0,American Samoa,Oceania,Polynesia,Upper middle income,East Asia & Pacific
7253,AND,AB1,95.05,Indicator_normed,2019.0,Andorra,Europe,Southern Europe,High income,Europe & Central Asia
14656,ARM,AB1,65.50,Indicator_normed,2019.0,Armenia,Asia,Western Asia,Upper middle income,Europe & Central Asia
...,...,...,...,...,...,...,...,...,...,...
340206,GBR,AB1,98.81,Indicator_normed,2019.0,United Kingdom,Europe,Northern Europe,High income,Europe & Central Asia
341694,USA,AB1,94.17,Indicator_normed,2019.0,United States,America,Northern America,High income,North America
346139,UZB,AB1,56.57,Indicator_normed,2019.0,Uzbekistan,Asia,Central Asia,Lower middle income,Europe & Central Asia
347612,VUT,AB1,40.96,Indicator_normed,2019.0,Vanuatu,Oceania,Melanesia,Lower middle income,East Asia & Pacific


In [None]:
# Preview the frame with 'America' relabelled as 'The Americas'.
# The result is only displayed, not assigned, so `data` itself keeps the old label.
data.replace(to_replace='America', value='The Americas')

In [None]:
from processing.imputation import impute_data_using_rule, interpolate_linear


In [None]:
# Load the preprocessed SL1 file into `df`.
df = pd.read_csv('data/indicator/SL1/preprocessed/SL1_origin.M.csv')

In [None]:
# Set the same URL string on every row of `df` (scalar assignment broadcasts).
df['URL'] = 'http://fenix.fao.org/faostat/internal/en/#data/ESB'

In [None]:
# Display the preprocessed SL2 file; the frame is not kept in a variable.
pd.read_csv('data/indicator/SL2/preprocessed/SL2_origin.M.csv')

In [None]:
# Display the processed SL2 file; the frame is not kept in a variable.
pd.read_csv('data/indicator/SL2/processed/SL2_origin.M.csv')

In [None]:
# Load the raw SL1 file; this rebinds `df`, replacing any frame previously held under that name.
df = pd.read_csv('data/indicator/SL1/raw/SL1_FAO.M.csv')

In [None]:
# Replace each negative entry v in the 'Value' column with 5 - v.
# The assignment is restricted to the 'Value' column: indexing the frame with a
# bare boolean mask selects whole rows, so the arithmetic would also be applied
# to every other column (and fails outright on non-numeric ones).
# NOTE(review): the meaning of the constant 5 is not visible here — confirm.
negative_mask = df['Value'] < 0
df.loc[negative_mask, 'Value'] = 5 - df.loc[negative_mask, 'Value']

In [None]:
# Display the per-column minimum of `df`.
# NOTE(review): presumably a check that no negative 'Value' remains — confirm.
df.min()

In [None]:
import os
import pandas as pd
import plotly.express as px

# Collect every CSV under data/indicator/<XXX>/{processed,preprocessed} into a
# single long frame, tagging each row with the stage it was read from.
# Only directory entries whose name is exactly three characters long are used.
indicators = [entry for entry in os.listdir('data/indicator') if len(entry) == 3]

dfs = []

for indicator in indicators:
    for status in ('processed', 'preprocessed'):
        status_dir = f'data/indicator/{indicator}/{status}'
        for csv_name in os.listdir(status_dir):
            frame = pd.read_csv(f'{status_dir}/{csv_name}')
            frame['status'] = status

            # In processed files the 'Indicator' column is copied into
            # 'Variable' first, so both stages end up with a 'Variable' column.
            if status == 'processed':
                frame['Variable'] = frame['Indicator']

            # 'Indicator' becomes the part of 'Variable' before the first dot.
            frame['Indicator'] = frame['Variable'].map(lambda name: name.split('.')[0])
            dfs.append(frame)

data = pd.concat(dfs, axis=0)

In [None]:
# Compare processed vs. preprocessed values for one indicator and a list of ISO
# codes: one facet per Variable, colour by stage, marker symbol by 'From'.
indicator = 'SE2'
ISO = ['DJI']

selection = (data['Indicator'] == indicator) & data['ISO'].isin(ISO)
df = data[selection]

# Hide the 'Source' and 'Description' columns in the tooltip; show the flags.
hover_columns = {
    'Source': False,
    'Description': False,
    'Corrected': True,
    'Imputed': True,
}

fig = px.scatter(
    df,
    x='Year',
    y='Value',
    facet_col='Variable',
    facet_col_wrap=2,
    symbol='From',
    color='status',
    hover_data=hover_columns,
)

# Give every facet its own y-range, and show tick labels on the second column.
fig.update_yaxes(matches=None)
fig.update_yaxes(showticklabels=True, col=2)

# EXAMPLE

In [None]:
# Project helpers used by the example cells in this section.
from processing.imputation import impute_data_using_rule, interpolate_linear
# NOTE: a function with the same name is defined again in a later cell of this
# notebook; once that cell runs, it replaces this imported version.
from processing.outliers_filtering import filter_outliers_from_ISO
import matplotlib.pyplot as plt
# Use the 'ggplot' style for all matplotlib figures created afterwards.
plt.style.use('ggplot')


In [None]:
def filter_outliers_from_ISO(df):
    """Add a filtered copy of 'Value' and a flag marking the rows it changed.

    The 'Value' column is passed to ``hampel`` (window_size=3, n=1) with a
    fresh 0..n-1 index, and the returned values are stored positionally in a
    'filtered_Value' column. 'Corrected' is True wherever the filtered value
    differs from the original by more than 1e-3.

    NOTE(review): ``hampel`` is not defined or imported in the visible part of
    this notebook — presumably it comes from the project's outlier-filtering
    code or an earlier cell; confirm it is in scope before running.

    Args:
        df: DataFrame with a numeric 'Value' column. It is modified in place.

    Returns:
        The same DataFrame object, with 'filtered_Value' and 'Corrected' set.
    """
    positional_values = df['Value'].reset_index(drop=True)
    filtered = hampel(positional_values, window_size=3, n=1)
    # `.values` discards the filter's index so the assignment is by position.
    df.loc[:, 'filtered_Value'] = filtered.values
    deviation = (df['filtered_Value'] - df['Value']).abs()
    df['Corrected'] = deviation > 1e-3
    return df

In [None]:
# Worked example for one country/variable pair: take the preprocessed series,
# impute it, run the outlier filter on a copy, and overlay the three curves.
ISO = 'QAT'
Variable = 'EW2'

is_selected = (
    (data['ISO'] == ISO)
    & (data['Variable'] == Variable)
    & (data['status'] == 'preprocessed')
)
df = data[is_selected]

imputed_df = impute_data_using_rule(df, interpolate_linear)

# The filter writes new columns into its argument, so hand it a copy.
processed_df = filter_outliers_from_ISO(imputed_df.copy())

plt.figure(figsize=(10, 6))
for frame, column, label in (
    (df, 'Value', 'raw'),
    (imputed_df, 'Value', 'imputed'),
    (processed_df, 'filtered_Value', 'processed'),
):
    plt.plot(frame['Year'], frame[column], label=label, marker='o')
plt.legend()
plt.show()