## 1. Introduction

The data presents the annual (not cumulative) payments made by the European Commission from the EU budget to the Member States, broken down by fund and by year. The totals paid per year (across funds) are also provided for each Member State. The payment amounts show the payments made during each year, broken down into interim payments and pre-financing. The % column shows the share paid to each Member State relative to the total available budget.


## 2. Analysis preparation


### 2.1. Load packages

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 
import datetime as dt
import folium
from folium.plugins import HeatMap, HeatMapWithTime
%matplotlib inline
import warnings
warnings.simplefilter(action='ignore')


### 2.2. Load data

In [None]:
# Location of the structural-funds CSV inside the Kaggle input directory.
data_path = "/kaggle/input/european-union-structural-founds-absorption-rate/eu_sf_far_2007_2013.csv"
# Load the file into the DataFrame used by the rest of the notebook.
data_df = pd.read_csv(data_path)

## 3. Data analysis


### 3.1. Glimpse the data

In [None]:
# Show the (rows, columns) dimensions of the loaded table.
data_df.shape

In [None]:
# Preview the first rows of the table (head() returns five rows by default).
data_df.head()

In [None]:
# Print the column names, their dtypes and non-null counts.
data_df.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
data_df.describe()

### 3.2. Data visualization


Let's see the distribution of years, fund type and total paid, interim, advance and percent.



In [None]:
def plot_count(feature, value, title, df, size=1):
    """Draw a bar chart of ``value`` per ``feature``, largest value first.

    Args:
        feature: Name of the column placed on the x axis.
        value: Name of the numeric column giving the bar heights; rows are
            sorted by it in descending order before plotting.
        title: Text inserted into the chart title ("Number of <title>").
        df: DataFrame holding both columns. It is not modified.
        size: Width multiplier; the figure is ``4 * size`` by 4 inches.
    """
    fig, ax = plt.subplots(1, 1, figsize=(4 * size, 4))
    ordered = df.sort_values([value], ascending=False).reset_index(drop=True)
    # x and y are passed by keyword because seaborn >= 0.12 no longer accepts
    # them positionally; ax is passed so the bars land on the figure created
    # above rather than on whatever axes happens to be current.
    sns.barplot(x=feature, y=value, data=ordered, palette='Set3', ax=ax)
    ax.set_title("Number of {}".format(title))
    # Rotate the category labels so long names do not overlap.
    ax.tick_params(axis='x', labelrotation=90)
    plt.show()

What types of funds are there?

In [None]:
# List the distinct fund labels present in the data.
data_df['fund'].unique()

Let's look at how the total amount paid evolves over time for each fund.

In [None]:
# Keep only the rows whose fund label is 'TOTAL'.
_d_df = data_df[data_df['fund'] == 'TOTAL']
# Sum the payments of all rows that share a year.
d_df = _d_df.groupby('year')['total_paid'].sum().reset_index()

plt.plot(d_df['year'], d_df['total_paid'])
plt.xlabel("Year")
plt.ylabel('Fund: Total')
plt.title("Total founds paid (2007-2016)")
plt.show()

In [None]:
# Keep only the rows whose fund label is 'ESF'.
_d_df = data_df[data_df['fund'] == 'ESF']
# Sum the payments of all rows that share a year.
d_df = _d_df.groupby('year')['total_paid'].sum().reset_index()

plt.plot(d_df['year'], d_df['total_paid'])
plt.xlabel("Year")
plt.ylabel('Fund: ESF')
plt.title("Total ESF founds paid (2007-2016)")
plt.show()

In [None]:
# Keep only the rows whose fund label is 'ERDF'.
_d_df = data_df[data_df['fund'] == 'ERDF']
# Sum the payments of all rows that share a year.
d_df = _d_df.groupby('year')['total_paid'].sum().reset_index()

plt.plot(d_df['year'], d_df['total_paid'])
plt.xlabel("Year")
plt.ylabel('Fund: ERDF')
plt.title("Total ERDF founds paid (2007-2016)")
plt.show()

In [None]:
# Keep only the rows whose fund label is 'CF'.
_d_df = data_df[data_df['fund'] == 'CF']
# Sum the payments of all rows that share a year.
d_df = _d_df.groupby('year')['total_paid'].sum().reset_index()

plt.plot(d_df['year'], d_df['total_paid'])
plt.xlabel("Year")
plt.ylabel('Fund: CF')
plt.title("Total CF founds paid (2007-2016)")
plt.show()

Let's visualize total values per country per year.

In [None]:
def plot_time_variation_countries(df, countries, fund='TOTAL', payment='total_paid', size=3, is_log=False):
    """Plot one line per country showing ``payment`` over the years for a fund.

    Each line is annotated with the country name at its last year. Every
    value is shifted by +1 before plotting (presumably so that zero payments
    remain drawable on a logarithmic axis).

    Args:
        df: DataFrame with ``country``, ``fund``, ``year`` and ``payment``
            columns. It is not modified.
        countries: Iterable of country names to draw; countries without any
            row for ``fund`` are skipped.
        fund: Value of the ``fund`` column to select.
        payment: Name of the numeric column plotted on the y axis.
        size: Scale factor; the figure is ``6 * size`` by ``3 * size`` inches.
        is_log: When true, the y axis uses a logarithmic scale.
    """
    fig, ax = plt.subplots(1, 1, figsize=(6 * size, 3 * size))
    for country in countries:
        # Copy the selection so the +1 shift below is applied to an
        # independent frame instead of a slice of the caller's data.
        country_df = df[(df['country'] == country) & (df['fund'] == fund)].copy()
        if country_df.empty:
            # Nothing to draw; taking the max year of an empty selection
            # would raise ValueError.
            continue
        country_df[payment] = country_df[payment] + 1
        sns.lineplot(x="year", y=payment, data=country_df, ax=ax)
        last_year = country_df['year'].max()
        # ax.text needs scalar coordinates. If several rows share the last
        # year, use their mean, which is also what lineplot draws by default.
        last_value = country_df.loc[country_df['year'] == last_year, payment].mean()
        ax.text(last_year, last_value, str(country))
    ax.set_xlabel('year')
    ax.set_ylabel(f'Fund: {fund} values: {payment}')
    ax.set_title(f'Fund: {fund} values: {payment}')
    ax.tick_params(axis='x', labelrotation=90)
    if is_log:
        ax.set_yscale("log")
    ax.grid(color='black', linestyle='dotted', linewidth=0.75)
    plt.show()

In [None]:
# Distinct country names, in order of first appearance in the data.
countries = data_df['country'].unique()

In [None]:
# Yearly 'total_paid' per country for the TOTAL fund, on a log-scaled y axis.
plot_time_variation_countries(
    df=data_df,
    countries=countries,
    fund='TOTAL',
    payment='total_paid',
    size=3,
    is_log=True,
)

In [None]:
# Yearly 'total_paid' per country for the ESF fund, on a log-scaled y axis.
plot_time_variation_countries(
    df=data_df,
    countries=countries,
    fund='ESF',
    payment='total_paid',
    size=3,
    is_log=True,
)

In [None]:
# Yearly 'total_paid' per country for the ERDF fund, on a log-scaled y axis.
plot_time_variation_countries(
    df=data_df,
    countries=countries,
    fund='ERDF',
    payment='total_paid',
    size=3,
    is_log=True,
)

In [None]:
import os
# Build the path to the country-code lookup table and load it.
codes_path = os.path.join("/kaggle", "input", "iso-country-codes-global", "wikipedia-iso-country-codes.csv")
country_codes_df = pd.read_csv(codes_path)
# Replace the file's headers with short names (this assumes exactly five columns).
country_codes_df.columns = ['country', 'C2', 'C3', 'numeric', 'iso']
country_codes_df.head()

In [None]:
# Attach the country codes to each payment row (merge defaults to an inner
# join, so rows whose country has no match are dropped), then order by year.
data_sel_df = data_df.merge(country_codes_df, on="country").sort_values(by='year')

In [None]:
# Preview the merged, year-sorted table.
data_sel_df.head()

In [None]:
import plotly.express as px

def plot_animated_map(dd_df, criterion="total_paid", fund='TOTAL', title=""):
    """Show an animated choropleth of Europe coloured by ``criterion``.

    The animation has one frame per value of the ``year`` column.

    Args:
        dd_df: DataFrame with ``fund``, ``year``, ``country``, ``total_paid``,
            ``advance``, ``interim``, ``absorption``, ``iso`` and ``C3``
            columns. ``C3`` is used as the map location key (presumably ISO
            alpha-3 codes; verify against the lookup table). Not modified.
        criterion: Name of the column that drives the colour scale.
        fund: Value of the ``fund`` column to select.
        title: Figure title.
    """
    # Copy the filtered rows so the hover column is added to an independent
    # frame rather than to a slice of the caller's data.
    d_df = dd_df.loc[dd_df.fund == fund].copy()

    # Join the label parts with "<br>" so that source indentation does not
    # leak into the hover text.
    d_df['hover_text'] = [
        "<br>".join((
            f"country: {row['country']}",
            f"total paid: {row['total_paid']}",
            f"advance: {row['advance']}",
            f"interim: {row['interim']}",
            f"absorption: {row['absorption']}",
            f"country code: {row['iso']}",
        ))
        for _, row in d_df.iterrows()
    ]

    fig = px.choropleth(d_df,
                        locations="C3",
                        hover_name='hover_text',
                        color=criterion,
                        animation_frame="year",
                        projection="natural earth",
                        color_continuous_scale=px.colors.sequential.Plasma,
                        width=600, height=600)
    fig.update_geos(
        showcoastlines=True, coastlinecolor="DarkBlue",
        showland=True, landcolor="LightGrey",
        showocean=True, oceancolor="LightBlue",
        showlakes=True, lakecolor="Blue",
        showrivers=True, rivercolor="Blue",
        showcountries=True, countrycolor="DarkBlue"
    )
    fig.update_layout(title=title, geo_scope="europe")
    fig.show()

In [None]:
# Animated map of 'total_paid' (the default criterion) for the TOTAL fund.
plot_animated_map(
    data_sel_df,
    fund='TOTAL',
    title="Total structural found per country per year",
)

In [None]:
# Animated map of 'total_paid' (the default criterion) for the TOTAL fund.
# NOTE(review): same fund and criterion as the preceding cell; only the title
# differs. Confirm whether a different fund was intended here.
plot_animated_map(
    data_sel_df,
    fund='TOTAL',
    title="TOTAL structural founds per country per year",
)

In [None]:
# Animated map coloured by the 'absorption' column for the TOTAL fund.
plot_animated_map(
    data_sel_df,
    criterion="absorption",
    fund='TOTAL',
    title="TOTAL structural founds absorption per country per year",
)

In [None]:
# Animated map coloured by the 'absorption' column for the ESF fund.
plot_animated_map(
    data_sel_df,
    criterion="absorption",
    fund='ESF',
    title="ESF structural founds absorption per country per year",
)