In [1]:
import pandas as pd
import numpy as np

import warnings
# NOTE(review): this silences every warning category for the whole session,
# including pandas deprecation / SettingWithCopy warnings that later cells may raise.
warnings.filterwarnings("ignore")

import sys
import os

# Parent of the current working directory: os.path.abspath('') resolves to the
# cwd, so this is one level above the directory the kernel is running in.
notebook_dir = os.path.dirname(os.path.abspath(''))

# Locate the PythonPrep directory: reuse notebook_dir when its path already
# mentions 'PythonPrep', otherwise point at a sibling 'PythonPrep' folder.
if 'PythonPrep' in notebook_dir:
    pythonprep_dir = notebook_dir
else:
    pythonprep_dir = os.path.join(os.path.dirname(notebook_dir), 'PythonPrep')

# Make modules stored in that directory importable.
sys.path.append(pythonprep_dir)

from paths import main_path

In [2]:
# Load the UNU-WIDER tax-revenue (share of GDP) table from the project data folder.
tax_csv_path = main_path + '/Democracy/PythonData/tax-revenues-as-a-share-of-gdp-unu-wider.csv'
df = pd.read_csv(tax_csv_path)
df


Unnamed: 0,Entity,Code,Year,Taxes including social contributions (as a share of GDP)
0,Afghanistan,AFG,2003,2.512631
1,Afghanistan,AFG,2004,4.076170
2,Afghanistan,AFG,2005,4.668273
3,Afghanistan,AFG,2006,7.115892
4,Afghanistan,AFG,2007,6.061553
...,...,...,...,...
6110,Zimbabwe,ZWE,2017,16.965162
6111,Zimbabwe,ZWE,2018,13.533631
6112,Zimbabwe,ZWE,2019,10.637546
6113,Zimbabwe,ZWE,2020,12.800961


In [3]:
# Keep only observations from 2001 onwards.
is_2001_or_later = df['Year'].ge(2001)
df = df[is_2001_or_later]
df

Unnamed: 0,Entity,Code,Year,Taxes including social contributions (as a share of GDP)
0,Afghanistan,AFG,2003,2.512631
1,Afghanistan,AFG,2004,4.076170
2,Afghanistan,AFG,2005,4.668273
3,Afghanistan,AFG,2006,7.115892
4,Afghanistan,AFG,2007,6.061553
...,...,...,...,...
6110,Zimbabwe,ZWE,2017,16.965162
6111,Zimbabwe,ZWE,2018,13.533631
6112,Zimbabwe,ZWE,2019,10.637546
6113,Zimbabwe,ZWE,2020,12.800961


In [4]:
# Earliest observed year for each country.
first_years = df.groupby('Entity')['Year'].min()

# Countries whose series begins exactly in 2001.
countries_starting_in_2001 = first_years[first_years == 2001].index

# Keep those countries' rows, discarding observations whose tax share is exactly 0.
starts_in_2001 = df['Entity'].isin(countries_starting_in_2001)
nonzero_share = df['Taxes including social contributions (as a share of GDP)'] != 0
filtered_df = df[starts_in_2001 & nonzero_share]

In [5]:
# Rebind df to an independent copy of filtered_df.
df = filtered_df.copy()
def filter_countries_starting_in_2001(dataframe):
    """Return the rows of countries whose earliest recorded year is 2001.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain an "Entity" column (grouping key) and a "Year" column.

    Returns
    -------
    pandas.DataFrame
        The rows of ``dataframe`` (original index and column order kept) whose
        "Entity" has a minimum "Year" equal to 2001.
    """
    # Minimum year per entity, indexed by entity name.
    earliest_year = dataframe.groupby("Entity")["Year"].min()
    # Entities whose minimum year is exactly 2001.
    qualifying = earliest_year.index[earliest_year == 2001]
    return dataframe.loc[dataframe["Entity"].isin(qualifying)]

# Re-check the 2001 start on the current df: a country whose 2001 row was removed
# earlier (zero tax share) now has a later first year and is dropped here.
filtered_df = filter_countries_starting_in_2001(df)
filtered_df

Unnamed: 0,Entity,Code,Year,Taxes including social contributions (as a share of GDP)
27,Albania,ALB,2001,20.617838
28,Albania,ALB,2002,21.121912
29,Albania,ALB,2003,21.441029
30,Albania,ALB,2004,22.503714
31,Albania,ALB,2005,22.486690
...,...,...,...,...
6096,Zambia,ZMB,2018,16.077042
6097,Zambia,ZMB,2019,16.113216
6098,Zambia,ZMB,2020,15.706920
6099,Zambia,ZMB,2021,16.048060


In [6]:
# Number of distinct countries remaining after filtering.
filtered_df['Entity'].nunique()

168

In [7]:
# Take a copy instead of aliasing filtered_df: the next step adds a column to df,
# which would otherwise also mutate filtered_df and write into a filtered slice
# (the situation pandas reports as SettingWithCopyWarning).
df = filtered_df.copy()

In [8]:
# Alternative outcome column, currently disabled:
#df['growth_rate'] = df.groupby('Entity')['Taxes on income, profits and capital gains (TIPCG) (as a share of GDP)'].pct_change() * 100

# Percentage change of the tax share from one observation to the next within each country.
# pct_change compares every row with the previous row of its group, so the rows are
# ordered by year explicitly rather than relying on the order of the input file.
# The result is aligned back to df by index, so df's own row order is unchanged.
# NOTE(review): rows removed upstream leave gaps, so the "previous" observation may be
# more than one year earlier — confirm this is acceptable.
df['growth_rate'] = (
    df.sort_values(['Entity', 'Year'])
    .groupby('Entity')['Taxes including social contributions (as a share of GDP)']
    .pct_change()
    * 100
)



In [9]:
# Averaging windows: label -> (first year, last year), both bounds inclusive.
# NOTE(review): 2019 falls inside both windows — confirm the overlap is intended.
periods = {
    '2001-2019': (2001, 2019),
    '2019-2022': (2019, 2022),
}

# Mean growth rate per country for each window. One Series per window is collected
# and joined with a single concat instead of growing a DataFrame inside the loop.
period_means = []
for period, (start_year, end_year) in periods.items():
    period_df = df[(df['Year'] >= start_year) & (df['Year'] <= end_year)]
    avg_growth_rate = period_df.groupby('Entity')['growth_rate'].mean().rename(period)
    period_means.append(avg_growth_rate)

# Name the country axis explicitly so the column produced by reset_index does not
# depend on whether concat keeps or drops the index name.
# The output columns keep their 'avg_taxshare_*' names, but the values are mean
# growth rates (in percent) of the tax share.
result_df = (
    pd.concat(period_means, axis=1)
    .rename_axis('country')
    .reset_index()
    .rename(columns={
        '2001-2019': 'avg_taxshare_2001_2019',
        '2019-2022': 'avg_taxshare_2019_2022',
    })
)

result_df

Unnamed: 0,country,avg_taxshare_2001_2019,avg_taxshare_2019_2022
0,Albania,1.199923,-0.495293
1,Angola,-2.555262,0.862481
2,Anguilla,2.798285,
3,Antigua and Barbuda,0.588258,2.552743
4,Argentina,2.458281,0.695743
...,...,...,...
163,Uzbekistan,-0.366153,0.125697
164,Vanuatu,0.866242,-14.751801
165,Venezuela,5.000033,
166,Vietnam,1.076696,-2.285137


In [10]:
# Drop rows with a missing value in any column (e.g. countries lacking a mean for one period).
result_df = result_df.dropna(axis=0, how='any')

In [11]:
###

In [12]:
# df = pd.read_csv(main_path + '/Democracy/PythonData/tax-revenues-as-a-share-of-gdp-unu-wider.csv')
# df.head()


In [13]:
# df = df[(df['Year']==2000) | (df["Year"]==2018)]
# pivot_df = df.pivot_table(index=['Entity', 'Code'], columns='Year', values='Taxes including social contributions (as a share of GDP)').reset_index()
# df_renamed = pivot_df.rename(columns={'Entity': 'country', 2000: 'avg_taxshare_2000', 2018: 'avg_taxshare_2018'})
# merged_df = pd.merge(result_df, df_renamed, on='country', how='left')
# result_df = merged_df.drop('Code', axis=1).dropna()

In [14]:
# Display the table that is exported in the final cell.
result_df

Unnamed: 0,country,avg_taxshare_2001_2019,avg_taxshare_2019_2022
0,Albania,1.199923,-0.495293
1,Angola,-2.555262,0.862481
3,Antigua and Barbuda,0.588258,2.552743
4,Argentina,2.458281,0.695743
5,Armenia,1.679258,1.957227
...,...,...,...
162,Uruguay,2.106938,-2.392892
163,Uzbekistan,-0.366153,0.125697
164,Vanuatu,0.866242,-14.751801
166,Vietnam,1.076696,-2.285137


In [15]:
###

In [None]:
# Write the per-country averages to the analysis input folder, omitting the index column.
output_csv = main_path + '/Democracy/Democracy_Main/MainAnalysis/input/outcomes/taxes_gdp/taxes_gdp.csv'
result_df.to_csv(output_csv, index=False)

