In [None]:
import pandas as pd
import numpy as np
import openpyxl
import warnings
import sys
import os

# os.path.abspath('') resolves to the current working directory, so this is
# the PARENT of the directory the kernel is running in.
notebook_dir = os.path.dirname(os.path.abspath(''))

# Work out where the PythonPrep directory lives: if 'PythonPrep' already
# appears in the path above, use that path as-is; otherwise look for a
# 'PythonPrep' folder one level further up.
if 'PythonPrep' in notebook_dir:
    pythonprep_dir = notebook_dir
else:
    pythonprep_dir = os.path.join(os.path.dirname(notebook_dir), 'PythonPrep')

# Make modules stored in that directory importable.
sys.path.append(pythonprep_dir)

from paths import main_path

# Suppress every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so pandas warnings (e.g. about
# assigning into a filtered copy) are hidden too -- consider narrowing it.
warnings.filterwarnings("ignore")


In [None]:
# Input folder for the analysis, rooted at the project-level `main_path`.
path_input = main_path + "/Democracy/Democracy_Main/MainAnalysis/input"
file_path = path_input + "/outcomes/financial_flows/EWN-dataset_12-2022.xlsx"

# Load the 'Dataset' sheet of the workbook; the first row is the header.
df = pd.read_excel(file_path, sheet_name='Dataset')
df


Unnamed: 0,Country,IFS_Code,Year,Portfolio equity assets,Portfolio equity liabilities,FDI assets,FDI liabilities,Debt assets,Debt liabilities,financial derivatives (assets),...,Portfolio debt assets,Portfolio debt liabilities,Other investment assets,Other investment liabilities,Current account balance,Capital account,Exchange rate (period avg),Exchange rate (end of period),net IIP excl gold / GDP domestic currency,net IIP / GDP domestic currency
0,"Afghanistan, I.R. of",512,1970,,,,,,,,...,,,,,,,433.547690,438.43342,,
1,"Afghanistan, I.R. of",512,1971,,,,,,,,...,,,,,,,433.547690,438.43342,,
2,"Afghanistan, I.R. of",512,1972,,,,,,,,...,,,,,,,433.547690,438.43342,,
3,"Afghanistan, I.R. of",512,1973,,,,,,,,...,,,,,,,433.547690,438.43342,,
4,"Afghanistan, I.R. of",512,1974,,,,,,,,...,,,,,,,433.547690,438.43342,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11123,Zimbabwe,698,2017,6.0,445.18123,214.041067,4687.980083,1420.259727,8829.025908,0.0,...,,68.360251,1420.259727,8760.665657,-271.191369,223.725495,,,,
11124,Zimbabwe,698,2018,5.0,365.54054,214.827802,5432.617283,1352.475727,8672.000000,0.0,...,,32.870000,1352.475727,8639.130000,-1379.626260,231.391685,,,,
11125,Zimbabwe,698,2019,6.0,109.46851,217.231536,5712.617283,1575.456727,9609.403936,0.0,...,,148.792050,1575.456727,9460.611886,920.472008,314.530557,8.150304,16.77341,-1.206630,
11126,Zimbabwe,698,2020,5.0,352.24309,213.731536,5906.977283,1649.062727,14485.201470,0.0,...,,61.165744,1649.062727,14424.035726,1096.259600,299.684263,51.329013,81.78660,-1.297276,


In [3]:
# Keep only observations from 2001 onward.
df = df.loc[df['Year'].ge(2001)]

In [4]:
# Take 'net IIP excl gold / GDP domestic currency' as the `financial_flow`
# measure, keep only the identifying columns, turn +/-inf into NaN and drop
# every row that has a missing value in any of the three columns.
#
# The column is attached with .assign() on a fresh column selection instead of
# being written into `df` in place: `df` is a filtered subset here, and
# assigning a column into such a subset is the pattern pandas flags with
# SettingWithCopyWarning.
df = (
    df[['Country', 'Year']]
    .assign(
        financial_flow=df['net IIP excl gold / GDP domestic currency']
        .replace([np.inf, -np.inf], np.nan)
    )
    .dropna()
)

In [5]:
# Earliest remaining year for each country.
first_years = df.groupby('Country')['Year'].min()
countries_starting_in_2001 = first_years[first_years == 2001].index

# Keep countries whose series starts in 2001 and drop observations that are
# exactly zero (presumably because a zero base makes the percentage change
# computed later undefined -- confirm with the analysis owner).
# The original applied the same `isin` filter a second time right after this
# line; that second pass could not remove anything and has been dropped.
filtered_df = df[
    df['Country'].isin(countries_starting_in_2001) & (df['financial_flow'] != 0)
]
df = filtered_df.copy()

def filter_countries_starting_in_2001(dataframe):
    """Return the rows of countries whose earliest ``Year`` is exactly 2001.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``Country`` column and a ``Year`` column.

    Returns
    -------
    pandas.DataFrame
        The subset of ``dataframe`` (original index and column order kept)
        belonging to countries whose minimum ``Year`` equals 2001. The input
        frame is not modified.
    """
    # Minimum year per country, indexed by country name.
    earliest_year = dataframe.groupby("Country")["Year"].min()
    # Names of the countries whose first observation falls in 2001.
    qualifying = earliest_year.index[earliest_year == 2001]
    keep_row = dataframe["Country"].isin(qualifying)
    return dataframe[keep_row]

# Re-apply the "first year is 2001" rule: the previous step also removed
# zero-valued rows, which can shift a country's earliest remaining year.
filtered_df = filter_countries_starting_in_2001(df)
filtered_df

Unnamed: 0,Country,Year,financial_flow
83,Albania,2001,-0.105553
84,Albania,2002,-0.106354
85,Albania,2003,-0.142470
86,Albania,2004,-0.101666
87,Albania,2005,-0.175371
...,...,...,...
11112,Zimbabwe,2006,-0.987646
11113,Zimbabwe,2007,-2.274810
11125,Zimbabwe,2019,-1.206630
11126,Zimbabwe,2020,-1.297276


In [6]:
# Number of distinct countries left after filtering.
filtered_df['Country'].nunique()

186

In [7]:
# Work on an independent copy: a plain `df = filtered_df` only creates a second
# name for the same object, so any later in-place column assignment on `df`
# would also overwrite `filtered_df` and make re-running these cells
# compound the transformation.
df = filtered_df.copy()

In [8]:
# Replace the level of `financial_flow` with its percentage change (in %)
# relative to the previous available observation of the same country.
#
# * Rows are sorted by Year (stable sort) before differencing so the result
#   does not depend on the incoming row order; the values are aligned back on
#   the index, so the row order of `df` itself is unchanged.
# * `df` is rebound to a new frame via .assign() instead of being written to
#   in place, so the frame it was derived from is left untouched.
# NOTE(review): "previous observation" is the previous ROW of the country, not
# necessarily the previous calendar year -- if a country has missing years the
# change spans the whole gap. Confirm this is intended.
df = df.assign(
    financial_flow=(
        df.sort_values('Year', kind='mergesort')
        .groupby('Country')['financial_flow']
        .pct_change()
        * 100
    )
)



In [9]:
# Averaging windows: label -> (first year, last year), both bounds inclusive.
# The label becomes the suffix of the output column name.
periods = {
#     '1981_1990': (1981, 1990),
#     '1991_2000': (1991, 2000),
    '2001_2019': (2001, 2019),
#     '2011_2020': (2011, 2020),
    '2020_2022': (2020, 2022),
}

# One Series per window: the mean of `financial_flow` per country over the
# years in that window (NaN values are skipped by .mean()).
# Collecting the Series in a list and concatenating once replaces the original
# pattern of growing a DataFrame with pd.concat inside the loop, starting from
# an empty frame.
period_means = [
    df[df['Year'].between(start_year, end_year)]
    .groupby('Country')['financial_flow']
    .mean()
    .rename(f'financial_flow_{period}')
    for period, (start_year, end_year) in periods.items()
]

# Combine the windows side by side (outer join on country) and expose the
# country name as a regular first column called 'country'.
result_df = (
    pd.concat(period_means, axis=1)
    .rename_axis('country')
    .reset_index()
)

result_df

Unnamed: 0,country,financial_flow_2001_2019,financial_flow_2020_2022
0,Albania,11.429872,1.678394
1,Algeria,-4.292092,-29.362643
2,Angola,-67.600971,-9.028219
3,Anguilla,3.324192,24.565238
4,Antigua and Barbuda,8.034132,12.834609
...,...,...,...
181,Vietnam,0.455604,-1.896064
182,West Bank and Gaza,-63.385714,29.849246
183,"Yemen, Republic of",25.196061,-10.425723
184,Zambia,-166.714451,-8.692711


In [10]:
###

In [None]:
# path_input = main_path + "/Democracy/MainAnalysis/input"
# file_path = f"{path_input}/outcomes/financial_flows/EWN-dataset_12-2022.xlsx"
# df = pd.read_excel(file_path, sheet_name='Dataset', skiprows=0)
# df


In [12]:
# df['financial_flow'] = df['net IIP excl gold / GDP domestic currency']
# df = df[['Country', 'Year', 'financial_flow']]
# df['financial_flow'] = df['financial_flow'].replace([np.inf, -np.inf], np.nan)
# df = df.dropna()

In [13]:
# first_years = df.groupby('Country')['Year'].min()
# countries_starting_in_2001 = first_years[first_years == 2001].index
# filtered_df = df[(df['Country'].isin(countries_starting_in_2001)) & (df['financial_flow'] != 0)]
# filtered_df = filtered_df[filtered_df['Country'].isin(countries_starting_in_2001)]
# df = filtered_df.copy()

# def filter_countries_starting_in_2001(dataframe):
#     first_years = dataframe.groupby("Country")["Year"].min().reset_index()
#     start_in_2001 = first_years[first_years["Year"] == 2001]["Country"]
#     filtered_df = dataframe[dataframe["Country"].isin(start_in_2001)]
#     return filtered_df

# filtered_df = filter_countries_starting_in_2001(df)
# filtered_df

In [14]:
# df = filtered_df

In [15]:
# df

In [16]:
# df = df[(df['Year']==2000) | (df["Year"]==2018)]
# pivot_df = df.pivot_table(index=['Country'], columns='Year', values='financial_flow').reset_index()
# df_renamed = pivot_df.rename(columns={'Country': 'country', 2000: 'financial_flow_2000', 2018: 'financial_flow_2018'})
# merged_df = pd.merge(result_df, df_renamed, on='country', how='left')
# result_df = merged_df.dropna()

In [17]:
# result_df

In [18]:
###

In [None]:
# Write the per-country period averages to CSV, without the row index.
output_path = path_input + "/outcomes/old_outcomes/financial_flows.csv"
result_df.to_csv(output_path, index=False)

