In [48]:
import pandas as pd 

In [49]:
#loading datasets
df_fdi = pd.read_csv("../data/clean/fdi_percent_melt.csv")
df_fsi =  pd.read_csv("../data/clean/fsi_score_melt.csv")


In [50]:
#clean fdi df


df_fdi = df_fdi.rename(columns={'FDI as percentage of capital': 'FDI %'})

df_fdi.head()

Unnamed: 0.1,Unnamed: 0,Country,Date,FDI %
0,0,Anguilla,2008,49.54
1,1,Antigua and Barbuda,2008,30.97
2,2,Aruba,2008,2.25
3,3,Bahamas,2008,50.75
4,4,Bahrain,2008,29.72


In [51]:
#clean fsi df

df_fsi = df_fsi.drop('Unnamed: 0', axis=1)
df_fsi = df_fsi.drop('Country Code', axis=1)
df_fsi = df_fsi.rename(columns={'Financial Secrecy Index': 'FSI Score'})

df_fsi['Date'] = pd.to_datetime(df_fsi['Date'], format='%d-%m-%Y')
df_fsi['Date'] = df_fsi['Date'].dt.year

df_fsi.head()

Unnamed: 0,Country,Date,FSI Score
0,Aruba,2011,74.2
1,Anguilla,2011,0.0
2,United Arab Emirates,2011,0.0
3,American Samoa,2011,79.27
4,Antigua and Barbuda,2011,0.0


In [52]:
#drop unneccesary countries

values_to_keep = ['Fiji', 'Russia', 'Samoa', 'Trinidad and Tobago', 'US Virgin Islands', 'Vanuatu']
df_fsi = df_fsi[df_fsi['Country'].isin(values_to_keep)]
df_fdi = df_fdi[df_fdi['Country'].isin(values_to_keep)]
fdi_bl_final = df_fdi[df_fdi['Date'] >= 2017]
fsi_bl_final = df_fsi[df_fsi['Date'] >= 2017]

In [41]:
print(fdi_bl_final)

     Unnamed: 0              Country  Date  FDI %
301         301                 Fiji  2017  37.54
313         313                Samoa  2017   3.14
315         315  Trinidad and Tobago  2017  -9.02
319         319              Vanuatu  2017  15.98
333         333                 Fiji  2018  44.89
345         345                Samoa  2018   6.17
347         347  Trinidad and Tobago  2018 -16.89
351         351              Vanuatu  2018  16.00
365         365                 Fiji  2019  32.42
377         377                Samoa  2019  -1.40
379         379  Trinidad and Tobago  2019   4.03
383         383              Vanuatu  2019  23.49
397         397                 Fiji  2020  29.51
409         409                Samoa  2020   1.68
411         411  Trinidad and Tobago  2020  22.61
415         415              Vanuatu  2020  13.91
429         429                 Fiji  2021  48.73
441         441                Samoa  2021   3.30
443         443  Trinidad and Tobago  2021 -34.32


In [None]:
fdi_bl_final.to_csv('../data/clean/blacklisted_fdi.csv', index=True)

: 

In [58]:
fsi_bl_final.to_csv('../data/clean/blacklisted_fsi.csv', index=True)

In [42]:
# calculate the average change from start to finish FDI

# Sort the DataFrame by 'Country' and 'Date' to ensure chronological order
df_sorted = fdi_bl_final.sort_values(by=['Country', 'Date'])

# Filter to keep only the first and last entry for each 'Country'
first_last_entries = df_sorted.groupby('Country').agg(
    first_entry=('FDI %', 'first'),
    last_entry=('FDI %', 'last')
).reset_index()

# Merge the original DataFrame with the first and last entries
filtered_df = pd.merge(df_sorted, first_last_entries[['Country', 'first_entry', 'last_entry']], 
                       on='Country', how='inner')

# Filter rows where FDI % is either the first or last entry
filtered_df = filtered_df[(filtered_df['FDI %'] == filtered_df['first_entry']) | 
                          (filtered_df['FDI %'] == filtered_df['last_entry'])]

# Drop the helper columns
filtered_df = filtered_df.drop(columns=['first_entry', 'last_entry'])
#filtered_df = filtered_df[filtered_df['Country'] != 'Cayman Islands']
filtered_df

Unnamed: 0.1,Unnamed: 0,Country,Date,FDI %
0,301,Fiji,2017,37.54
6,493,Fiji,2023,10.32
7,313,Samoa,2017,3.14
13,505,Samoa,2023,-0.98
14,315,Trinidad and Tobago,2017,-9.02
20,507,Trinidad and Tobago,2023,-85.15
21,319,Vanuatu,2017,15.98
27,511,Vanuatu,2023,1.64


In [45]:

def calculate_percentage_change(group):
    if len(group) >= 2:  # Ensure there are at least 2 entries
        first_fdi = group.iloc[0]['FDI %']
        second_fdi = group.iloc[1]['FDI %']
        
        # Calculate the percentage change
        change = ((second_fdi - first_fdi)/first_fdi)*100
        return pd.Series({'Percentage Change in FDI%': change})
    else:
        return pd.Series({'Percentage Change in FDI %': None})  # If there's less than 2 entries, return None

# Apply the function to each group
percentage_change_df_fdi = filtered_df.groupby('Country').apply(calculate_percentage_change)

# Display the result
print(percentage_change_df_fdi)


                     Percentage Change in FDI%
Country                                       
Fiji                                -72.509323
Samoa                              -131.210191
Trinidad and Tobago                 844.013304
Vanuatu                             -89.737171


  percentage_change_df_fdi = filtered_df.groupby('Country').apply(calculate_percentage_change)


In [46]:
#adding average and median
 
average_change = percentage_change_df_fdi['Percentage Change in FDI%'].mean()
median_change = percentage_change_df_fdi['Percentage Change in FDI%'].median()

# Create new rows for the average and median
aggregate_row = pd.Series({'Percentage Change in FDI%': average_change}, name='Average')
median_row = pd.Series({'Percentage Change in FDI%': median_change}, name='Median')

# Append the new rows to the DataFrame
percentage_change_df_fdi_final = pd.concat([percentage_change_df_fdi, aggregate_row.to_frame().T, median_row.to_frame().T])

percentage_change_df_fdi_final['Percentage Change in FDI%'] = percentage_change_df_fdi_final['Percentage Change in FDI%'].round(1)

# Display the final DataFrame
print(percentage_change_df_fdi_final)

                     Percentage Change in FDI%
Fiji                                     -72.5
Samoa                                   -131.2
Trinidad and Tobago                      844.0
Vanuatu                                  -89.7
Average                                  137.6
Median                                   -81.1


In [54]:
# calculate the average change from start to finish FSI

# Sort the DataFrame by 'Country' and 'Date' to ensure chronological order
df_sorted = fsi_bl_final.sort_values(by=['Country', 'Date'])

# Filter to keep only the first and last entry for each 'Country'
first_last_entries = df_sorted.groupby('Country').agg(
    first_entry=('FSI Score', 'first'),
    last_entry=('FSI Score', 'last')
).reset_index()

# Merge the original DataFrame with the first and last entries
filtered_df = pd.merge(df_sorted, first_last_entries[['Country', 'first_entry', 'last_entry']], 
                       on='Country', how='inner')

# Filter rows where FDI % is either the first or last entry
filtered_df = filtered_df[(filtered_df['FSI Score'] == filtered_df['first_entry']) | 
                          (filtered_df['FSI Score'] == filtered_df['last_entry'])]

# Drop the helper columns
filtered_df = filtered_df.drop(columns=['first_entry', 'last_entry'])

def calculate_percentage_change(group):
    if len(group) >= 2:  # Ensure there are at least 2 entries
        first_fdi = group.iloc[0]['FSI Score']
        second_fdi = group.iloc[1]['FSI Score']
        
        # Calculate the percentage change
        change = ((second_fdi - first_fdi)/second_fdi)*100
        return pd.Series({'Percentage Change in FSI Score': change})
    else:
        return None  # If there are less than 2 entries, return None to exclude the group

# Apply the function to each group
percentage_change_df_nas = filtered_df.groupby('Country').apply(calculate_percentage_change)

# Display the result
percentage_change_df_fsi = percentage_change_df_nas.dropna()

percentage_change_df_fsi.head()

  change = ((second_fdi - first_fdi)/second_fdi)*100
  percentage_change_df_nas = filtered_df.groupby('Country').apply(calculate_percentage_change)


Unnamed: 0_level_0,Percentage Change in FSI Score
Country,Unnamed: 1_level_1
Fiji,-2.334096
Russia,-0.099867
Trinidad and Tobago,0.305188
Vanuatu,-20.994718


In [55]:
#adding average and median
 
average_change = percentage_change_df_fsi['Percentage Change in FSI Score'].mean()
median_change = percentage_change_df_fsi['Percentage Change in FSI Score'].median()

# Create new rows for the average and median
aggregate_row = pd.Series({'Percentage Change in FSI Score': average_change}, name='Average')
median_row = pd.Series({'Percentage Change in FSI Score': median_change}, name='Median')

# Append the new rows to the DataFrame
percentage_change_df_fsi_final = pd.concat([percentage_change_df_fsi, aggregate_row.to_frame().T, median_row.to_frame().T])

percentage_change_df_fsi_final['Percentage Change in FSI Score'] = percentage_change_df_fsi_final['Percentage Change in FSI Score'].round(1)

# Display the final DataFrame
print(percentage_change_df_fsi_final)

                     Percentage Change in FSI Score
Fiji                                           -2.3
Russia                                         -0.1
Trinidad and Tobago                             0.3
Vanuatu                                       -21.0
Average                                        -5.8
Median                                         -1.2


In [56]:
percentage_change_df_fdi_final.to_csv('../data/clean/percentage_change_df_fdi_blacklist.csv', index=True)

In [57]:
percentage_change_df_fsi_final.to_csv('../data/clean/percentage_change_df_fsi_blacklist.csv', index=True)