In [2]:
# Importing dependencies

import os
import warnings

import pandas as pd
from sodapy import Socrata
warnings.filterwarnings('ignore')

In [4]:
# Socrata client for the World Bank Finances open-data portal.
# The app token is read from the SOCRATA_APP_TOKEN environment variable so it
# is not stored in the notebook. When the variable is unset, None is passed and
# the client runs unauthenticated, which only works with public data sets.
client = Socrata("finances.worldbank.org", os.environ.get("SOCRATA_APP_TOKEN"))

# Requesting up to 900,000 rows of dataset "tdwh-3krx"; the JSON response is
# converted to a Python list of dictionaries by sodapy.
results = client.get("tdwh-3krx", limit=900000)

# Converting to pandas dataframe
results_df = pd.DataFrame.from_records(results)
results_df.head(3)


852117

In [5]:
# Saving the raw API results to a CSV file.
# An empty path ("") cannot be opened for writing, so an explicit file name is
# used. NOTE(review): "raw_data.csv" is a placeholder name — adjust if a
# different location was intended.
results_df.to_csv("raw_data.csv")

852117

In [18]:
# Keeping only the columns needed downstream.
# .copy() makes clean_df an independent frame, so the column assignments in
# later cells act on clean_df itself rather than on a slice of results_df
# (which is what triggers pandas' SettingWithCopyWarning).
clean_df = results_df[["credit_number", "region", "country_code", "country", "credit_status", "currency_of_commitment",
                       "original_principal_amount", "disbursed_amount", "agreement_signing_date"]].copy()
clean_df.head(3)

Unnamed: 0,credit_number,region,country_code,country,credit_status,currency_of_commitment,original_principal_amount,disbursed_amount,agreement_signing_date
0,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12T00:00:00.000
1,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12T00:00:00.000
2,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12T00:00:00.000


In [19]:
# Listing the distinct commitment currencies and regions present in the data
currency, regions = (
    clean_df[column].unique()
    for column in ("currency_of_commitment", "region")
)

print(currency, regions)

['USD' 'XDR' 'EUR' 'JPY' 'XAF'] ['LATIN AMERICA AND CARIBBEAN' 'AFRICA' 'AFRICA EAST' 'SOUTH ASIA'
 'EAST ASIA AND PACIFIC' 'MIDDLE EAST AND NORTH AFRICA'
 'EUROPE AND CENTRAL ASIA' 'AFRICA WEST' 'OTHER']


In [20]:
# Checking column dtypes before conversion: every column, including the
# amount and date columns, is still a plain object column at this point.
clean_df.dtypes

credit_number                object
region                       object
country_code                 object
country                      object
credit_status                object
currency_of_commitment       object
original_principal_amount    object
disbursed_amount             object
agreement_signing_date       object
dtype: object

In [21]:
# Casting both monetary columns from object to float
for amount_col in ("original_principal_amount", "disbursed_amount"):
    clean_df[amount_col] = clean_df[amount_col].astype(float)


In [22]:
# Parsing the signing-date strings (e.g. "1961-05-12T00:00:00.000") into datetimes
clean_df["agreement_signing_date"] = clean_df["agreement_signing_date"].pipe(pd.to_datetime)

clean_df.head(3)

Unnamed: 0,credit_number,region,country_code,country,credit_status,currency_of_commitment,original_principal_amount,disbursed_amount,agreement_signing_date
0,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12
1,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12
2,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12


In [23]:
# Extracting the calendar year of the signing date into its own column
clean_df["agreement_year"] = pd.to_datetime(clean_df["agreement_signing_date"]).dt.year

clean_df.head()

Unnamed: 0,credit_number,region,country_code,country,credit_status,currency_of_commitment,original_principal_amount,disbursed_amount,agreement_signing_date,agreement_year
0,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12,1961.0
1,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12,1961.0
2,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12,1961.0
3,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12,1961.0
4,IDA00010,LATIN AMERICA AND CARIBBEAN,HN,Honduras,Fully Repaid,USD,9000000.0,8427973.97,1961-05-12,1961.0


In [42]:
# Summing original principal and disbursed amounts for every
# region / country / signing year / currency combination
grouped_df = (
    clean_df
    .groupby(["region", "country_code", "country", "agreement_year", "currency_of_commitment"])
    .agg(
        total_original_principal_amount=pd.NamedAgg(column="original_principal_amount", aggfunc="sum"),
        total_disbursed_amount=pd.NamedAgg(column="disbursed_amount", aggfunc="sum"),
    )
)

grouped_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,total_original_principal_amount,total_disbursed_amount
region,country_code,country,agreement_year,currency_of_commitment,Unnamed: 5_level_1,Unnamed: 6_level_1
AFRICA,3A,Africa,2001.0,XDR,5.600000e+08,5.947673e+08
AFRICA,3A,Africa,2004.0,XDR,6.720000e+08,5.680490e+08
AFRICA,3A,Africa,2005.0,XDR,2.800000e+09,2.859733e+09
AFRICA,3A,Africa,2011.0,XDR,7.985000e+09,6.296284e+09
AFRICA,3A,Africa,2012.0,XDR,4.654500e+09,1.473507e+09
...,...,...,...,...,...,...
SOUTH ASIA,PK,Pakistan,2017.0,XDR,2.658091e+10,1.539391e+10
SOUTH ASIA,PK,Pakistan,2018.0,XDR,2.168330e+10,4.964706e+09
SOUTH ASIA,PK,Pakistan,2019.0,XDR,1.941680e+10,1.248193e+09
SOUTH ASIA,PK,Pakistan,2020.0,JPY,6.000000e+08,0.000000e+00


In [43]:
# Turning the group keys back into ordinary columns.
# Guarded on the index type so that re-running this cell is a no-op: calling
# reset_index again on the already-flat frame would add a spurious "index" column.
if isinstance(grouped_df.index, pd.MultiIndex):
    grouped_df = grouped_df.reset_index()
grouped_df

Unnamed: 0,region,country_code,country,agreement_year,currency_of_commitment,total_original_principal_amount,total_disbursed_amount
0,AFRICA,3A,Africa,2001.0,XDR,5.600000e+08,5.947673e+08
1,AFRICA,3A,Africa,2004.0,XDR,6.720000e+08,5.680490e+08
2,AFRICA,3A,Africa,2005.0,XDR,2.800000e+09,2.859733e+09
3,AFRICA,3A,Africa,2011.0,XDR,7.985000e+09,6.296284e+09
4,AFRICA,3A,Africa,2012.0,XDR,4.654500e+09,1.473507e+09
...,...,...,...,...,...,...,...
4480,SOUTH ASIA,PK,Pakistan,2017.0,XDR,2.658091e+10,1.539391e+10
4481,SOUTH ASIA,PK,Pakistan,2018.0,XDR,2.168330e+10,4.964706e+09
4482,SOUTH ASIA,PK,Pakistan,2019.0,XDR,1.941680e+10,1.248193e+09
4483,SOUTH ASIA,PK,Pakistan,2020.0,JPY,6.000000e+08,0.000000e+00


In [45]:
# Keeping only the groups whose agreements were signed in 1990 or later
grouped_df2 = grouped_df[grouped_df["agreement_year"].ge(1990)]
grouped_df2

Unnamed: 0,region,country_code,country,agreement_year,currency_of_commitment,total_original_principal_amount,total_disbursed_amount
0,AFRICA,3A,Africa,2001.0,XDR,5.600000e+08,5.947673e+08
1,AFRICA,3A,Africa,2004.0,XDR,6.720000e+08,5.680490e+08
2,AFRICA,3A,Africa,2005.0,XDR,2.800000e+09,2.859733e+09
3,AFRICA,3A,Africa,2011.0,XDR,7.985000e+09,6.296284e+09
4,AFRICA,3A,Africa,2012.0,XDR,4.654500e+09,1.473507e+09
...,...,...,...,...,...,...,...
4480,SOUTH ASIA,PK,Pakistan,2017.0,XDR,2.658091e+10,1.539391e+10
4481,SOUTH ASIA,PK,Pakistan,2018.0,XDR,2.168330e+10,4.964706e+09
4482,SOUTH ASIA,PK,Pakistan,2019.0,XDR,1.941680e+10,1.248193e+09
4483,SOUTH ASIA,PK,Pakistan,2020.0,JPY,6.000000e+08,0.000000e+00


In [48]:
# Writing the aggregated totals to data.csv in the working directory.
# NOTE(review): this exports grouped_df, not the 1990-onwards subset
# grouped_df2 — confirm the unfiltered frame is the intended output.
grouped_df.to_csv(path_or_buf="data.csv")