In [32]:
import datapackage
import pandas as pd
import plotly.express as px

# Load the data
data_url = 'https://datahub.io/core/corruption-perceptions-index/datapackage.json'
package = datapackage.Package(data_url)
resources = package.resources
for resource in resources:
    if resource.tabular:
        data = pd.read_csv(resource.descriptor['path'])
        break  # Exit the loop once the tabular data is found

In [33]:
country_codes = pd.read_csv('iso3.csv')
print(f"Corruption data loaded. Columns: {data.columns}")
print(f"Country codes loaded. Columns: {country_codes.columns}")

# rename "name" col as "Jurisdiction"
country_codes.rename(columns={'name': 'Jurisdiction', "iso3": "Country_Code"}, inplace=True)

Corruption data loaded. Columns: Index(['Jurisdiction', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015'],
      dtype='object')
Country codes loaded. Columns: Index(['iso3', 'name'], dtype='object')


In [35]:
for year in data.columns[1:]:  # Skip the 'Jurisdiction' column
    if int(year) < 2012:
        data[year] = pd.to_numeric(data[year], errors='coerce') * 10


In [37]:
# Get descriptive statistics for each year
stats = data.describe()

for year in data.columns[1:]:  # Skip the 'Jurisdiction' column
    print(f'{year}: {data[year].min()} to {data[year].max()}')


1998: 14.0 to 100.0
1999: 15.0 to 100.0
2000: 12.0 to 100.0
2001: 4.0 to 99.0
2002: 12.0 to 97.0
2003: 13.0 to 97.0
2004: 15.0 to 97.0
2005: 17.0 to 97.0
2006: 18.0 to 96.0
2007: 14.0 to 94.0
2008: 10.0 to 93.0
2009: 11.0 to 94.0
2010: 11.0 to 93.0
2011: 9.8 to 94.60000000000001
2012: - to 90
2013: - to 91
2014: - to 92
2015: - to 91


In [38]:
# Transform the data to long-form
data_long = data.melt(id_vars='Jurisdiction', var_name='Year', value_name='Score')

data_long['Score'] = pd.to_numeric(data_long['Score'], errors='coerce')

# Assume you have a DataFrame named country_codes with columns 'Jurisdiction' and 'Country_Code'
# Merge the data with the country codes
data_long = pd.merge(data_long, country_codes, on='Jurisdiction')

In [50]:
# Get the min and max values of the 'Score' column
min_score = data_long['Score'].min()
max_score = data_long['Score'].max()

fig = px.choropleth(
    data_long,
    locations='Country_Code',
    color='Score',
    hover_name='Jurisdiction',
    animation_frame='Year',
    color_continuous_scale='YlOrRd',  # You can change this to any other warm colormap
    title='Corruption Perceptions Index Over Time',
    range_color=[min_score, max_score],  # Set consistent color scale
    width=1000,
    height=600
)

fig.update_layout(
    title={
        'text': "Corruption Perceptions Index Over Time",
        'x': 0.5,  # Center title
        'xanchor': 'center'
    }, 
    annotations=[
        dict(
            text='Source: Transparency International',
            showarrow=False,
            xref='paper',
            yref='paper',
            x=0.5,  # x position
            y=1.15,  # y position
            xanchor='center',
            yanchor='top',
            font=dict(size=14)
        ),
        dict(
            text='A higher score indicates less corruption. More information can be found '
                 '<a href="https://www.transparency.org/en/cpi/2022">here</a>.',
            showarrow=False,
            xref='paper',
            yref='paper',
            x=0.5,
            y=-0.2,
            xanchor='center',
            yanchor='top',
            font=dict(size=12)
        )
    ]
)


fig.show()


In [51]:
import plotly.io as pio

pio.write_html(fig, file='index.html')
