In [None]:
#Import the necessary libraries:

In [48]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr

In [None]:
#We import pandas for data manipulation, numpy for numerical computation and scipy.stats for calculating the Pearson correlation coefficient.

In [49]:
# Read the Gini index CSV into a DataFrame; the first row of the file supplies the column names
df_gini = pd.read_csv(filepath_or_buffer='economic-inequality-gini-index.csv', header=0)
# Show the first five rows as a quick sanity check of the columns that were loaded
print(df_gini.head(n=5))

    Entity Code  Year  Gini coefficient
0  Albania  ALB  1996          0.270103
1  Albania  ALB  2002          0.317390
2  Albania  ALB  2005          0.305957
3  Albania  ALB  2008          0.299847
4  Albania  ALB  2012          0.289605


In [None]:
#We read in the data file containing the Gini coefficients.

In [50]:
# Read the Maddison GDP-per-capita CSV and discard the '417485-annotations' column
# in a single chained expression, so the frame is never mutated after creation
df_gdp = pd.read_csv('gdp-per-capita-maddison-2020.csv').drop(columns=['417485-annotations'])
# Show the first five rows as a quick sanity check of the remaining columns
print(df_gdp.head(n=5))

        Entity Code    Year  GDP per capita
0  Afghanistan  AFG  1950.0          1156.0
1  Afghanistan  AFG  1951.0          1170.0
2  Afghanistan  AFG  1952.0          1189.0
3  Afghanistan  AFG  1953.0          1240.0
4  Afghanistan  AFG  1954.0          1245.0


In [None]:
#We read in the data file containing the GDP data, and then drop the 417485-annotations column, which contains no useful information.

In [51]:
# Left-join the Gini table onto the GDP table: every GDP row is kept, and Gini columns
# are filled in wherever the same ('Entity', 'Year') pair exists in the Gini data
join_keys = ['Entity', 'Year']
left_merged = df_gdp.merge(df_gini, on=join_keys, how='left')

In [None]:
#We merge the GDP and Gini data on the 'Entity' and 'Year' columns using a left join.

In [52]:
# Average GDP per capita and the Gini coefficient across all entities for each year.
# Only the two numeric columns are aggregated: the merged frame still carries text
# columns (e.g. 'Entity'), and on pandas >= 2.0 a bare .mean() over them raises a
# TypeError because numeric_only now defaults to False.
# NOTE(review): mean() skips missing values per column, and the left join keeps GDP rows
# that have no Gini match, so a year's GDP average can cover more entities than its
# Gini average -- keep this in mind when interpreting the correlation.
grouped = left_merged.groupby('Year')[['GDP per capita', 'Gini coefficient']].mean()

In [None]:
#We group the data by year and calculate the mean GDP and Gini coefficient for each year.

In [53]:
# Pearson correlation between the yearly mean GDP per capita and the yearly mean Gini
# coefficient, restricted to the years in which both averages are available
data = grouped.loc[:, ['GDP per capita', 'Gini coefficient']].dropna()
corr, pval = pearsonr(
    data['GDP per capita'].to_numpy(),
    data['Gini coefficient'].to_numpy(),
)
# Report the coefficient and its p-value, each rounded to two decimals
print(f"Correlation coefficient: {corr:.2f}, p-value: {pval:.2f}")

Correlation coefficient: -0.12, p-value: 0.40


In [None]:
#We calculate the Pearson correlation coefficient between the yearly mean GDP per capita and the yearly mean Gini coefficient, dropping any years with missing data.
#The correlation coefficient and its p-value are then printed to the console.

In [None]:
#Is there a relation between a country's Gross Domestic Product (GDP) and its income inequality?

In [None]:
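# We can see that a Pearson correlation coefficient has been calculated between GDP per capita and the Gini coefficient,
# which is a measure of income inequality. Note that both series are yearly averages across countries,
# so the coefficient describes how the two averages move together over time rather than comparing individual countries.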
# The correlation coefficient measures the strength and direction of the linear relationship between two variables, with values ranging from -1 to 1.
# A positive correlation coefficient indicates a positive relationship between the two variables, meaning that as GDP per capita increases,
# so does the Gini coefficient, and vice versa. A negative correlation coefficient indicates a negative relationship,
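# meaning that as GDP per capita increases, the Gini coefficient decreases, and vice versa.
# Here the coefficient is -0.12 with a p-value of 0.40: a weak negative relationship that is not statistically
# significant at conventional levels (e.g. 0.05), so these data do not show a clear linear relation between the two.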

# However, it is important to note that correlation does not imply causation,
# and there could be other factors influencing the relationship between GDP per capita and the Gini coefficient,
# such as political and social policies, economic systems, and cultural factors.
# Therefore, we should be cautious in interpreting the results and avoid making causal claims based solely on the correlation coefficient.