In [30]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from warnings import filterwarnings
filterwarnings('ignore')
import scipy.stats as stats

In [19]:
df_country = pd.read_csv('country.csv')
df_country.head()

Unnamed: 0,Country,Corruption_Index,Gini_Index
0,Hong Kong,77,53.7
1,South Korea,53,30.2
2,China,40,46.2
3,Italy,47,32.7
4,Mongolia,38,36.5


In [20]:
df_country['Country'].value_counts()

Country
Hong Kong        1
South Korea      1
Greece           1
Argentina        1
Romania          1
Portugal         1
Russia           1
United States    1
France           1
Denmark          1
Sweden           1
Germany          1
Canada           1
UK               1
Norway           1
Austria          1
Mongolia         1
Italy            1
China            1
Thailand         1
Name: count, dtype: int64

In [28]:
df_country['Country'].nunique()

20

In [21]:
df_country.shape

(20, 3)

In [22]:
df_country[['Corruption_Index','Gini_Index']].describe()

Unnamed: 0,Corruption_Index,Gini_Index
count,20.0,20.0
mean,61.7,34.74
std,20.61706,7.384579
min,29.0,23.5
25%,43.0,30.175
50%,65.5,33.85
75%,81.0,39.575
max,90.0,53.7


In [23]:
Y = df_country['Corruption_Index']
X = df_country['Gini_Index']

In [24]:
# adding constant for the intercept
X = sm.add_constant(X)

SLR_model = sm.OLS(Y,X).fit()

In [25]:
SLR_model.summary()

0,1,2,3
Dep. Variable:,Corruption_Index,R-squared:,0.215
Model:,OLS,Adj. R-squared:,0.172
Method:,Least Squares,F-statistic:,4.936
Date:,"Sat, 27 Jul 2024",Prob (F-statistic):,0.0394
Time:,07:48:22,Log-Likelihood:,-85.965
No. Observations:,20,AIC:,175.9
Df Residuals:,18,BIC:,177.9
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,106.6950,20.682,5.159,0.000,63.243,150.147
Gini_Index,-1.2952,0.583,-2.222,0.039,-2.520,-0.070

0,1,2,3
Omnibus:,1.213,Durbin-Watson:,1.192
Prob(Omnibus):,0.545,Jarque-Bera (JB):,1.005
Skew:,0.333,Prob(JB):,0.605
Kurtosis:,2.127,Cond. No.,175.0


In [26]:
SLR_model.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.172
Dependent Variable:,Corruption_Index,AIC:,175.9296
Date:,2024-07-27 07:48,BIC:,177.9211
No. Observations:,20,Log-Likelihood:,-85.965
Df Model:,1,F-statistic:,4.936
Df Residuals:,18,Prob (F-statistic):,0.0394
R-squared:,0.215,Scale:,352.12

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
const,106.6950,20.6822,5.1588,0.0001,63.2433,150.1468
Gini_Index,-1.2952,0.5830,-2.2217,0.0394,-2.5200,-0.0704

0,1,2,3
Omnibus:,1.213,Durbin-Watson:,1.192
Prob(Omnibus):,0.545,Jarque-Bera (JB):,1.005
Skew:,0.333,Prob(JB):,0.605
Kurtosis:,2.127,Condition No.:,175.0


In [27]:
b0, b1 = SLR_model.params
b0, b1

(106.69501055874487, -1.2951931651912754)

From this, the coefficents are 106.69501055874487 (b0) and -1.2951931651912754 (b1).
We can write the LR equation as Y = b0 + b1*X

So, here we can say that for every one unit increase in the Gini Index, the Corruption Perception Index is expected to increase by b1, ie., -1.2951931651912754 units.

0.215 is the R-squared value. This means that 21.5% of the variation in the Corruption Perception Index is explained by the Gini Index.

p-value is 0.0394 which is lesser than 0.1 (alpha value). This mean that we fail to reject the null hypothesis and that there IS a statistically significant relationship between corruption perception index and Gini index.

Now, here: \
b1 = -1.2951931651912754 \
b0 = 106.69501055874487 \
Degrees of freedom = n - 2 = 20 -2 = 18

In [32]:
b1 = -1.2951931651912754 
se_b1 = 106.69501055874487
df = 18

t_critical = stats.t.ppf(1 - 0.025, df)

lower_bound = b1 - t_critical * se_b1
upper_bound = b1 + t_critical * se_b1

lower_bound, upper_bound

(-225.45309243180031, 222.86270610141776)