# Linear Regression Modelling for Global Interest Rates

In [1]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\smaheshw\appdata\local\continuum\anaconda3\lib\site-packages (0.0)


In [3]:
# Import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
import bamboolib

In [4]:
# Load libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler,LabelEncoder
from sklearn.feature_selection import SelectFromModel

In [5]:
# Relative locations (resolved against the working directory) of the input
# dataset and of the CSV that receives the model results.
data_file = os.path.join("Resources", "data", "CombinedData.csv")
model_result = os.path.join("Resources", "results", "LinearRegression.csv")

In [6]:
# Load the combined macro-economic dataset into a DataFrame.
# NOTE(review): the CSV appears to carry a saved index that loads as the
# 'Unnamed: 0' column — consider index_col=0 if no later cell relies on it.
df_macrodata = pd.read_csv(filepath_or_buffer=data_file)

In [7]:
# Display sample data: as the cell's last expression, the frame is rendered by
# the notebook (pandas truncates long frames to the first and last rows).
df_macrodata

Unnamed: 0.1,Unnamed: 0,country,date,GDPRate,UnemploymentRate,InflationRate,InterestRate,CorruptionIndex
0,0,Afghanistan,1/1/17,1.434553e+06,11.20,4.975896,12.6,15.0
1,1,Afghanistan,1/1/12,1.086160e+06,1.70,6.441171,7.2,8.0
2,2,Afghanistan,1/1/08,5.411126e+05,2.50,26.418735,12.6,15.0
3,3,Albania,1/1/19,2.604024e+00,11.50,1.411092,5.9,35.0
4,4,Albania,1/1/18,5.486044e+00,12.30,2.028059,4.5,36.0
...,...,...,...,...,...,...,...,...
1611,1611,Zimbabwe,1/1/16,1.000000e+00,5.25,-1.566399,4.8,22.0
1612,1612,Zimbabwe,1/1/15,2.000000e+00,5.30,-2.409525,7.9,21.0
1613,1613,Zimbabwe,1/1/14,2.000000e+00,5.34,-0.212964,9.7,21.0
1614,1614,Zimbabwe,1/1/13,2.000000e+00,5.40,1.631629,0.3,21.0


In [9]:
# Check data size as a (rows, columns) tuple
df_macrodata.shape

(1616, 8)

In [10]:
# Check dataframe summary: column names, non-null counts and dtypes
df_macrodata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1616 entries, 0 to 1615
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        1616 non-null   int64  
 1   country           1616 non-null   object 
 2   date              1616 non-null   object 
 3   GDPRate           1616 non-null   float64
 4   UnemploymentRate  1616 non-null   float64
 5   InflationRate     1616 non-null   float64
 6   InterestRate      1616 non-null   float64
 7   CorruptionIndex   1616 non-null   float64
dtypes: float64(5), int64(1), object(2)
memory usage: 101.1+ KB


In [11]:
# Count the missing (null/NaN) entries in each column
df_macrodata.isnull().sum()

Unnamed: 0          0
country             0
date                0
GDPRate             0
UnemploymentRate    0
InflationRate       0
InterestRate        0
CorruptionIndex     0
dtype: int64

In [12]:
# Inspect the distinct values taken by the dependent variable (InterestRate)
df_macrodata.loc[:, 'InterestRate'].unique()

array([ 12.6     ,   7.2     ,   5.9     ,   4.5     ,   4.7     ,
         7.4     ,   7.3     ,   6.3     ,   9.5     ,   9.7     ,
         9.9     ,   8.      ,  10.      ,   8.6     ,   9.3     ,
        10.2     ,   8.3     ,  11.3     ,  19.1     ,  17.      ,
        16.2     ,  13.8     ,  12.2     ,  12.9     ,  10.6     ,
       -12.2     ,  -6.5     ,  -5.6     ,  -4.9     ,  21.1     ,
        12.4     ,   8.8     ,  -9.9     ,  -6.9     ,  39.      ,
        -5.7     ,  12.8     ,   2.      ,  17.8     ,  36.6     ,
         1.1     ,   1.7     ,   3.3     ,   1.5     ,   6.      ,
         6.2     ,   4.4     ,   5.      ,   1.4     ,   1.      ,
         4.2     ,   3.      ,   2.4     ,   3.6     ,   3.4     ,
         2.1     ,   5.3     ,   6.8     ,   8.1     ,   0.3     ,
        28.9     ,  19.4     ,  17.7     ,  15.1     ,  -4.5     ,
        48.      ,  -6.4     ,  -1.2     ,   7.9     ,  16.8     ,
         6.4     ,   1.2     ,   1.6     ,   3.8     ,   5.6  

In [13]:
# Statistical summary of the numeric columns (count, mean, std, min,
# quartiles, max). Leaving the call as the cell's last expression lets the
# notebook render it as a formatted table instead of the line-wrapped plain
# text that print() produces.
df_macrodata.describe()

        Unnamed: 0       GDPRate  UnemploymentRate  InflationRate  \
count  1616.000000  1.616000e+03       1616.000000    1616.000000   
mean    807.500000  2.655837e+03          7.732043       6.012438   
std     466.643333  4.768691e+04          6.119097      11.603082   
min       0.000000 -6.666000e+01          0.100000     -60.496383   
25%     403.750000  4.115199e+00          3.600000       1.953480   
50%     807.500000  6.872538e+00          6.100000       4.014027   
75%    1211.250000  1.165691e+01          9.700000       7.334440   
max    1615.000000  1.434553e+06         37.200000     293.700000   

       InterestRate  CorruptionIndex  
count   1616.000000      1616.000000  
mean       6.859474        39.801553  
std        9.889895        25.392061  
min      -63.800000         0.000000  
25%        2.375000        25.000000  
50%        5.200000        36.000000  
75%        9.400000        54.000000  
max       93.900000        97.000000  
