In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [2]:
climate_data = pd.read_csv('./climate-change.csv')

In [3]:
climate_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 308 entries, 0 to 307
Data columns (total 11 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Year      308 non-null    int64  
 1   Month     308 non-null    int64  
 2   MEI       308 non-null    float64
 3   CO2       308 non-null    float64
 4   CH4       308 non-null    float64
 5   N2O       308 non-null    float64
 6   CFC-11    308 non-null    float64
 7   CFC-12    308 non-null    float64
 8   TSI       308 non-null    float64
 9   Aerosols  308 non-null    float64
 10  Temp      308 non-null    float64
dtypes: float64(9), int64(2)
memory usage: 26.6 KB


In [4]:
climate_data.head()

Unnamed: 0,Year,Month,MEI,CO2,CH4,N2O,CFC-11,CFC-12,TSI,Aerosols,Temp
0,1983,5,2.556,345.96,1638.59,303.677,191.324,350.113,1366.1024,0.0863,0.109
1,1983,6,2.167,345.52,1633.71,303.746,192.057,351.848,1366.1208,0.0794,0.118
2,1983,7,1.741,344.15,1633.22,303.795,192.818,353.725,1366.285,0.0731,0.137
3,1983,8,1.13,342.25,1631.35,303.839,193.602,355.633,1366.4202,0.0673,0.176
4,1983,9,0.428,340.17,1648.4,303.901,194.392,357.465,1366.2335,0.0619,0.149


In [5]:
X, y = climate_data.iloc[:, 2:-1], climate_data.iloc[:, -1]
X_train, y_train = climate_data[climate_data.Year<=2006].iloc[:, 2:-1], climate_data[climate_data.Year<=2006].iloc[:, -1]
X_test, y_test = climate_data[climate_data.Year>2006].iloc[:, 2:-1], climate_data[climate_data.Year>2006].iloc[:, -1]

In [6]:
len(X_test)

24

In [6]:
X = np.concatenate((X, np.ones((X.shape[0], 1))), 1)
weights = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)

$\beta = (X^{T}X)^{-1}X^{T}Y$

In [7]:
weights

array([ 6.63218016e-02,  5.20745962e-03,  6.37102910e-05, -1.69285445e-02,
       -7.27783632e-03,  4.27197297e-03,  9.58620921e-02, -1.58183744e+00,
       -1.27695776e+02])

## Linear Regression using sklearn

In [7]:
mlr = LinearRegression()

In [8]:
mlr.fit(X_train, y_train)

LinearRegression()

In [10]:
mlr.coef_

array([ 6.42053134e-02,  6.45735927e-03,  1.24041896e-04, -1.65280033e-02,
       -6.63048889e-03,  3.80810324e-03,  9.31410835e-02, -1.53761324e+00])

In [9]:
y_pred = mlr.predict(X_test)

In [14]:
print(y_test.iloc[0], y_pred[0])

0.601 0.4686024216990887


In [10]:
err = r2_score(y_test, y_pred)

In [12]:
err*100

18.37783533313837

In [13]:
acc = 100 - err*100

In [14]:
acc

81.62216466686164

#### sampling random values using uniform in the given range

In [15]:
test = []
for i in range(5):
    temp = [np.random.uniform(-1.635, 3.9045), np.random.uniform(340.25793, 400.8917), np.random.uniform(1629.2541, 1814.18),\
            np.random.uniform(303.677, 330.8543), np.random.uniform(191.258, 280.145), np.random.uniform(350.45, 543.15983),\
            np.random.uniform(1365.426, 1367.3162), np.random.uniform(0.0016, 0.1621)]
    test.append(temp)

In [16]:
mlr.predict(test)

array([ 0.00541081,  0.3820176 ,  0.12919262,  0.18893781, -0.09135767])