__Importing libraries__

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn import metrics
from sklearn.preprocessing import PolynomialFeatures

__Getting data of CO2 Emissions and Sea level rise__

In [2]:
# Raw emissions table, read from co2.xlsx relative to the working directory.
co2df = pd.read_excel('co2.xlsx')

In [3]:
#co2df.head()

In [4]:
# Keep only the 'World' rows and total the annual emissions per year.
df0 = (
    co2df.loc[co2df['Entity'] == 'World']
    .groupby(['Year'])['Annual CO2 emissions']
    .sum()
    .reset_index(name='CO2 emissions (tonnes)')
)

In [5]:
# Keep 1880 onwards (presumably the first year of the sea-level series - confirm
# against sea-level.xlsx).
df1 = df0[df0['Year'] >= 1880]

In [6]:
# Persist the filtered world emissions without the positional index column.
df1.to_csv("CO2 Emissions.csv", index=False)

In [7]:
# Sea-level table, read from sea-level.xlsx relative to the working directory.
sealeveldf = pd.read_excel('sea-level.xlsx')

In [8]:
#sealeveldf.head()

In [9]:
# Reload the filtered emissions table from the CSV written by df1.to_csv above.
co2emmisiondf = pd.read_csv("CO2 Emissions.csv")

In [10]:
#co2emmisiondf.head()

In [11]:
# Attach the sea-level columns to each emissions row by matching on 'Year'
# (DataFrame.join keeps every row of the left-hand frame).
result = co2emmisiondf.join(
    sealeveldf.set_index('Year'),
    on='Year',
)

In [12]:
#result

In [13]:
# Promote 'Year' to the index. Guarded so that re-running this cell is a no-op
# instead of raising KeyError once 'Year' is no longer a column.
if 'Year' in result.columns:
    result = result.set_index('Year')

In [14]:
result.head()

Unnamed: 0_level_0,CO2 emissions (tonnes),CSIRO - Adjusted sea level (inches)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1880,853707200.0,0.0
1881,882407500.0,0.220472
1882,931924700.0,-0.440945
1883,991035700.0,-0.232283
1884,1002178000.0,0.590551


__Correlation between CO2 Emissions and sea level__

In [15]:
# Pairwise Pearson correlation between the columns of `result`.
correlation = result.corr(method='pearson')

In [16]:
correlation

Unnamed: 0,CO2 emissions (tonnes),CSIRO - Adjusted sea level (inches)
CO2 emissions (tonnes),1.0,0.970388
CSIRO - Adjusted sea level (inches),0.970388,1.0


In [17]:
# Same computation as In [15]; re-running it leaves `correlation` unchanged.
correlation = result.corr(method='pearson')

_Strong positive correlation (Pearson r ≈ 0.97) between CO2 emissions and sea level rise._

__Linear Regression__

In [18]:
#co2emmisiondf.head()

In [19]:
# Predictor: the year. Target: the CO2 emissions recorded for that year.
X, Y = co2emmisiondf['Year'], co2emmisiondf['CO2 emissions (tonnes)']

In [20]:
# NumPy view of the predictor so it can be reshaped after the split.
Xarr = np.asarray(X)

In [21]:
# NumPy copy of the target. Note: the split in In [22] is given the Series `Y`,
# not this array.
Yarr = np.asarray(Y)

In [22]:
# Hold out 5% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    Xarr,
    Y,
    test_size=0.05,
    random_state=5,
)

In [23]:
X_train.shape

(134,)

In [24]:
# Fresh, unfitted linear regression for the year -> emissions fit.
model = LinearRegression()

In [25]:
# Turn the 1-D year arrays into single-column 2-D arrays for the estimator.
X_train_reshaped, X_test_reshaped = X_train.reshape(-1, 1), X_test.reshape(-1, 1)

In [26]:
print(X_train_reshaped.shape, X_test_reshaped.shape)

(134, 1) (8, 1)


In [27]:
# Fit on the training years; the fitted estimator is this cell's displayed value.
model.fit(X_train_reshaped, Y_train)

LinearRegression()

In [28]:
# Predict emissions for the held-out years.
prediction = model.predict(X_test_reshaped)

In [29]:
# NOTE: despite the variable name, this is the R^2 score (1.0 is a perfect fit), not an error.
r_square_err = metrics.r2_score(Y_test, prediction)

In [30]:
r_square_err

0.8683041340065996

In [31]:
# Single query year, converted from a list to a 1-D NumPy array.
input_data = [2009]
arr = np.asarray(input_data)

In [32]:
# The model expects a 2-D array, so reshape the 1-D input into one column.
reshaped_arr = arr.reshape(-1,1) # shape (n, 1): one row per value, a single column
reshaped_arr

array([[2009]])

In [33]:
# NOTE: rebinds `prediction`, replacing the test-set predictions from In [28].
prediction = model.predict(reshaped_arr)
print(prediction)

[2.71294875e+10]


__Polynomial Regression__

In [34]:
# Expand the training years into degree-2 polynomial features, without a bias column.
x_ = PolynomialFeatures(
    degree=2,
    include_bias=False,
).fit_transform(X_train_reshaped)

In [35]:
#x_

In [36]:
# NOTE: rebinds `model`. From here on it is the regression fitted on the
# polynomial features, no longer the straight-line model from In [24]-[27].
model = LinearRegression().fit(x_, Y_train)

In [37]:
# Build the same degree-2 features (no bias column) for the held-out years.
x_test_ = PolynomialFeatures(
    degree=2,
    include_bias=False,
).fit_transform(X_test_reshaped)

In [38]:
# Predict emissions for the held-out years with the polynomial model.
prediction = model.predict(x_test_)

In [39]:
# R^2 score of the polynomial model; overwrites the linear model's score stored
# under the same name in In [29].
r_square_err = metrics.r2_score(Y_test, prediction)

In [40]:
r_square_err

0.9903606516909601

In [41]:
# Same single query year as in the linear section, as a 1-D NumPy array.
input_data = [2009]
arr = np.asarray(input_data)

In [42]:
# The feature transformer expects a 2-D array: reshape(-1, 1) gives shape (n, 1),
# i.e. one row per value in a single column.
reshaped_arr = arr.reshape(-1,1)
reshaped_arr

array([[2009]])

In [43]:
reshaped_arr.shape

(1, 1)

In [44]:
# Degree-2 features (no bias column) for the single query year.
y_test1_ = PolynomialFeatures(
    degree=2,
    include_bias=False,
).fit_transform(reshaped_arr)

In [45]:
# Polynomial-model prediction for the query year; rebinds `prediction`.
prediction = model.predict(y_test1_)
print(prediction)

[3.14901095e+10]


_Actual CO2 emissions in 2009 = 31607042410 tonnes, i.e. about 3.1607e+10_

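_*Very satisfying result: the prediction of 3.149e+10 is within about 0.4% of the actual value (note that 2009 may be part of the training split, so this is a sanity check rather than an out-of-sample test).*_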

_Polynomial regression is the better fit (R² ≈ 0.990 versus ≈ 0.868 for the linear model)._

__Now predicting for other years__

Prediction for 2025

In [46]:
# Query year beyond the observed data, as a 1-D NumPy array.
input_data = [2025]
arr = np.asarray(input_data)

In [47]:
# The feature transformer expects a 2-D array: reshape(-1, 1) gives shape (n, 1),
# i.e. one row per value in a single column.
reshaped_arr = arr.reshape(-1,1)
reshaped_arr

array([[2025]])

In [48]:
# Degree-2 features (no bias column) for the 2025 query.
y_test1_ = PolynomialFeatures(
    degree=2,
    include_bias=False,
).fit_transform(reshaped_arr)

In [49]:
# Polynomial-model prediction for 2025; rebinds `prediction`.
prediction = model.predict(y_test1_)
print(prediction)

[4.09963832e+10]


__Creating a function__

In [50]:
def co2emission_predictor(input_data, estimator=None):
    """Predict CO2 emissions (tonnes) for one or more years.

    Parameters
    ----------
    input_data : scalar or array-like
        Year(s) to predict for. The values are flattened into a single
        feature column, so ``2030`` and ``[2030, 2035]`` are both accepted.
    estimator : object with a ``predict`` method, optional
        Fitted regressor that takes the degree-2 polynomial features built
        here. When omitted, the notebook-level ``model`` is looked up at call
        time. That name is rebound later in the notebook (In [64]), so pass
        the polynomial model explicitly if you call this after that point.

    Returns
    -------
    The value returned by ``estimator.predict``: one prediction per year.
    """
    # The transformer and the regressor both expect a 2-D (n, 1) column.
    years = np.asarray(input_data).reshape(-1, 1)
    features = PolynomialFeatures(degree=2, include_bias=False).fit_transform(years)
    regressor = model if estimator is None else estimator
    return regressor.predict(features)

In [51]:
co2emission_predictor([2030])

array([4.42342117e+10])

In [52]:
co2emission_predictor([2035])

array([4.75992392e+10])

In [53]:
co2emission_predictor([2040])

array([5.10914657e+10])

In [54]:
co2emission_predictor([2045])

array([5.47108913e+10])

In [55]:
co2emission_predictor([2050])

array([5.84575159e+10])

In [56]:
# Emission estimates for every fifth year from 2025 to 2050.
# One predictor call per year, so each value matches a standalone call such as
# co2emission_predictor([2030])[0].
df_estimates1 = pd.DataFrame({'Year': [2025, 2030, 2035, 2040, 2045, 2050]})
df_estimates1['co2 emissions (tonnes)'] = [
    co2emission_predictor([year])[0] for year in df_estimates1['Year']
]

In [57]:
df_estimates1

Unnamed: 0,Year,co2 emissions (tonnes)
0,2025,40996380000.0
1,2030,44234210000.0
2,2035,47599240000.0
3,2040,51091470000.0
4,2045,54710890000.0
5,2050,58457520000.0


Now applying linear regression to the `result` dataframe (sea level as a function of CO2 emissions)

In [58]:
#result.head()

In [59]:
# Predictor: CO2 emissions. Target: adjusted sea level. Rebinds X and Y from the
# emissions section.
X, Y = result['CO2 emissions (tonnes)'], result['CSIRO - Adjusted sea level (inches)']

In [60]:
# NumPy view of the emissions predictor so it can be reshaped after the split.
Xarr = np.asarray(X)

In [61]:
# NumPy copy of the sea-level target. Note: the split in In [62] is given the
# Series `Y`, not this array.
Yarr = np.asarray(Y)

In [62]:
# Hold out 5% of the rows for testing; this split uses random_state=2
# (the emissions split in In [22] used 5).
X_train, X_test, Y_train, Y_test = train_test_split(
    Xarr,
    Y,
    test_size=0.05,
    random_state=2,
)

In [63]:
X_train.shape

(134,)

In [64]:
# NOTE: rebinds the global `model` to a new, unfitted linear model. After this cell,
# co2emission_predictor (which looks up `model` at call time) no longer uses the
# polynomial emissions model.
model = LinearRegression()

In [65]:
# Turn the 1-D emissions arrays into single-column 2-D arrays for the estimator.
X_train_reshaped, X_test_reshaped = X_train.reshape(-1, 1), X_test.reshape(-1, 1)

In [66]:
print(X_train_reshaped.shape, X_test_reshaped.shape)

(134, 1) (8, 1)


In [67]:
# Fit sea level against emissions; the fitted estimator is this cell's displayed value.
model.fit(X_train_reshaped, Y_train)

LinearRegression()

In [68]:
# Predict sea level for the held-out emission values.
prediction = model.predict(X_test_reshaped)

In [69]:
# NOTE: despite the variable name, this is the R^2 score (1.0 is a perfect fit), not an error.
r_square_err = metrics.r2_score(Y_test, prediction)
r_square_err

0.9557969083494541

_Satisfying result (R² ≈ 0.956 on the held-out rows)_

_CO2 emissions in 2009 = 31607042410 tonnes_

In [70]:
# 2009 CO2 emissions in tonnes (the figure quoted in the markdown above), as a 1-D array.
input_data = [31607042410]
arr = np.asarray(input_data)

In [71]:
# The model expects a 2-D array: reshape(-1, 1) gives shape (n, 1),
# i.e. one row per value in a single column.
reshaped_arr = arr.reshape(-1,1)
reshaped_arr

array([[31607042410]], dtype=int64)

In [72]:
reshaped_arr.shape

(1, 1)

In [73]:
# Sea-level prediction (inches) for the 2009 emissions value; rebinds `prediction`.
prediction = model.predict(reshaped_arr)
print(prediction)

[8.57032238]


__Actual sea level rise in 2009 was 8.531496054 inches__

_Very satisfying result: the prediction of 8.570 inches is within about 0.04 inches of the actual value._

In [74]:
df_estimates1['co2 emissions (tonnes)']

0    4.099638e+10
1    4.423421e+10
2    4.759924e+10
3    5.109147e+10
4    5.471089e+10
5    5.845752e+10
Name: co2 emissions (tonnes), dtype: float64

In [75]:
def sea_level_predictor(input_data, estimator=None):
    """Predict adjusted sea level (inches) from CO2 emissions (tonnes).

    Parameters
    ----------
    input_data : scalar or array-like
        Emission value(s) to predict for. The values are flattened into a
        single feature column, so a scalar and a list are both accepted.
    estimator : object with a ``predict`` method, optional
        Fitted regressor that takes one feature column. When omitted, the
        notebook-level ``model`` is looked up at call time, so the result
        depends on whatever ``model`` is bound to at that moment.

    Returns
    -------
    The value returned by ``estimator.predict``: one prediction per input value.
    """
    # The regressor expects a 2-D (n, 1) column rather than a flat sequence.
    emissions = np.asarray(input_data).reshape(-1, 1)
    regressor = model if estimator is None else estimator
    return regressor.predict(emissions)

In [76]:
# Feed the estimated emissions through the sea-level model in a single call.
co2em = df_estimates1['co2 emissions (tonnes)'].tolist()
slr = sea_level_predictor(co2em).tolist()

# Reuse the years from df_estimates1 so the two tables cannot drift apart.
df_estimates2 = pd.DataFrame({
    'Year': df_estimates1['Year'].tolist(),
    'sea level (inches)': slr,
})

In [77]:
df_estimates2

Unnamed: 0,Year,sea level (inches)
0,2025,10.777018
1,2030,11.537977
2,2035,12.32883
3,2040,13.149578
4,2045,14.00022
5,2050,14.880757


In [78]:
# Combine the emission and sea-level estimates into one table, matched on 'Year'.
result_estimates = df_estimates1.join(
    df_estimates2.set_index('Year'),
    on='Year',
)

In [79]:
# Promote 'Year' to the index. Guarded so that re-running this cell is a no-op
# instead of raising KeyError once 'Year' is no longer a column.
if 'Year' in result_estimates.columns:
    result_estimates = result_estimates.set_index('Year')

In [80]:
result_estimates

Unnamed: 0_level_0,co2 emissions (tonnes),sea level (inches)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2025,40996380000.0,10.777018
2030,44234210000.0,11.537977
2035,47599240000.0,12.32883
2040,51091470000.0,13.149578
2045,54710890000.0,14.00022
2050,58457520000.0,14.880757


In [81]:
# Write the estimates to disk, keeping the index as the first CSV column.
result_estimates.to_csv("Estimates.csv", index=True)