# Importing the necessary libraries


In [1]:
import plotly
plotly.__version__

'4.10.0'

In [2]:
import plotly.express as ex
import plotly.graph_objs as go
import pandas as pd
import numpy as np           
from scipy.io import netcdf  
from mpl_toolkits.basemap import Basemap

ModuleNotFoundError: No module named 'mpl_toolkits.basemap'

# Description of the Data and Analysis

"Temperature Anomaly" is the difference between the long-term average temperature  and the temperature that is actually occurring. In other words, the long-term average temperature is one that would be expected; the anomaly is the difference between what you would expect and what is happening.

A positive anomaly means that the temperature was warmer than normal; a negative anomaly indicates that the temperature was cooler than normal. Actual temperature measurements are often difficult to gather. Some areas in the world have few temperature measurement stations, and temperatures must be estimated over large regions. Using anomalies allows more accurate descriptions over larger areas than actual temperatures and provides a frame of reference that allows easier analysis. 



# Importing CSV Dataset

In [None]:
# Annual global temperature anomalies; rows are tagged by a 'Source' column.
# Dataset available at: https://datahub.io/core/global-temp#readme
annual_csv = "annual_csv.csv"
df = pd.read_csv(annual_csv)
df.head(5)

NOAA National Climatic Data Center (NCDC)'s 
Global component of Climate at a Glance (GCAG):

Global temperature anomaly data come from the Global Historical Climatology Network-Monthly (GHCN-M) data set and International Comprehensive Ocean-Atmosphere Data Set (ICOADS), which have data from 1880 to the present. These two datasets are blended into a single product to produce the combined global land and ocean temperature anomalies. The available timeseries of global-scale temperature anomalies are calculated with respect to the 20th century average.

Average global mean temperature anomalies in degrees Celsius relative to a base period. GISTEMP base period: 1951-1980. GCAG base period: 20th century average. The term temperature anomaly means a departure from a reference value or long-term average. A positive anomaly indicates that the observed temperature was warmer than the reference value, while a negative anomaly indicates that the observed temperature was cooler than the reference value.

# Testing Plotly's basic plot

In [None]:
# The frame mixes several series distinguished by the 'Source' column, so draw
# one line per source.  Without `color`, Plotly connects every row in file
# order and the series collapse into a single zig-zagging trace.
fig = ex.line(df, x='Year', y='Mean', color='Source')
fig.show()

#  Cleaning and unifying messy and complex data sets 

In [None]:
# Split the long-format frame into one frame per source.  Each subset is sorted
# by year before its index is reset, so that row i of both frames refers to the
# same year -- the positional (axis=1) concatenation further down relies on it.

# GCAG series: keep 'Year' and 'Mean' (renamed to 'GCAG_Mean').
GCAG_df = (
    df[df.Source == 'GCAG']
    .sort_values('Year')[['Year', 'Mean']]
    .rename(columns={"Mean": "GCAG_Mean"})
    .reset_index(drop=True)
)

# GISTEMP Global Land-Ocean Temperature Index: only 'Mean' is kept (renamed to
# 'GISTEMP_Mean'); the year column is carried by GCAG_df.
GISTEMP_df = (
    df[df.Source == 'GISTEMP']
    .sort_values('Year')[['Mean']]
    .rename(columns={"Mean": "GISTEMP_Mean"})
    .reset_index(drop=True)
)

In [None]:
# Preview the first rows of the GCAG subset ('Year' and 'GCAG_Mean').
GCAG_df.head()

In [None]:
# Preview the first rows of the GISTEMP subset ('GISTEMP_Mean' only).
GISTEMP_df.head()

# Concatenating GCAG and GISTEMP Mean Data

In [None]:
# Put the two per-source frames side by side; rows are matched on the index.
df_final = pd.concat((GCAG_df, GISTEMP_df), axis="columns")
df_final.head(5)

# Visualization with Plotly

In [None]:
# Raw anomaly series of both sources on one chart.
axis_style = dict(showgrid=True, color='black')
fig = go.Figure(layout=go.Layout(
    xaxis=dict(title="Year", **axis_style),
    yaxis=dict(title="Temperature Change", **axis_style),
))

# One line per source, given as (column, colour) pairs in drawing order; the
# legend label is the column name itself.
for column, colour in (('GCAG_Mean', 'INDIANRED'), ('GISTEMP_Mean', 'TEAL')):
    fig.add_trace(go.Scatter(
        x=df_final.Year,
        y=df_final[column],
        name=column,
        line_color=colour,
        opacity=0.8,
    ))

# Centered title and background colours.
fig.update_layout(
    title_text="Change in Temperature between 1880 ~ 2016",
    title_x=0.5,
    title_font_family="Times New Roman",
    title_font_size=22,
    paper_bgcolor='rgba(233,233,233,1)',
    plot_bgcolor='rgba(240,235,228,1)',
)
fig.show()

# Applying Linear Predictive Model with 'sklearn'

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# scikit-learn expects a 2-D feature matrix, hence the list selector (which
# yields a one-column DataFrame); the targets are 1-D Series.  Columns are
# selected by name rather than by position so that this cell does not silently
# pick the wrong data if the column order of df_final ever changes.
X = df_final[['Year']]
GCAG_y = df_final['GCAG_Mean']
GISTEMP_y = df_final['GISTEMP_Mean']

In [None]:
# Fit a straight line of the GCAG anomaly against the year.
model = LinearRegression()
model.fit(X,GCAG_y)

In [None]:
# In-sample predictions of the GCAG fit.  Must run before `model` is refitted
# on the GISTEMP target further down, otherwise these become GISTEMP values.
pred_gcag = model.predict(X)

In [None]:
# Refit the same estimator on the GISTEMP target; this overwrites the GCAG fit.
model.fit(X,GISTEMP_y)

In [None]:
# In-sample predictions of the GISTEMP fit.
pred_gistemp = model.predict(X)

# Adding the regression results to the previous plot

In [None]:
# Observed series together with their straight-line fits.
axis_style = dict(showgrid=True, color='black')
fig = go.Figure(layout=go.Layout(
    xaxis=dict(title="Year", **axis_style),
    yaxis=dict(title="Temperature Change", **axis_style),
))

# (legend label, y-values, line colour) for each trace, in drawing order.
traces = (
    ("GCAG_Mean", df_final['GCAG_Mean'], 'rgb(203, 67, 53)'),
    ("GISTEMP_Mean", df_final['GISTEMP_Mean'], 'rgb(40, 116, 166)'),
    ("GCAG_Predicted", pred_gcag, 'rgb(22, 160, 133)'),
    ("GISTEMP_Predicted", pred_gistemp, 'rgb(142, 68, 173)'),
)
for label, values, colour in traces:
    fig.add_trace(go.Scatter(
        x=df_final.Year,
        y=values,
        name=label,
        line_color=colour,
        opacity=0.8,
    ))

# Centered title and background colours.
fig.update_layout(
    title_text="Change in Temperature between 1880 ~ 2016",
    title_x=0.5,
    title_font_family="Times New Roman",
    title_font_size=22,
    paper_bgcolor='rgba(233,233,233,1)',
    plot_bgcolor='rgba(240,235,228,1)',
)
fig.show()

## Smoother prediction line with Polynomial Features

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the single year feature into degree-2 polynomial terms
# (bias, year, year**2) so a linear model can fit a curve.
poly_reg = PolynomialFeatures(degree = 2)
X_poly = poly_reg.fit_transform(X)

# The transformer is fitted on X only.  It must not be refitted on X_poly:
# that would configure it for three input columns instead of one, and it takes
# no target, so fitting it against GCAG_y has no meaning.
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, GCAG_y)

In [None]:
# Quadratic fit for the GISTEMP series, reusing the X_poly features built for
# the GCAG fit.  PolynomialFeatures is deliberately not refitted here: fitting
# it on X_poly (its own output) would reconfigure it for three input columns,
# and the transformer ignores the target anyway.
lin_reg_3 = LinearRegression()
lin_reg_3.fit(X_poly, GISTEMP_y)

In [None]:
# In-sample predictions of the quadratic GCAG model.
pred_poly_gcag = lin_reg_2.predict(X_poly)

In [None]:
# In-sample predictions of the quadratic GISTEMP model.
pred_poly_gistemp = lin_reg_3.predict(X_poly)

In [None]:
# Observed series together with their degree-2 polynomial fits.
axis_title_font = dict(family='Times New Roman', size=16)
fig = go.Figure(layout=go.Layout(
    xaxis=dict(
        showgrid=True,
        title="Year",
        title_font=axis_title_font,
        color='black',
    ),
    yaxis=dict(
        showgrid=True,
        title="Global Temperature Anomaly",
        title_font=axis_title_font,
        color='black',
    ),
))

# (legend label, y-values, line colour) for each trace, in drawing order.
traces = (
    ("GCAG_Mean", df_final['GCAG_Mean'], 'rgb(203, 67, 53)'),
    ("GISTEMP_Mean", df_final['GISTEMP_Mean'], 'rgb(40, 116, 166)'),
    ("GCAG_Predicted Poly", pred_poly_gcag, 'rgb(22, 160, 133)'),
    ("GISTEMP_Predicted Poly", pred_poly_gistemp, 'rgb(142, 68, 173)'),
)
for label, values, colour in traces:
    fig.add_trace(go.Scatter(
        x=df_final.Year,
        y=values,
        name=label,
        line_color=colour,
        opacity=0.8,
    ))

# Centered title and background colours.
fig.update_layout(
    title_text="Global Average Temperature 1880 - 2016",
    title_x=0.5,
    title_font_family="Times New Roman",
    title_font_size=22,
    paper_bgcolor='rgba(233,233,233,1)',
    plot_bgcolor='rgba(240,235,228,1)',
)
fig.show()

## Prediction with a specific year as an argument
Works for any year from 1880 onward, based on the polynomial regression model. Years beyond 2016 are extrapolations past the fitted data and become less reliable the further out they go.

In [None]:
# Year to estimate; change this value to query another year.
target_year = 2020

# Build the degree-2 feature row once and reuse it for both models.
# fit_transform (not transform) is intentional: PolynomialFeatures only learns
# the number of input columns, so refitting on this one-column query returns
# the right terms whatever input the transformer was last fitted on.
year_features = poly_reg.fit_transform([[target_year]])
temp_rise_gcag = lin_reg_2.predict(year_features)
temp_rise_gistemp = lin_reg_3.predict(year_features)

# predict() returns one-element arrays; print the scalar values.
print("The temperature rise in GCAG is {:.4f}, and GISTEMP is {:.4f}".format(
    temp_rise_gcag[0], temp_rise_gistemp[0]))

 # Conclusion

Based on the prediction model, the global mean temperature anomaly in the year 2020 was estimated to be 0.8220652 by GCAG and 0.83537488 by GISTEMP, each relative to its own base period (GCAG: 20th century average; GISTEMP: 1951 to 1980 average). Both values have clearly increased relative to those base periods. Therefore, this analysis concludes that the global average temperature is increasing over time.