# Global Warming

In [None]:
# 3rd party library imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import norm
import seaborn as sns
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm
from statsmodels.tsa.api import acf

# Apply seaborn's default styling to every figure drawn below.
sns.set()

# Load the data set; later cells read its `Year` and `Temperature` columns.
df = pd.read_csv(filepath_or_buffer='case1502.csv')

In [None]:
# Raw time series: temperature against calendar year.
# Assigning to `_` keeps the returned Axes from being echoed as cell output.
_ = sns.lineplot(x='Year', y='Temperature', data=df)

## The serial correlation coefficient based on regression residuals

In [None]:
# Express time as centuries since 1900 (1900 -> 0.0, 2000 -> 1.0);
# presumably this keeps t and t**2 on a modest scale for the quadratic fit.
df['t'] = df['Year'].sub(1900).div(100)

In [None]:
# Ordinary least squares fit of temperature on a quadratic in rescaled time.
quadratic_fit = smf.ols(formula='Temperature ~ t + I(t ** 2)', data=df)
model = quadratic_fit.fit()

# Show the coefficient table and fit diagnostics as the cell output.
model.summary()

In [None]:
# ANOVA table for the quadratic fit, shown as the cell output.
anova_table = anova_lm(model)
anova_table

In [None]:
# Residuals from the quadratic fit.
resid = model.resid

# Sample autocorrelations of the residuals; element 0 is lag 0,
# so element 1 is the first serial correlation coefficient.
autocorrelations = acf(resid)
r1 = autocorrelations[1]
r1

## Regression with filtered variables

In [None]:
# Filter the response and EVERY explanatory variable with the lag-1
# autocorrelation r1:  x_t - r1 * x_{t-1}.
#
# The quadratic regressor must be the filtered version of t**2, not the square
# of the filtered t. Filtering  Y_t = b0 + b1*t + b2*t**2 + e_t  gives
#   v_t = b0*(1 - r1) + b1*u_t + b2*u2_t + (filtered error),
# so only with `u2` do the slopes on u and u2 estimate the original b1 and b2
# (the intercept estimates b0*(1 - r1)).
# shift(1) leaves the first row missing, so that row cannot enter the fit.
t_squared = df['t'] ** 2
df['u'] = df['t'] - r1 * df['t'].shift(1)
df['u2'] = t_squared - r1 * t_squared.shift(1)
df['v'] = df['Temperature'] - r1 * df['Temperature'].shift(1)

filtered_model = smf.ols('v ~ u + u2', data=df).fit()
filtered_model.summary()

In [None]:
# ANOVA table for the regression on the filtered variables, shown as output.
filtered_anova_table = anova_lm(filtered_model)
filtered_anova_table

In [None]:
# Residuals over time: quadratic fit (top) versus filtered fit (bottom).
fig, axes = plt.subplots(nrows=2)
panels = [
    (model.resid, 'Residuals from the quadratic fit'),
    (filtered_model.resid, 'Residuals from the filtered fit'),
]
for ax, (residuals, title) in zip(axes, panels):
    # Pick the years by the residuals' own index: the filtered fit has one
    # fewer row than df, so do not rely on implicit alignment when plotting.
    years = df.loc[residuals.index, 'Year']
    sns.lineplot(x=years, y=residuals, ax=ax)
    ax.set_ylabel('\N{DEGREE SIGN} C')
    ax.set_title(title)

# Keep titles and axis labels of the stacked panels from overlapping; this
# also ends the cell on a call returning None, so no Text repr is echoed.
fig.tight_layout()

## Was serial correlation even present?

In [None]:
# Wald-Wolfowitz runs test on the signs of the filtered-model residuals.
# With m negative and n_pos positive residuals, the number of runs under
# independence is approximately normal with
#   mean = 2*m*n_pos / (m + n_pos) + 1
#   var  = 2*m*n_pos * (2*m*n_pos - m - n_pos)
#          / ((m + n_pos)**2 * (m + n_pos - 1))
# NOTE(review): this tests the residuals AFTER filtering. To ask whether serial
# correlation was present in the original fit, run the same test on
# model.resid -- confirm which was intended.
signs = np.sign(filtered_model.resid)

# A new run starts at every sign change; the first residual opens run 1.
num_runs = (np.abs(signs.diff()) > 0).sum() + 1

m = (signs < 0).sum()      # number of negative residuals
n_pos = (signs > 0).sum()  # number of positive residuals
n_total = m + n_pos

mu = (2 * m * n_pos) / n_total + 1
sigma = np.sqrt(
    (2 * m * n_pos) * (2 * m * n_pos - n_total)
    / (n_total ** 2 * (n_total - 1))
)

# Continuity correction: shrink the deviation from the mean by 0.5 toward
# zero, whichever side it is on, without letting it cross zero.
deviation = num_runs - mu
z = np.sign(deviation) * max(abs(deviation) - 0.5, 0.0) / sigma

# Two-sided p-value; the survival function avoids the cancellation in 1 - cdf.
p = 2 * norm.sf(np.abs(z))
p