In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta 
from statsmodels.tsa.seasonal import seasonal_decompose
import seaborn as sns

%matplotlib inline

import warnings 
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation notices raised by the libraries
# used in later cells, so API drift can go unnoticed.
warnings.filterwarnings("ignore")

In [None]:
# Relative path to the per-country land temperature CSV.
DATA_PATH = "../input/global-rise-in-temperatures-in-each-country/GlobalLandTemperatures_GlobalLandTemperaturesByCountry.csv"

# Load the raw records into a DataFrame.
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head()

In [None]:
# Print column names, non-null counts, dtypes and memory usage.
# `info` has to be called: the bare attribute only displays the bound-method
# repr instead of the summary.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns, computed before any cleaning.
df.describe()

In [None]:
# Data type of each column as inferred by read_csv.
df.dtypes

In [None]:
# (rows, columns) of the frame before rows with missing values are dropped.
df.shape

In [None]:
# Number of missing values per column.
df.isna().sum()

In [None]:
# Discard every row that has at least one missing value.
df = df.dropna(axis="index", how="any")

In [None]:
# Re-check the dimensions to see how many rows the dropna step removed.
df.shape

In [None]:
# Shorter column names used throughout the rest of the notebook.
column_aliases = {
    "dt": "Date",
    "AverageTemperature": "Avg_temp",
    "AverageTemperatureUncertainty": "confidence_interval_temp",
}
df = df.rename(columns=column_aliases)
df.head()

In [None]:
# Parse the Date column and promote it to the index so the frame can be
# sliced and resampled by time.
df = df.assign(Date=pd.to_datetime(df["Date"])).set_index("Date")
df.index

In [None]:
# Summary statistics again, now on the cleaned, date-indexed frame.
df.describe()

In [None]:
# Expose the calendar year of each observation as a regular column; the
# line plot further down uses it as its x variable.
df = df.assign(Year=df.index.year)
df.head()

In [None]:
# Summary statistics once more; the output now includes the new Year column.
df.describe()

In [None]:
# Keep only the observations dated 1980 through 2013 (both years inclusive).
#
# A boolean mask on the index year replaces df.loc["1980":"2013"]: the index
# is never sorted in this notebook, and string-based slicing of a
# non-monotonic DatetimeIndex raises KeyError on pandas >= 2.0 when a slice
# bound is not an exact index value. The mask selects the same rows, in their
# original order, whether or not the index is sorted.
obs_year = df.index.year
latest_df = df[(obs_year >= 1980) & (obs_year <= 2013)].copy()
latest_df.head()

In [None]:
# Mean temperature per country over the selected period, coldest first.
(
    latest_df
    .groupby("Country")[["Avg_temp"]]
    .mean()
    .sort_values(by="Avg_temp")
)

In [None]:
# Line plot of temperature against year; seaborn aggregates the many
# readings that share a year into a single line.
fig, ax = plt.subplots(figsize=(9, 4))
sns.lineplot(data=latest_df, x="Year", y="Avg_temp", ax=ax)
plt.show()

In [None]:
# Average all readings that fall within each calendar year, giving one value
# per year.
# The YearEnd offset object is the same rule as the "A" string alias
# (year-end bins). pandas 2.2 deprecated that alias, while the object form is
# accepted by both older and newer releases.
resample_df = latest_df[["Avg_temp"]].resample(pd.offsets.YearEnd()).mean()

In [None]:
# Preview the first five yearly averages.
resample_df.head()

In [None]:
# Plot the yearly averages and label the axes on the Axes object that
# pandas returns.
ax = resample_df.plot(title="Temperature Changes from 1980-2013", figsize=(8, 5))
ax.set_ylabel("Temperature", fontsize=12)
ax.set_xlabel("Year", fontsize=12)
ax.legend()

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the yearly series, with the lag length
# chosen by AIC.
print("Dickey Fuller Test Results:")
test_df = adfuller(resample_df.iloc[:, 0].to_numpy(), autolag="AIC")

# The first four entries are the headline numbers; the fifth maps each
# significance level to its critical value.
adf_stat, p_value, lags_used, n_obs = test_df[0:4]
adf_summary = {
    "Test Statistic": adf_stat,
    "p-value": p_value,
    "Lags Used": lags_used,
    "Number of Observations Used": n_obs,
}
adf_summary.update(
    {"Critical Value (%s)" % level: threshold for level, threshold in test_df[4].items()}
)
df_output = pd.Series(adf_summary)
print(df_output)

In [None]:
# Decompose the yearly series into trend, seasonal and residual components
# using a cycle length of 3 observations.
# `period` is the current name of the keyword formerly called `freq`; the old
# name was deprecated in statsmodels 0.11 and is rejected by current releases.
# NOTE(review): the series is annual, so a 3-observation cycle is an arbitrary
# choice - confirm it is intended.
decomp = seasonal_decompose(resample_df, period=3)

trend = decomp.trend
seasonal = decomp.seasonal
residual = decomp.resid

In [None]:
# Draw the original series and its three decomposition components as four
# stacked panels of ONE figure.
# The previous version called plt.figure() after every subplot, which opened
# a new figure each time: the panels ended up on separate figures and
# tight_layout() was applied to a final empty one.
fig, axes = plt.subplots(4, 1, figsize=(6, 5))

panels = [
    ("Original", resample_df),
    ("Trend", trend),
    ("Seasonal", seasonal),
    ("Residual", residual),
]
for ax, (label, component) in zip(axes, panels):
    ax.plot(component)
    ax.set_xlabel(label)

fig.tight_layout()
plt.show()

In [None]:
# Number of observations used by every smoother below (the series has one
# value per year).
SMOOTHING_WINDOW = 3

# Centered rolling mean / standard deviation and an exponentially weighted
# mean of the yearly series.
rol_mean = resample_df.rolling(window=SMOOTHING_WINDOW, center=True).mean()
ewm = resample_df.ewm(span=SMOOTHING_WINDOW).mean()
rol_std = resample_df.rolling(window=SMOOTHING_WINDOW, center=True).std()

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: the series together with its two smoothed versions.
ax1.plot(resample_df, label='Original')
ax1.plot(rol_mean, label='Rolling Mean')
ax1.plot(ewm, label='Exponentially Weighted Mean')
ax1.set_title("Temperature Changes from 1980-2013", fontsize=14)
ax1.set_ylabel("Temperature", fontsize=12)
ax1.set_xlabel("Year", fontsize=12)
ax1.legend()

# Right panel: rolling standard deviation. It previously carried the left
# panel's title and y-label, which mislabelled a dispersion plot as a
# temperature plot.
ax2.plot(rol_std, label='Rolling STD')
ax2.set_title("Rolling Standard Deviation of Temperature, 1980-2013", fontsize=14)
ax2.set_ylabel("Standard Deviation", fontsize=12)
ax2.set_xlabel("Year", fontsize=12)
ax2.legend()

plt.tight_layout()
plt.show()