# Pandas Convert to Datetime

https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html

In [None]:
# Parse the 'Date' column as day/month/two-digit-year strings, then make it
# the index (in place) so the frame can be resampled by date below.
temp_data['Date'] = pd.to_datetime(temp_data['Date'], format='%d/%m/%y')
temp_data.set_index('Date',inplace=True)

# Resample Dates

In [None]:
# Bin the observations by calendar month; 'MS' labels each bin with the
# month's first day. Nothing is computed until an aggregation is applied.
temp_monthly = temp_data.resample('MS')
# Average all observations that fall within each month.
month_mean = temp_monthly.mean()

In [None]:
# Re-index onto a 12-hour grid. .asfreq() does not fill anything, so any
# timestamp that had no observation in temp_data holds NaN.
temp_bidaily = temp_data.resample('12H').asfreq()
temp_bidaily.head()

# Forward-fill: each NaN row takes the most recent earlier observation.
temp_bidaily_fill = temp_bidaily.ffill()
temp_bidaily_fill.head()

# Slicing

In [None]:
# Slice the time series to keep everything from the start of 1990 onward
# (assumes CO2 is indexed by a DatetimeIndex - TODO confirm).
post_90 = CO2['1990':]

# Retrieve the data from January 1990 through January 1991. With month-level
# strings both end months are included in full, so all of January 1991 is kept.
mid_slice = CO2['1990-01':'1991-01']

# Missing Vals

In [None]:
# Count the missing (NaN) entries in each column.
temp_data.isnull().sum()

In general, the .fillna() method can be used along with a fill strategy such as backward fill or forward fill (passed as `method='bfill'` / `method='ffill'` in older pandas versions, or called directly as .bfill() / .ffill()) to fill in missing values. .bfill() (backward filling) looks for the next valid entry in the time series and fills the gaps with this value. Similarly, .ffill() can be used to copy forward the previous valid entry of the time series (as demonstrated above).

In [None]:
# Backward-fill: each missing value takes the next valid observation.
# (Passing the string 'bfill' positionally to fillna() would instead write the
# literal text 'bfill' into every gap.)
CO2_final = CO2.bfill()

# Plot

In [None]:
# Line Plot
# NOTE(review): no plotting call follows this heading - presumably
# `nyse.plot(figsize=(20, 6))` was intended; confirm.


# Dot Plot
# style='.b' draws blue point markers with no connecting line.
nyse.plot(figsize = (20,6), style = '.b');


# Grouping for a given time interval by averaging

In [1]:
# Use pandas grouper to group values using annual frequency
year_groups = nyse.groupby(pd.Grouper(freq ='A'))

# Build one column per calendar year in a single step. Wrapping each year's
# values in a Series lets pandas pad shorter years with NaN rather than raising
# a length-mismatch ValueError (e.g. leap years or a partial first/last year),
# and avoids growing the frame one column at a time.
nyse_annual = pd.DataFrame(
    {yr.year: pd.Series(group.values.ravel()) for yr, group in year_groups}
)

# Plot the yearly groups as subplots
nyse_annual.plot(figsize = (13,8), subplots=True, legend=True);

# to plot on one plot
nyse_annual.plot(figsize = (15,5), subplots=False, legend=True);

# Box plot: one box per year
nyse_annual.boxplot(figsize = (12,7));

# Heatmap. Transpose first so that each row is one year
year_matrix = nyse_annual.T
plt.matshow(year_matrix, interpolation=None, aspect='auto', cmap=plt.cm.Spectral_r);

NameError: name 'nyse' is not defined

# Trends

## Rolling Statistics

In [None]:
# Build the 8-period trailing window once and derive both statistics from it.
window_8 = ts.rolling(window=8, center=False)
roll_mean = window_8.mean()
roll_std = window_8.std()

In [None]:
# Overlay the original series with its rolling mean and rolling std.
fig = plt.figure(figsize=(12, 7))
curves = (
    (ts, 'blue', 'Original'),
    (roll_mean, 'red', 'Rolling Mean'),
    (roll_std, 'black', 'Rolling Std'),
)
for curve, colour, caption in curves:
    plt.plot(curve, color=colour, label=caption)
plt.legend(loc='best')
plt.title('Rolling Mean & Standard Deviation')
# block=False returns immediately instead of waiting for the window to close.
plt.show(block=False)

## Dickey Fuller Test

The Dickey-Fuller test is a statistical test for testing stationarity. 

The null hypothesis for the test is that
    - <b>the time series is not stationary</b>.
    
    So if the test statistic is less than the critical value (equivalently, if the p-value is below the chosen significance level), we reject the null hypothesis and say that the series is stationary.

In [None]:
from statsmodels.tsa.stattools import adfuller

# Run the augmented Dickey-Fuller test. The returned tuple starts with the
# test statistic, p-value, lags used and number of observations, followed by a
# dict of critical values.
dftest = adfuller(ts)

# Extract and display test results in a user friendly manner
dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
for key, value in dftest[4].items():
    dfoutput['Critical Value (%s)' % key] = value
# Print the labelled summary rather than the raw result tuple.
print(dfoutput)

# Remove Trends

## Take Log or Sq Root or Cube Root

In [None]:
# Log transform of the series.
data = pd.Series(np.log(final_series), index=index)

# Square-root transform. NOTE: this rebinds `data`, so when the cell runs top
# to bottom only the square-root version survives - keep the one you need.
data = pd.Series(np.sqrt(final_series), index=index)

## subtract rolling mean

In [None]:
# Detrend by subtracting the rolling mean. Positions where roll_mean is NaN
# (NOTE(review): presumably the first window-1 rows) are NaN in the result.
data_minus_roll_mean = data - roll_mean

## Weighted Rolling Mean

https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ewm.html

In [None]:
# Use Pandas ewm() to calculate Exponential Weighted Moving Average
# halflife=2: an observation's weight halves every two periods.
exp_roll_mean = data.ewm(halflife=2).mean()

In [None]:
# Subtract the moving average from the original data
data_minus_exp_roll_mean = data - exp_roll_mean
# Preview the first 15 rows of the detrended series.
data_minus_exp_roll_mean.head(15)

## Differencing

In this technique, we take the difference of an observation at a particular time instant with that at the previous instant (i.e. a so-called 1-period "lag").

This mostly works pretty well in improving stationarity. First-order differencing can be done in Pandas using the .diff() method with periods=1 (denoting a 1-period lag). Details on .diff() can be found in the pandas documentation: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.diff.html

In [None]:
# First-order difference: each value minus the one before it. The first row
# has no predecessor, so it is NaN.
data_diff = data.diff(periods=1)
data_diff.head(10)

# Time Series Decomp

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
# Decompose the log-transformed series using seasonal_decompose's default
# settings (additive model).
decomposition = seasonal_decompose(np.log(ts))

# Gather the trend, seasonality, and residuals
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# Auto Correlation

In [None]:
# Shift the series by one period so each row holds the previous observation.
diet_shift_1 = diet.shift(periods=1)

# Place the lagged and original series side by side as two columns.
lag_1 = pd.concat([diet_shift_1, diet], axis=1)

# Correlation matrix; the off-diagonal entry is the lag-1 autocorrelation.
lag_1.corr()

In [None]:
# The Auto Correlation Function
# Open a 12x5-inch figure and plot the autocorrelation of `diet` against lag.
plt.figure(figsize=(12,5))
pd.plotting.autocorrelation_plot(diet);


In [None]:
# with differencing
# First-order difference of every column in gtrends.
gtrends_diff = gtrends.diff(periods=1)

# Keep only the 'Diet' column (as a one-column frame) and drop the NaN rows
# that differencing introduces.
diet_diff = gtrends_diff[['Diet']].dropna()

# Autocorrelation of the differenced series on a 12x6-inch figure.
plt.figure(figsize=(12,6))
pd.plotting.autocorrelation_plot(diet_diff);

In [1]:
# Partial Autocorrelation
from statsmodels.graphics.tsaplots import plot_pacf
from matplotlib.pylab import rcParams

# Set the default figure size (width, height in inches) for later figures.
rcParams['figure.figsize'] = 14, 5

# Plot the partial autocorrelation of `diet` for lags up to 100.
plot_pacf(diet, lags=100);

# ARMA Models in Statsmodels

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf

# Draw the ACF and then the PACF of `series`, each on its own wide, short
# figure, for lags up to 40.
for draw_correlogram in (plot_acf, plot_pacf):
    fig, ax = plt.subplots(figsize=(16, 3))
    draw_correlogram(series, ax=ax, lags=40)

In [None]:
# Import ARIMA. The legacy statsmodels.tsa.arima_model.ARMA class was removed
# in statsmodels 0.13 and now raises NotImplementedError; an ARMA(p, q) model
# is expressed as ARIMA with order=(p, 0, q).
from statsmodels.tsa.arima.model import ARIMA
import statsmodels.api as sm

# Instantiate an AR(1) model to the simulated data: ARMA(1,0) == ARIMA(1,0,0)
mod_arma = ARIMA(series, order=(1, 0, 0))

# Fit the model and show the parameter estimates and fit statistics.
res_arma = mod_arma.fit()
res_arma.summary()

In [None]:
"""
ARMA(1,0), ARMA(2,2) and ARMA(2,1) all seem to have decent fits with significant parameters. 
Depending on whether you pick AIC or BIC as a model selection criterion, 
your result may vary. In this situation, you'd generally go for a model with fewer parameters, 
so ARMA(1,0) seems fine. Note that we have a relatively short time series, 
which can lead to a more difficult model selection process.
"""