In [1]:
import yfinance as yf
import plotly.graph_objects as go

In [2]:
# Download daily price history for one ticker from Yahoo Finance.
ticker_symbol = 'AAPL'
start_date = '2021-01-01'
end_date = '2023-01-01'

# Pass auto_adjust explicitly instead of relying on the library default.
# NOTE(review): the recorded run of the bare call emitted a warning, presumably
# yfinance's auto_adjust default-change FutureWarning. The recorded output has
# no 'Adj Close' column, so adjusted prices (auto_adjust=True) were in effect.
data = yf.download(ticker_symbol, start=start_date, end=end_date, auto_adjust=True)

# Fail loudly here rather than drawing an empty chart further down.
if data.empty:
    raise RuntimeError(
        f"No price data returned for {ticker_symbol!r} between {start_date} and {end_date}"
    )

  data = yf.download(ticker_symbol, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


In [3]:
# Display the downloaded price frame (pandas shows a truncated head/tail preview).
data

Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2021-01-04,125.974457,130.062953,123.394807,129.975346,143301900
2021-01-05,127.531998,128.242629,125.020489,125.468283,97664900
2021-01-06,123.239052,127.570919,123.024891,124.329321,155088000
2021-01-07,127.444374,128.135532,124.465612,124.952339,109578200
2021-01-08,128.544357,129.108961,126.772667,128.914259,105158200
...,...,...,...,...,...
2022-12-23,129.900284,130.451959,127.713276,128.974252,63814900
2022-12-27,128.097473,129.456968,126.806945,129.427415,69007800
2022-12-28,124.166779,129.082614,123.999307,127.742826,85438400
2022-12-29,127.683716,128.540781,125.831659,126.087790,75703700


In [4]:
# Plot the daily closing price as a line chart.
#
# The downloaded frame has two-level (Price, Ticker) columns, so data['Close']
# is a one-column DataFrame rather than a Series. Select the ticker's column
# so Plotly receives a 1-D sequence for y; a flat-column frame already yields
# a Series and is used unchanged.
close_prices = data['Close']
if getattr(close_prices, 'ndim', 1) == 2:
    close_prices = close_prices[ticker_symbol]

fig = go.Figure()
fig.add_trace(go.Scatter(x=data.index, y=close_prices, mode='lines', name='Close Price'))
fig.update_layout(title=f'{ticker_symbol} Stock Prices from Yahoo Finance',
                  xaxis_title='Date',
                  yaxis_title='Price',
                  template='simple_white',
                  width=900,
                  height=500)
fig.show()

# Stationary Time Series

A stationary time series is a time series whose statistical properties do not change over time.

A time series is stationary if its:

Mean is constant

Variance is constant

Autocovariance (relationship between points at different lags) is constant over time


A time series $X_t$ is strictly stationary if the joint probability distribution of

$$(X_{t_1}, X_{t_2}, \ldots, X_{t_k})$$

is the same as the distribution of

$$(X_{t_1+\tau}, X_{t_2+\tau}, \ldots, X_{t_k+\tau})$$

for any time shift $\tau$ and any choice of time points $t_1, \ldots, t_k$.
In other words, if you slide the time window, the statistical behavior stays the same.

In [7]:
import plotly.express as px
import pandas as pd
import numpy as np 
from statsmodels.tsa.stattools import adfuller


### Example 1: White Noise (Stationary Time Series)
White noise is a classic example of a stationary time series. It has a constant mean (usually zero), constant variance, and no autocorrelation (meaning past values don't predict future values).


In [8]:
# White-noise example: 200 seeded draws from a normal distribution with
# mean 0 and standard deviation 1.
np.random.seed(42)
stationary_data = np.random.normal(0, 1, 200)

# Pair each draw with a consecutive calendar day starting 2021-01-01.
stationary_df = pd.DataFrame(
    {
        'Value': stationary_data,
        'Time': pd.date_range(start='2021-01-01', periods=stationary_data.size, freq='D'),
    }
)

fig_stationary = px.line(
    stationary_df,
    x='Time',
    y='Value',
    title='Stationary Time Series (White Noise)',
)
fig_stationary.show()

# Whole-sample summary statistics (variance uses NumPy's default ddof=0).
print(f"Mean of stationary series: {stationary_data.mean():.2f}")
print(f"Variance of stationary series: {stationary_data.var():.2f}")


Mean of stationary series: -0.04
Variance of stationary series: 0.86


### Example 2: Random Walk with Drift (Non-Stationary Time Series)
A random walk with a drift is a common example of a non-stationary time series. It tends to wander without returning to a mean, and its variance increases over time. The 'drift' adds a constant upward or downward trend.


In [9]:
# Random walk with drift: running sum of 200 seeded normal steps with
# mean 0.1 and standard deviation 1.
np.random.seed(42)
non_stationary_data = np.random.normal(0.1, 1, 200).cumsum()

# Pair each point with a consecutive calendar day starting 2021-01-01.
non_stationary_df = pd.DataFrame(
    {
        'Value': non_stationary_data,
        'Time': pd.date_range(start='2021-01-01', periods=non_stationary_data.size, freq='D'),
    }
)

fig_non_stationary = px.line(
    non_stationary_df,
    x='Time',
    y='Value',
    title='Non-Stationary Time Series (Random Walk with Drift)',
)
fig_non_stationary.show()

# Compare the first 100 observations with the remaining ones to show that
# the mean and variance differ between the two windows.
first_half = non_stationary_data[:100]
second_half = non_stationary_data[100:]
print(f"Mean of non-stationary series (first half): {first_half.mean():.2f}")
print(f"Mean of non-stationary series (second half): {second_half.mean():.2f}")
print(f"Variance of non-stationary series (first half): {first_half.var():.2f}")
print(f"Variance of non-stationary series (second half): {second_half.var():.2f}")


Mean of non-stationary series (first half): -1.36
Mean of non-stationary series (second half): 5.62
Variance of non-stationary series (first half): 8.34
Variance of non-stationary series (second half): 25.51


### Augmented Dickey-Fuller (ADF) Test
The Augmented Dickey-Fuller (ADF) test is a statistical test used to determine if a time series is stationary. The null hypothesis (H0) of the ADF test is that the time series is non-stationary (it has a unit root).

- **If the p-value is less than or equal to the significance level (e.g., 0.05)**, we reject the null hypothesis and conclude that the time series is stationary.
- **If the p-value is greater than the significance level**, we fail to reject the null hypothesis, meaning the time series is likely non-stationary.


In [10]:
# Run the Augmented Dickey-Fuller test on the white-noise sample.
result_stationary = adfuller(stationary_data)

# Pull out the fields reported below: test statistic, p-value, and the
# dictionary of critical values keyed by significance level.
adf_stat, p_value = result_stationary[:2]
critical_values = result_stationary[4]

print('ADF Test for Stationary Series:')
print(f'ADF Statistic: {adf_stat:.2f}')
print(f'P-value: {p_value:.3f}')
print('Critical Values:')
for level, threshold in critical_values.items():
    print(f'   {level}: {threshold:.2f}')

# Reject H0 (unit root) at the 5% significance level when p <= 0.05.
print(
    "Conclusion: The time series is stationary (reject H0)."
    if p_value <= 0.05
    else "Conclusion: The time series is non-stationary (fail to reject H0)."
)


ADF Test for Stationary Series:
ADF Statistic: -14.74
P-value: 0.000
Critical Values:
   1%: -3.46
   5%: -2.88
   10%: -2.57
Conclusion: The time series is stationary (reject H0).


In [None]:
# Run the Augmented Dickey-Fuller test on the random-walk sample.
result_non_stationary = adfuller(non_stationary_data)

# Pull out the fields reported below: test statistic, p-value, and the
# dictionary of critical values keyed by significance level.
adf_stat, p_value = result_non_stationary[:2]
critical_values = result_non_stationary[4]

print('\nADF Test for Non-Stationary Series:')
print(f'ADF Statistic: {adf_stat:.2f}')
print(f'P-value: {p_value:.3f}')
print('Critical Values:')
for level, threshold in critical_values.items():
    print(f'   {level}: {threshold:.2f}')

# Reject H0 (unit root) at the 5% significance level when p <= 0.05.
print(
    "Conclusion: The time series is stationary (reject H0)."
    if p_value <= 0.05
    else "Conclusion: The time series is non-stationary (fail to reject H0)."
)


In [7]:
import plotly.express as px
import pandas as pd
import numpy as np 
from statsmodels.tsa.stattools import adfuller

In [9]:
# Load the air-passengers dataset from a CSV file in the working directory.
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, which looks
# like a row index saved into the file — consider index_col=0 if nothing
# downstream relies on that column.
# NOTE(review): this rebinds `data`, which earlier in the notebook held the
# yfinance price frame; a distinct name would avoid stale-state confusion.
data = pd.read_csv('air_passengers.csv')
# Bare expression so the notebook renders the frame as the cell output.
data

Unnamed: 0,Month,#Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121
...,...,...
139,1960-08,606
140,1960-09,508
141,1960-10,461
142,1960-11,390


In [10]:
def plotting(title, data, x, y, x_label, y_label):
    """Draw and display a Plotly Express line chart of one column against another.

    Parameters
    ----------
    title : str
        Text shown as the chart title.
    data : pandas.DataFrame
        Table holding the columns to plot.
    x, y : str
        Names of the columns in ``data`` used for the horizontal and
        vertical axes.
    x_label, y_label : str
        Axis labels shown in place of the raw column names.

    Returns
    -------
    None
        The figure is rendered via ``fig.show()`` and not returned, so a call
        at the end of a cell does not display the chart twice.
    """
    # Pass the column names (not the Series) so the `labels` keys line up with
    # the plotted columns, and forward `title`, which was previously ignored.
    fig = px.line(
        data,
        x=x,
        y=y,
        title=title,
        labels={x: x_label, y: y_label},
    )
    fig.show()

In [11]:
# Draw the monthly passenger counts as a line chart
plotting(
    title='Airline Passengers',
    data=data,
    x='Month',
    y='#Passengers',
    x_label='Date',
    y_label='Passengers',
)