In [5]:
import pandas as pd
import statsmodels.tsa.stattools as ts


def create_dataframe(data_csv):
    """
    Read pricing data csv download for Google (GOOG)
    OHLCV data from 01/09/2004-31/08/2020 into a DataFrame.

    Parameters
    ----------
    data_csv : `csv`
        CSV file containing pricing data

    Returns
    -------
    `pd.DataFrame`
        A DataFrame containing Google (GOOG) OHLCV data from
        01/09/2004-31/08/2020. Index is a Datetime object.

    """
    # Create a pandas DataFrame containing the Google OHLCV data
    # from 1/1/2000 to 1/1/2013
    goog = pd.read_csv(data_csv, index_col="Date")
    # Convert index to a Datetime object
    goog.index = pd.to_datetime(goog.index)
    return goog


def augmented_dickey_fuller(goog):
    """
    Carry out the Augmented Dickey-Fuller test for Google data.

    Parameters
    ----------
    goog : `pd.DataFrame`
      A DataFrame containing Google (GOOG) OHLCV data from
          01/09/2004-31/08/2020. Index is a Datetime object.

    Returns
    -------
    None
    """
    # Output the results of the Augmented Dickey-Fuller test for Google
    # with a lag order value of 1
    adf = ts.adfuller(goog['Adj Close'], 1)
    print(adf)



data_csv = "GOOG.csv"

goog_df = create_dataframe(data_csv)
goog_adf = augmented_dickey_fuller(goog_df)

FileNotFoundError: [Errno 2] No such file or directory: 'GOOG.csv'

In [4]:
from numpy import cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn


def hurst(ts):
    """
    Returns the Hurst Exponent of the time series vector ts

    Parameters
    ----------
    ts : `numpy.array`
        Time series upon which the Hurst Exponent will be calculated

    Returns
    -------
    'float'
        The Hurst Exponent from the poly fit output
    """
    # Create the range of lag values
    lags = range(2, 100)

    # Calculate the array of the variances of the lagged differences
    tau = [sqrt(std(subtract(ts[lag:], ts[:-lag]))) for lag in lags]

    # Use a linear fit to estimate the Hurst Exponent
    poly = polyfit(log(lags), log(tau), 1)

    # Return the Hurst exponent from the polyfit output
    return poly[0]*2.0


# Create a Gometric Brownian Motion, Mean-Reverting and Trending Series
gbm = log(cumsum(randn(100000))+1000)
mr = log(randn(100000)+1000)
tr = log(cumsum(randn(100000)+1)+1000)

# Output the Hurst Exponent for each of the above series
# and the price of Google (the Adjusted Close price) for
# the ADF test given above in the article
print("Hurst(GBM):   %s" % hurst(gbm))
print("Hurst(MR):    %s" % hurst(mr))
print("Hurst(TR):    %s" % hurst(tr))

# Assuming you have run the above code to obtain 'goog'!
print("Hurst(GOOG):  %s" % hurst(goog['Adj Close'].values))

Hurst(GBM):   0.49334241453738614
Hurst(MR):    -0.00010301934533106833
Hurst(TR):    0.9598619177362532


NameError: name 'goog' is not defined

# References
<hr style = "border:2px solid black" ></hr>


- https://www.quantstart.com/articles/Basics-of-Statistical-Mean-Reversion-Testing/
- https://www.quantstart.com/articles/Basics-of-Statistical-Mean-Reversion-Testing-Part-II/
    
