In [1]:
import os as os
import json
import pandas as pd
import pandas_datareader.data as web
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline
from pandas.tseries import *
import matplotlib.dates as mdates
import matplotlib.cbook as cbook
# Module-level matplotlib date helpers for time-series plot axes.
# `years` is a YearLocator instance.
years = mdates.YearLocator()
# NOTE(review): unlike `years`, this binds the MonthLocator *class* itself (no call
# parentheses), not an instance — confirm whether `mdates.MonthLocator()` was intended.
months = mdates.MonthLocator
# Formatter rendering dates as YYYY-MM-DD.
myFmt = mdates.DateFormatter('%Y-%m-%d')

In [2]:
def moving_average(df, n):
    """Return a copy of ``df`` extended with the simple moving average of 'Close'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column.
    n : int
        Window length; rows with fewer than ``n`` observations are NaN.

    Returns
    -------
    pandas.DataFrame
        ``df`` joined with a new column named ``'MA_<n>'``. ``df`` itself is not modified.
    """
    column = 'MA_' + str(n)
    sma = df['Close'].rolling(n, min_periods=n).mean().rename(column)
    return df.join(sma)


def exponential_moving_average(df, n):
    """Return a copy of ``df`` extended with the exponential moving average of 'Close'.

    The EMA uses ``span=n`` and yields NaN until ``n`` observations are available.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column.
    n : int
        Span of the exponential window, also used as ``min_periods``.

    Returns
    -------
    pandas.DataFrame
        ``df`` joined with a new column named ``'EMA_<n>'``.
    """
    column = 'EMA_' + str(n)
    ema = df['Close'].ewm(span=n, min_periods=n).mean().rename(column)
    return df.join(ema)

def relative_strength_index(df, n):
    """Return a copy of ``df`` extended with the n-period Relative Strength Index.

    RSI is computed per rolling window of close-to-close changes as
    ``100 * gains / (gains + losses)``, where ``gains`` and ``losses`` are the
    summed positive and (absolute) negative changes in the window. Summing over
    the window is equivalent to averaging both over the same ``n`` periods.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column.
    n : int
        Window length in rows.

    Returns
    -------
    pandas.DataFrame
        ``df`` joined with a new column named ``'RSI_<n>'``. Rows without a full
        window, and windows with no price movement at all, are reported as 0.
    """
    def rsi(values):
        # Sums of empty selections are 0, so one-sided windows give a proper
        # 100 / 0 reading instead of NaN from the mean of an empty slice.
        gains = values[values > 0].sum()
        losses = -values[values < 0].sum()
        moved = gains + losses
        if moved == 0:
            # Completely flat window: ratio undefined, keep the 0 placeholder.
            return 0.0
        return 100.0 * gains / moved

    # First row has no previous close; treat its change as 0.
    momentum = df['Close'].diff().fillna(0)
    RSI = (momentum.rolling(window=n, center=False)
           .apply(rsi, raw=True)
           .fillna(0)
           .rename('RSI_' + str(n)))
    return df.join(RSI)

def stochastic_K(df, n):
    """Return a copy of ``df`` extended with the stochastic oscillator %K.

    ``%K = 100 * (Close - lowest Low over n) / (highest High over n - lowest Low over n)``

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'High', 'Low' and 'Close' columns.
    n : int
        Look-back window in rows.

    Returns
    -------
    pandas.DataFrame
        ``df`` joined with a new column named ``'STOK_<n>'``. Rows without a full
        window are NaN; a window whose high equals its low divides by zero and
        yields NaN/inf.
    """
    # %K is measured against the lowest low of the window (not the mean low),
    # so the numerator and denominator share the same floor.
    lowest_low = df['Low'].rolling(window=n, center=False).min()
    highest_high = df['High'].rolling(window=n, center=False).max()
    STOK = pd.Series((df['Close'] - lowest_low) / (highest_high - lowest_low) * 100,
                     name='STOK_' + str(n))
    return df.join(STOK)

def MACD(df, n_fast, n_slow):
    """Return a copy of ``df`` extended with the MACD line of 'Close'.

    The MACD line is the fast EMA minus the slow EMA. Only this line is produced;
    no signal line or histogram is computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column.
    n_fast : int
        Span of the fast EMA.
    n_slow : int
        Span of the slow EMA. Also used as ``min_periods`` for *both* EMAs, so
        both are NaN until ``n_slow`` observations exist.

    Returns
    -------
    pandas.DataFrame
        ``df`` joined with a new column named ``'MACD_<n_fast>_<n_slow>'``.
    """
    close = df['Close']
    fast_ema = close.ewm(span=n_fast, min_periods=n_slow).mean()
    slow_ema = close.ewm(span=n_slow, min_periods=n_slow).mean()
    label = 'MACD_' + str(n_fast) + '_' + str(n_slow)
    return df.join((fast_ema - slow_ema).rename(label))

def CCI(df, n):
    """Return a copy of ``df`` extended with a Commodity Channel Index column.

    The typical price ``(High + Low + Close) / 3`` is compared with its rolling
    mean and scaled by ``0.015`` times its rolling standard deviation.

    NOTE(review): the scaling uses the rolling sample standard deviation, whereas
    the classic CCI definition uses the mean absolute deviation — confirm this is
    the intended variant.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'High', 'Low' and 'Close' columns.
    n : int
        Rolling window length in rows.

    Returns
    -------
    pandas.DataFrame
        ``df`` joined with a new column named ``'CCI_<n>'``.
    """
    typical = (df['High'] + df['Low'] + df['Close']) / 3
    window = typical.rolling(window=n, center=False)
    deviation = typical - window.mean()
    scaled = deviation / (0.015 * window.std())
    return df.join(scaled.rename('CCI_' + str(n)))

def average_true_range(df, n):
    """Return a copy of ``df`` extended with the n-period average true range.

    The true range of a row is the largest of: High - Low, |High - previous Close|
    and |Low - previous Close|. The first row has no previous close, so its true
    range falls back to High - Low. ATR is the simple rolling mean of the true range.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'High', 'Low' and 'Close' columns.
    n : int
        Rolling window length in rows.

    Returns
    -------
    pandas.DataFrame
        ``df`` joined with a new column named ``'ATR_<n>'``.
    """
    prev_close = df['Close'].shift(1)
    candidates = pd.DataFrame({
        'HL': df['High'] - df['Low'],
        'absHC': (df['High'] - prev_close).abs(),
        'absLC': (df['Low'] - prev_close).abs(),
    })
    # Row-wise max skips NaN, which handles the first row.
    true_range = candidates[['HL', 'absHC', 'absLC']].max(axis=1)
    atr = true_range.rolling(window=n).mean().rename('ATR_' + str(n))
    return df.join(atr)

def AC_DT(df):
    """Add a per-row accumulation/distribution value to ``df`` as column 'acc_dist'.

    For each row the value is
    ``((Close - Low) - (High - Close)) / (High - Low) * Volume``;
    rows where High equals Low get 0 instead of dividing by zero.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'High', 'Low', 'Close' and 'Volume' columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place with the new 'acc_dist' column.
    """
    # Vectorised replacement for the row-by-row DataFrame.set_value loop
    # (set_value no longer exists in pandas >= 1.0).
    spread = df['High'] - df['Low']
    multiplier = ((df['Close'] - df['Low']) - (df['High'] - df['Close'])) / spread
    acc = multiplier * df['Volume']
    # Zero-range bars carry no directional information; overwrite the inf/NaN
    # produced by the division above.
    acc[spread == 0] = 0.0
    df['acc_dist'] = acc
    return df

def returns(df):
    """Return the simple period-over-period percentage change of 'Close'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column.

    Returns
    -------
    pandas.Series
        Percentage changes aligned to ``df``'s index; missing values (including
        the first row, which has no predecessor) are replaced with 0.
    """
    return df['Close'].pct_change().fillna(0)

def Volatility(df,n,period = 252):
    """Return a copy of ``df`` extended with rolling annualised volatility.

    Volatility is the rolling standard deviation of ``returns(df)`` over ``n``
    rows, multiplied by ``sqrt(period)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column (consumed by ``returns``).
    n : int
        Rolling window length in rows.
    period : int, default 252
        Scaling factor placed under the square root.

    Returns
    -------
    pandas.DataFrame
        ``df`` joined with a new column named ``'VOL_<n>'``.
    """
    rolling_std = returns(df).rolling(n).std()
    scaled = (rolling_std * np.sqrt(period)).rename('VOL_' + str(n))
    return df.join(scaled)

def EWMAVol(df,n,Lambda = 0.94):
    """Return a copy of ``df`` extended with a rolling exponentially weighted
    sum of squared returns.

    For every row ``t`` with a full window the value is
    ``sum(Lambda**k * r[t-k]**2 for k in range(n))`` where ``r = returns(df)``,
    i.e. the newest squared return has weight 1 and older ones decay by ``Lambda``.

    The value is not normalised by ``(1 - Lambda)`` and no square root is taken;
    this keeps the scale the previous implementation produced for its most
    recent window. NOTE(review): confirm whether a normalised volatility
    (RiskMetrics style) is wanted instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column (consumed by ``returns``).
    n : int
        Window length in rows.
    Lambda : float, default 0.94
        Decay factor applied per step back in time.

    Returns
    -------
    pandas.DataFrame
        ``df`` joined with a new column named ``'EWMA_Vol_<n>'``. The first ``n``
        rows are NaN; a frame with ``len(df) <= n`` yields an all-NaN column.
    """
    squared_returns = returns(df) ** 2
    # Weights ordered oldest -> newest so they line up with each rolling window.
    weights = np.power(float(Lambda), np.arange(n - 1, -1, -1))
    # One pass over chronological windows replaces the row-by-row
    # DataFrame.append loop, which filled results newest-window-first and
    # scaled older windows by an extra Lambda**i.
    ewma = squared_returns.rolling(n).apply(lambda window: np.dot(window, weights), raw=True)
    # Row 0 of returns(df) is a 0 placeholder rather than a real return, so the
    # first window holding n genuine returns ends at row n; keep rows < n empty.
    ewma.iloc[:n] = np.nan
    return df.join(ewma.rename('EWMA_Vol_' + str(n)))

def Yang_z_vol(df, n, time_win=252):
    """Return a copy of ``df`` extended with the Yang-Zhang volatility estimate.

    The estimator combines three rolling variance terms over ``n`` rows:
    overnight (open vs. previous close), close-to-close, and the
    Rogers-Satchell intraday term, as
    ``sqrt(open_var + k * close_var + (1 - k) * rs_var) * sqrt(time_win)`` with
    ``k = 0.34 / (1.34 + (n + 1) / (n - 1))``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Open', 'High', 'Low' and 'Close' columns.
    n : int
        Rolling window length in rows; must be at least 2, since ``n - 1`` is
        used as a divisor.
    time_win : int, default 252
        Annualisation factor placed under the square root.

    Returns
    -------
    pandas.DataFrame
        ``df`` joined with a new column named ``'Yang_Zang_Vol_<n>'`` (column
        name kept as-is for existing callers). The first ``n`` rows are NaN
        because the first row has no previous close.
    """
    log_ho = np.log(df['High'] / df['Open'])
    log_lo = np.log(df['Low'] / df['Open'])
    log_co = np.log(df['Close'] / df['Open'])
    prev_close = df['Close'].shift(1)
    log_oc_sq = np.log(df['Open'] / prev_close) ** 2
    log_cc_sq = np.log(df['Close'] / prev_close) ** 2

    scale = 1.0 / (n - 1.0)
    close_vol = log_cc_sq.rolling(n).sum() * scale
    open_vol = log_oc_sq.rolling(n).sum() * scale
    # Rogers-Satchell term per bar, then averaged over the window.
    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)
    window = rs.rolling(n).sum() * scale

    # Yang-Zhang weighting constant; the 1.34 term is part of the published formula.
    k = 0.34 / (1.34 + (n + 1) / (n - 1))
    variance = open_vol + k * close_vol + (1 - k) * window
    result = (np.sqrt(variance) * np.sqrt(time_win)).rename('Yang_Zang_Vol_' + str(n))
    return df.join(result)