In [1]:
import pandas as pd
import numpy as np
import time
import datetime

In [6]:
# url = 'https://launchpad.net/~mario-mariomedina/+archive/ubuntu/talib/+files'
# ext = '0.4.0-oneiric1_amd64.deb -qO'
# !wget $url/libta-lib0_$ext libta.deb
# !wget $url/ta-lib0-dev_$ext ta.deb
# !dpkg -i libta.deb ta.deb
# !pip install ta-lib
import talib as ta
import pandas_ta as pta

In [2]:
# Load the combined price export and discard the leftover "Unnamed: 0" column
# (presumably a row index written by an earlier to_csv call - confirm against the data source).
data = pd.read_csv('data.csv').drop(columns="Unnamed: 0")
data

Unnamed: 0,date,open_x,high_x,low_x,close_x,volume_x,market cap_x,open_y,high_y,low_y,close_y,volume_y,market cap_y,open,high,low,close,volume,market cap
0,2015-08-07,2.8300,3.5400,2.5200,2.7700,1.643290e+05,1.666106e+08,1.0,1.0,1.0000,1.0000,1.890550e+05,4.516000e+05,278.74,280.39,276.37,279.58,4.248480e+07,4.045993e+09
1,2015-08-08,2.7900,2.8000,0.7147,0.7533,6.741880e+05,4.548689e+07,1.0,1.0,1.0000,1.0000,2.229000e+03,4.516000e+05,279.74,279.93,260.71,261.00,5.853300e+07,3.778049e+09
2,2015-08-09,0.7061,0.8798,0.6292,0.7019,5.321700e+05,4.239957e+07,1.0,1.0,1.0000,1.0000,6.570000e+02,4.516000e+05,261.12,267.00,260.47,265.08,2.378960e+07,3.838130e+09
3,2015-08-10,0.7140,0.7299,0.6365,0.7084,4.052830e+05,4.281836e+07,1.0,1.0,1.0000,1.0000,1.522650e+05,4.516000e+05,265.48,267.03,262.60,264.47,2.097940e+07,3.830352e+09
4,2015-08-11,0.7081,1.1300,0.6632,1.0700,1.463100e+06,6.456929e+07,1.0,1.0,1.0000,1.0000,5.260000e+02,4.516000e+05,264.34,270.39,264.09,270.39,2.543390e+07,3.917143e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2717,2023-01-14,1451.4300,1563.7400,1450.9900,1550.7100,1.544463e+10,1.897660e+11,1.0,1.0,1.0000,1.0000,5.370827e+10,6.628300e+10,19910.54,21075.14,19907.83,20976.30,3.896778e+10,4.040522e+11
2718,2023-01-15,1550.7300,1556.9500,1520.8900,1552.4800,6.774614e+09,1.899829e+11,1.0,1.0,1.0000,1.0000,3.002822e+10,6.638018e+10,20977.48,20993.75,20606.99,20880.80,1.929841e+10,4.022347e+11
2719,2023-01-16,1552.5200,1594.0400,1529.5700,1576.8300,8.454485e+09,1.929632e+11,1.0,1.0,1.0000,1.0000,3.595737e+10,6.636375e+10,20882.22,21360.87,20715.75,21169.63,2.679249e+10,4.078182e+11
2720,2023-01-17,1577.1100,1594.0000,1553.3400,1567.8500,7.599463e+09,1.918634e+11,1.0,1.0,1.0000,1.0000,3.225087e+10,6.649171e+10,21175.83,21438.66,20978.53,21161.52,2.499998e+10,4.076820e+11


In [4]:
# Names of the un-suffixed OHLCV columns in `data`, listed in O-H-L-C-V order.
OPEN = 'open'
HIGH = 'high'
LOW = 'low'
CLOSE = 'close'
VOLUME = 'volume'

In [13]:
def range_percent_change(dataframe, method: str, length: int) -> pd.Series:
    """
    Rolling percentage range across a look-back window.

    For every row, the distance between the window's highest and lowest value
    is expressed as a fraction of the window's lowest value.

    :param dataframe: DataFrame The original OHLC dataframe
    :param method: 'HL' (rolling max of high vs. rolling min of low) or
        'OC' (rolling max of open vs. rolling min of close)
    :param length: int The length to look back
    :return: Series of fractional changes (0.05 means 5 %); the first
        ``length - 1`` rows are NaN because the window is not yet full
    :raises ValueError: if ``method`` is neither 'HL' nor 'OC'
    """
    if method == 'HL':
        top_column, bottom_column = HIGH, LOW
    elif method == 'OC':
        top_column, bottom_column = OPEN, CLOSE
    else:
        raise ValueError(f"Method {method} not defined!")

    window_max = dataframe[top_column].rolling(length).max()
    # Built once: the rolling minimum is both subtracted and used as the divisor.
    window_min = dataframe[bottom_column].rolling(length).min()
    return (window_max - window_min) / window_min

# Williams %R
def williams_r(dataframe, period: int = 14) -> pd.Series:
    """Williams %R oscillator (developed by Larry Williams).

    Places the current close inside the high/low range of the last ``period``
    rows, on a negative scale: 0 when the close equals the highest high of the
    window, -100 when it equals the lowest low.

    :param dataframe: frame providing the HIGH, LOW and CLOSE columns
    :param period: number of rows in the rolling window
    :return: Series named "<period> Williams %R"; rows before the window is
        full are NaN
    """
    window_high = dataframe[HIGH].rolling(window=period, center=False).max()
    window_low = dataframe[LOW].rolling(window=period, center=False).min()

    distance_from_high = window_high - dataframe[CLOSE]
    window_range = window_high - window_low

    ratio = pd.Series(distance_from_high / window_range, name=f"{period} Williams %R")
    return ratio * -100

def chaikin_money_flow(dataframe, n=20, fillna=False):
    """Chaikin Money Flow over a rolling window of ``n`` rows.

    Each row's money-flow multiplier, ((close - low) - (high - close)) /
    (high - low), is weighted by that row's volume; the rolling sum of those
    weighted values is then divided by the rolling sum of volume.

    :param dataframe: frame providing the HIGH, LOW, CLOSE and VOLUME columns
    :param n: window length in rows (partial windows at the start are used,
        since ``min_periods=0``)
    :param fillna: when True, infinite and NaN results are replaced with 0
    :return: Series named 'cmf'
    """
    high, low = dataframe[HIGH], dataframe[LOW]
    close, volume = dataframe[CLOSE], dataframe[VOLUME]

    multiplier = ((close - low) - (high - close)) / (high - low)
    # A bar with high == low yields 0/0 = NaN; count it as zero money flow.
    flow_volume = multiplier.fillna(0.0) * volume

    flow_sum = flow_volume.rolling(n, min_periods=0).sum()
    volume_sum = volume.rolling(n, min_periods=0).sum()
    cmf = flow_sum / volume_sum

    if fillna:
        cmf = cmf.replace([np.inf, -np.inf], np.nan).fillna(0)
    return pd.Series(cmf, name='cmf')


# Exponential moving averages of the close over 8 and 50 rows.
data['ema_8'] = ta.EMA(data[CLOSE], timeperiod=8)
data['ema_50'] = ta.EMA(data[CLOSE], timeperiod=50)

# pandas_ta `cti` of the close with look-back lengths 10 and 30.
data['cti'] = pta.cti(data[CLOSE], length=10)
data['cti_30'] = pta.cti(data[CLOSE], length=30)

# Ratio of each close to the previous close (NaN on the first row).
crsi_closechange = data[CLOSE] / data[CLOSE].shift(1)
# Direction of the move as a NumPy array: +1.0 for a rise, -1.0 for a fall, 0.0 otherwise
# (a NaN ratio compares False on both tests, so the first row becomes 0.0).
crsi_updown = np.where(crsi_closechange.gt(1), 1.0, np.where(crsi_closechange.lt(1), -1.0, 0.0))
# Mean of three terms: 3-period RSI of the close, 2-period RSI of the direction array,
# and the 100-period ROC of the close.
# NOTE(review): the name suggests Connors RSI, whose third term is usually a percent rank
# of ROC rather than the raw ROC value - confirm this variant is intended.
data['crsi'] =  (ta.RSI(data[CLOSE], timeperiod=3) + ta.RSI(crsi_updown, timeperiod=2) + ta.ROC(data[CLOSE], 100)) / 3

In [14]:
# Rate of change of the close over 9 rows
data['roc'] = ta.ROC(data[CLOSE], timeperiod=9)

# Relative strength index of the close over 14 rows
data['rsi'] = ta.RSI(data[CLOSE], timeperiod=14)

# Williams %R over a 50-row window
data['r_50'] = williams_r(dataframe=data, period=50)
# High-to-low percentage range over the last 5 rows
data['hl_pct_change_5'] = range_percent_change(dataframe=data, method='HL', length=5)
# Chaikin money flow over a 20-row window
data['cmf'] = chaikin_money_flow(dataframe=data, n=20)

In [15]:
def T3(dataframe, length=5, v_factor=0.7):
    """T3 moving average of the close price.

    The close is passed through six cascaded EMAs of the same ``length``; the
    third to sixth stages are then blended with coefficients derived from
    ``v_factor``. The four coefficients always sum to 1.

    The input frame is neither copied nor modified.

    :param dataframe: frame providing the CLOSE column
    :param length: time period handed to every EMA stage
    :param v_factor: blending factor ``b`` used to build the coefficients;
        the default 0.7 reproduces the value that used to be hard-coded
    :return: Series named 'T3Average', indexed like ``dataframe``
    """
    stages = []
    smoothed = dataframe[CLOSE]
    for _ in range(6):
        # Each stage smooths the output of the previous one.
        smoothed = ta.EMA(smoothed, timeperiod=length)
        stages.append(smoothed)
    xe3, xe4, xe5, xe6 = stages[2:]

    b = v_factor
    c1 = -b * b * b
    c2 = 3 * b * b + 3 * b * b * b
    c3 = -6 * b * b - 3 * b - 3 * b * b * b
    c4 = 1 + 3 * b + b * b * b + 3 * b * b
    blended = c1 * xe6 + c2 * xe5 + c3 * xe4 + c4 * xe3

    # Re-wrap positionally so the result carries the input index and the
    # expected name whether the EMA call returned a Series or a plain array.
    return pd.Series(np.asarray(blended), index=dataframe.index, name='T3Average')

# Store the T3 average of the close (default length of 5) as a new column.
data['T3'] = T3(data)

In [17]:
# Lowest low of the five rows before the current one (the shift excludes the current row).
data['low_5'] = data[LOW].shift(1).rolling(window=5).min()
# True when at least one of the three conditions holds; comparisons against NaN count as False.
data['safe_dump_50'] = (
    data['hl_pct_change_5'].lt(0.66)
    | data[CLOSE].lt(data['low_5'])
    | data[CLOSE].gt(data[OPEN])
)
def vwap(df):
    """Append a cumulative volume-weighted average price column.

    The typical price of each row, (low + close + high) / 3, is weighted by
    the row's volume; the running sum of those products is divided by the
    running sum of volume, both accumulated from the first row of ``df``.

    :param df: frame providing the VOLUME, LOW, CLOSE and HIGH columns
    :return: a new frame equal to ``df`` plus a 'Weighted_Price' column
    """
    volume = df[VOLUME].values
    typical_price = ((df[LOW] + df[CLOSE] + df[HIGH]) / 3).values
    traded_value = typical_price * volume
    weighted_price = traded_value.cumsum() / volume.cumsum()
    return df.assign(Weighted_Price=weighted_price)

# Rebind `data` to the frame that carries the extra 'Weighted_Price' column.
data = vwap(df=data)

In [20]:
# Keep only rows with no missing value in any column
# (presumably this removes the indicator warm-up rows - confirm the resulting row count).
data = data.dropna(axis=0, how='any')

In [21]:
# Write the feature table to disk; the row index is written as the first, unnamed column.
data.to_csv('final_data.csv', index=True)

NameError: name 'data' is not defined