In [5]:
from datetime import datetime as dt, timedelta
import json
import sys
import time

import concurrent
from functools import partial
import multiprocessing.dummy as mp 
from multiprocessing import Pool, Manager, cpu_count, Manager , Process
from multiprocessing.pool import ThreadPool
import multiprocessing

from statistics import median, mean, stdev
import warnings

from bs4 import BeautifulSoup
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
import ta


# Notebook-wide display configuration: show wide/long frames in full (up to
# 999 rows/columns) and silence every warning category.
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)
warnings.filterwarnings('ignore')
# To install a missing plotting dependency from inside the notebook:
#!{sys.executable} -m pip install seaborn

# Browser-like User-Agent sent with every HTTP request made by this notebook.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/75.0.3770.80 Safari/537.36'
    ),
}

Extract data from the Finviz screener

In [4]:
def finviz_pull(url):
    """Scrape every stock listed by a Finviz screener URL.

    Example screener URLs (filters: average volume, current volume, RSI,
    market cap):
        https://finviz.com/screener.ashx?v=131&f=sh_avgvol_o500,sh_curvol_o1000,ta_rsi_os40
        https://finviz.com/screener.ashx?v=131&f=cap_smallover,sh_avgvol_o500,sh_curvol_o1000,ta_rsi_os40
        https://finviz.com/screener.ashx?v=131&f=cap_smallover,sh_avgvol_o500,sh_curvol_o500&ft=3&o=-volume

    Parameters
    ----------
    url : str
        Screener URL without the ``&r=`` paging parameter.

    Returns
    -------
    list of list of str
        One ``[symbol, name, industry]`` entry per table row, taken from the
        text of the 2nd, 3rd and 4th ``<a>`` elements of the row.

    Raises
    ------
    AttributeError
        If the page has no ``count-text`` element (layout change / blocked).
    ValueError
        If the total count cannot be parsed as an integer.
    """
    my_stocks = []

    # The session is closed when the block exits, even on error.
    with requests.Session() as s:
        s.headers.update(HEADERS)

        # First request only serves to read the total number of matches.
        screener = s.get(url)
        soup = BeautifulSoup(screener.text, 'html.parser')

        # Skip the 7-character prefix and keep the first space-delimited
        # token (presumably the text looks like "Total: <N> ..." -- confirm).
        total_stocks_element = soup.find(class_='count-text').text[7:]
        total_stocks = int(total_stocks_element.split(' ', 1)[0])

        # NOTE(review): assumes Finviz's ``r`` query parameter is the 1-based
        # index of the first row to display, so the next request starts right
        # after the rows already collected -- confirm against the live site.
        first_row = 1
        page = 1
        while len(my_stocks) < total_stocks:
            new_url = url + '&r=' + str(first_row)

            # Pull data and parse html
            stock_data = s.get(new_url)
            soup = BeautifulSoup(stock_data.text, 'html.parser')

            # Table rows alternate between two CSS classes
            table_element = (soup.find_all(class_='table-dark-row-cp')
                             + soup.find_all(class_='table-light-row-cp'))

            row_counter = 0
            for row in table_element:
                # All links of the row: [1] symbol, [2] name, [3] industry
                symbol_table = row.find_all('a')
                my_stocks.append([symbol_table[1].text,
                                  symbol_table[2].text,
                                  symbol_table[3].text])
                row_counter += 1

            # An empty page means there is nothing left to read; without this
            # exit the loop would request the same page forever.
            if row_counter == 0:
                break

            print(f"Page {page} processed, {row_counter} rows added")
            first_row += row_counter
            page += 1

    print("Done loading")
    return my_stocks

Function to query data

In [6]:
def yahoo_finance_query(symbol, period1_date, period2_date, interval='30m', pre_post = 'true'):
    """Fetch raw chart data for one symbol from the Yahoo Finance v8 chart API.

    Parameters
    ----------
    symbol : str
        Ticker symbol, inserted into the request URL.
    period1_date : str
        Start date formatted ``'%Y-%m-%d'``; midnight is appended.
    period2_date : str
        End date-time formatted ``'%Y-%m-%d %H:%M:%S'``.
    interval : str
        Value sent as the ``interval`` query parameter.
    pre_post : str
        Value sent as the ``includePrePost`` query parameter.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    ValueError
        If a date string does not match its expected format, or the response
        body is not valid JSON.
    """
    # Convert both boundaries to epoch seconds (interpreted in local time by mktime)
    period1 = int(time.mktime(time.strptime(period1_date + ' 00:00:00', '%Y-%m-%d %H:%M:%S')))
    period2 = int(time.mktime(time.strptime(period2_date, '%Y-%m-%d %H:%M:%S')))

    params = {"symbol": symbol,
              "period1": period1,
              "period2": period2,
              "interval": interval,
              "includePrePost": pre_post}
    url = f'https://query1.finance.yahoo.com/v8/finance/chart/{symbol}'

    # Use the session as a context manager so its connections are released
    # after every call instead of leaking one session per symbol.
    with requests.Session() as s:
        s.headers.update(HEADERS)
        stock_price = s.get(url, params=params)
        return stock_price.json()

Parse data out of JSON and create a dataframe

In [7]:
def create_dataframe(price_json):
    """Convert a Yahoo Finance chart JSON payload into a price DataFrame.

    Parameters
    ----------
    price_json : dict
        Decoded response with the data under ``['chart']['result'][0]``.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``timestamp, high_price, low_price, open_price, close_price,
        volume, symbol`` (one row per timestamp), or ``None`` when the payload
        has no result entry or the entry carries no ``'timestamp'`` key.
    """
    results = price_json['chart']['result']

    # A missing/empty result list means there is nothing to parse.
    if not results:
        return None

    # A dict is not a context manager, so read it directly instead of `with`.
    base_data = results[0]
    if 'timestamp' not in base_data:
        return None

    quote = base_data['indicators']['quote'][0]

    # zip() keeps rows aligned and truncates to the shortest list
    zipped = list(zip(base_data['timestamp'],
                      quote['high'],
                      quote['low'],
                      quote['open'],
                      quote['close'],
                      quote['volume']))

    column_names = ['timestamp', 'high_price', 'low_price', 'open_price', 'close_price', 'volume']

    # Create the frame and tag every row with the symbol from the metadata
    df = pd.DataFrame(zipped, columns=column_names)
    df['symbol'] = base_data['meta']['symbol']

    return df

Create a function to do all of this for a list of symbols

In [222]:
def get_prices(symbols_list
               , result_list
               , period1
               , interval='1d'
               , period2 = None
               , pre_post = 'false'):
    """Download prices for many symbols and append one combined frame to ``result_list``.

    Parameters
    ----------
    symbols_list : sequence
        Items indexable as ``[symbol, name, industry]`` (the shape produced by
        ``finviz_pull``).
    result_list : list
        Output container; exactly one DataFrame is appended per call.
    period1 : str
        Start date, ``'%Y-%m-%d'``.
    interval : str
        Interval passed to the query and stored in the ``interval`` column.
    period2 : str or None
        End date-time, ``'%Y-%m-%d %H:%M:%S'``. ``None`` (the default) means
        "now", evaluated at call time rather than when the cell was defined.
    pre_post : str
        Passed through to ``yahoo_finance_query``.

    Returns
    -------
    None
        The result is delivered through ``result_list``. When no symbol
        returned data, an empty DataFrame is appended.
    """
    # The file imports `datetime` under the alias `dt`.
    if period2 is None:
        period2 = dt.today().strftime("%Y-%m-%d %H:%M:%S")

    length = len(symbols_list)

    # Per-symbol frames are collected and concatenated once at the end
    # (DataFrame.append is gone in pandas 2 and was quadratic in a loop).
    frames = []

    for idx, symbol in enumerate(symbols_list):

        # Pull price and put in a dataframe
        price_json = yahoo_finance_query(symbol[0], period1, period2, interval, pre_post)
        df_symbol = create_dataframe(price_json)

        if isinstance(df_symbol, pd.DataFrame):
            # Add Name and Industry columns
            df_symbol['name'] = symbol[1]
            df_symbol['industry'] = symbol[2]
            frames.append(df_symbol)

            # Print Completion
            print('Running Yahoo Finance Query: ' + str(idx + 1) + ' of ' + str(length) + ' - '+ symbol[0] + ' - ' + str(round((((idx + 1)/length) * 100),2)) + '%')

        # Pause after every 100 requests, whether or not the last one
        # returned data, so throttling is never skipped.
        if (idx + 1) % 100 == 0:
            time.sleep(5)

    if not frames:
        # Nothing came back: hand the caller an empty frame instead of crashing.
        print('Done - no price data returned')
        result_list.append(pd.DataFrame())
        return

    # Each symbol keeps its own 0..n-1 index, as before.
    df_master = pd.concat(frames)

    # Convert timestamp column (epoch seconds) to datetime
    df_master['timestamp'] = pd.to_datetime(df_master['timestamp'], unit='s')

    # Add interval column
    df_master['interval'] = interval

    # Just date
    df_master['just_date'] = df_master['timestamp'].dt.date

    print('Done')

    result_list.append(df_master)

# Fourth Iteration

In [124]:
def _signal_column(condition):
    """Return a NaN-filled Series holding ``True`` wherever ``condition`` is true."""
    return pd.Series(np.nan, index=condition.index).mask(condition, True)


def _add_price_features(df_2, min_max_rows):
    """Add previous-day prices, rolling min/max levels and return statistics in place."""
    # Previous close (used for the daily return further down)
    df_2['close_price_shift'] = df_2['close_price'].shift(1)

    # Shift prices one day
    for new_name, source in (('high_previous', 'high_price'),
                             ('low_previous', 'low_price'),
                             ('close_previous', 'close_price'),
                             ('open_previous', 'open_price')):
        df_2[new_name] = df_2[source].shift(1)

    # Support and resistance prices over each look-back window.
    # The 'shif1'/'shif2' spelling is part of the published column names.
    for row in min_max_rows:
        low_min = f'low_min_d{row}'
        high_max = f'high_max_d{row}'
        df_2[low_min] = df_2['low_price'].rolling(row).min()
        df_2[f'{low_min}_shift1_{row}'] = df_2[low_min].shift(row)
        df_2[f'{low_min}_shift2_{2 * row}'] = df_2[low_min].shift(row * 2)
        df_2[high_max] = df_2['high_price'].rolling(row).max()
        df_2[f'{high_max}_shif1_{row}'] = df_2[high_max].shift(row)
        df_2[f'{high_max}_shif2_{2 * row}'] = df_2[high_max].shift(row * 2)
        df_2[f'close_min_d{row}'] = df_2['close_price'].rolling(row).min()
        df_2[f'open_min_d{row}'] = df_2['open_price'].rolling(row).min()
        df_2[f'close_max_d{row}'] = df_2['close_price'].rolling(row).max()
        df_2[f'open_max_d{row}'] = df_2['open_price'].rolling(row).max()

    # Year from timestamp
    df_2['year'] = pd.DatetimeIndex(df_2['timestamp']).year

    # Gross daily return: close / previous close
    df_2['daily_return'] = df_2['close_price'] / df_2['close_price_shift']

    # Compounded past returns
    for i in (1, 2, 3, 5, 7):
        df_2[f'moving_{i}d_return'] = df_2['daily_return'].rolling(window=i).apply(np.prod, raw=True)

    # Lower band of daily moves: rolling mean - 2 * rolling std
    for i in (30, 60, 90):
        df_2[f'moving_{i}d_return_mean'] = df_2['daily_return'].rolling(window=i).mean()
        df_2[f'moving_{i}d_return_std'] = df_2['daily_return'].rolling(window=i).std()
        df_2[f'moving_{i}d_min_return'] = df_2[f'moving_{i}d_return_mean'] - (2 * df_2[f'moving_{i}d_return_std'])


def _add_sma_features(df_2):
    """Add simple-moving-average columns and z-scores of price around them in place."""
    for i in (10, 20, 100, 200):
        sma = f'sma_{i}d'
        zscore = f'stationary_sma_{i}d_zscore'
        # shift() needs integer periods; every window here is even, so these
        # equal the original i / 2 and i * 1.5 exactly.
        half = i // 2
        one_and_half = i * 3 // 2

        df_2[sma] = df_2['close_price'].rolling(window=i).mean()
        # Previous value of this SMA (the original read a non-existent
        # 'sma_1{i}d' column here, which raised KeyError).
        df_2[f'{sma}_shift'] = df_2[sma].shift(1)
        df_2[f'{sma}_coef'] = (-df_2[sma].shift(1) + df_2[sma]) / 2
        df_2[f'{sma}_std'] = df_2['close_price'].rolling(window=i).std()
        df_2[f'stationary_{sma}'] = df_2['close_price'] - df_2[sma]
        df_2[zscore] = df_2[f'stationary_{sma}'] / df_2[f'{sma}_std']
        df_2[f'{zscore}_shift'] = df_2[zscore].shift(half)
        df_2[f'{zscore}_shift_2'] = df_2[zscore].shift(i)
        df_2[f'{zscore}_min'] = df_2[zscore].rolling(window=i).min()
        df_2[f'{zscore}_max'] = df_2[zscore].rolling(window=i).max()
        df_2[f'{zscore}_min_shift_1'] = df_2[f'{zscore}_min'].shift(half)
        df_2[f'{zscore}_min_shift_2'] = df_2[f'{zscore}_min'].shift(i)
        df_2[f'{zscore}_min_shift_3'] = df_2[f'{zscore}_min'].shift(one_and_half)
        df_2[f'{zscore}_min_all'] = df_2[[f'{zscore}_min_shift_1',
                                          f'{zscore}_min_shift_2',
                                          f'{zscore}_min_shift_3']].min(axis=1)
        df_2[f'{zscore}_min_all_shift'] = df_2[f'{zscore}_min_all'].shift(half)
        df_2[f'{zscore}_min_all_shift_2'] = df_2[f'{zscore}_min_all'].shift(i)


def _add_rsi_features(df_2):
    """Add RSI values, their decile bins and rolling/shifted bin statistics in place."""
    # RSI bins: (0, 10] -> 0, (10, 20] -> 1, ... (90, 100] -> 9
    bins = list(range(0, 101, 10))
    labels = list(range(10))
    variants = ('', '_low', '_high')
    long_windows = (35, 70, 105, 140, 175, 210)

    # NOTE(review): `n=` is kept from the original; newer releases of `ta`
    # may expect a different keyword -- confirm against the installed version.
    for suffix, price in zip(variants, ('close_price', 'low_price', 'high_price')):
        df_2[f'momentum_rsi{suffix}'] = ta.momentum.RSIIndicator(close=df_2[price], n=7).rsi()

    # Create bins of rsi and label them
    for suffix in variants:
        binned = pd.cut(df_2[f'momentum_rsi{suffix}'], bins=bins, labels=labels)
        df_2[f'rsi_bins{suffix}'] = pd.to_numeric(binned, errors='coerce')

    # Shifted bins, to compare the current bin with previous days
    for i in (1, 2, 3):
        for suffix in variants:
            df_2[f'rsi_bins_shift_{i}d{suffix}'] = df_2[f'rsi_bins{suffix}'].shift(i).fillna(0)

    # Rolling minimum of the raw RSI
    for window in (7, 14, 21, 28, 35, 70, 105, 140, 175, 210):
        df_2[f'rsi_{window}'] = df_2['momentum_rsi'].rolling(window).min()

    # Rolling std / mean of the bin
    for window in (35, 70, 140):
        df_2[f'rsi_std_{window}'] = df_2['rsi_bins'].rolling(window).std()
    for window in (35, 70, 140):
        df_2[f'rsi_avg_{window}'] = df_2['rsi_bins'].rolling(window).mean()

    # Rolling minimum bin
    for window in long_windows:
        df_2[f'rsi_{window}_min_bin'] = df_2['rsi_bins'].rolling(window).min()
    for suffix in ('_low', '_high'):
        for window in (35, 70):
            df_2[f'rsi_{window}_min_bin{suffix}'] = df_2[f'rsi_bins{suffix}'].rolling(window).min()

    # Minimum bin shifted by one and two days
    for suffix in variants:
        for days in (1, 2):
            df_2[f'rsi_35_min_bin_shift_{days}d{suffix}'] = df_2[f'rsi_35_min_bin{suffix}'].shift(days)

    # Minimum bin shifted by whole windows
    for suffix in variants:
        for k in (1, 2):
            df_2[f'rsi_35_min_bin_shifted_{k}{suffix}'] = df_2[f'rsi_35_min_bin{suffix}'].shift(35 * k)
    df_2['rsi_35_min_bin_shifted_3'] = df_2['rsi_35_min_bin'].shift(105)
    for k in (1, 2, 3):
        df_2[f'rsi_70_min_bin_shifted_{k}'] = df_2['rsi_70_min_bin'].shift(70 * k)

    # Rolling maximum bin
    for window in long_windows:
        df_2[f'rsi_{window}_max_bin'] = df_2['rsi_bins'].rolling(window).max()
    for window in (35, 70, 105, 140):
        df_2[f'rsi_{window}_max_bin_high'] = df_2['rsi_bins_high'].rolling(window).max()

    # Maximum bin shifted by whole windows
    for window in (35, 70):
        for k in (1, 2, 3):
            df_2[f'rsi_{window}_max_bin_shifted_{k}'] = df_2[f'rsi_{window}_max_bin'].shift(window * k)

    # Average of the first 2 / 4 / 6 rolling-minimum bins
    for count in (2, 4, 6):
        total = sum(df_2[f'rsi_{window}_min_bin'] for window in long_windows[:count])
        df_2[f'rsi_median_min_{count}'] = total / count

    # Difference between the current bin and those averages
    for count in (2, 4, 6):
        df_2[f'rsi_median_min_{count}_diff'] = df_2['rsi_bins'] - df_2[f'rsi_median_min_{count}']

    # 1 when the current bin equals a window-shifted minimum bin, else 0
    signal_columns = []
    for window in (35, 70):
        for k in (1, 2, 3):
            name = f'rsi_signal_start_{window}_{k}'
            df_2[name] = np.where(df_2['rsi_bins'] == df_2[f'rsi_{window}_min_bin_shifted_{k}'], 1, 0)
            signal_columns.append(name)

    # Sum of signals
    df_2['sum_rsi_signal_start'] = sum(df_2[name] for name in signal_columns)


def _add_macd_features(df_2):
    """Add MACD line/histogram/signal columns and their derived statistics in place."""
    macd = ta.trend.MACD(close=df_2['close_price'])
    df_2['macd_line'] = macd.macd()
    df_2['macd_hist'] = macd.macd_diff()
    df_2['macd_signal_line'] = macd.macd_signal()

    # Lagged histogram, line and signal line
    for days in (1, 2, 3):
        df_2[f'macd_hist_{days}d_shift'] = df_2['macd_hist'].shift(days)
    for days in (1, 2, 3):
        df_2[f'macd_line_shift_{days}d'] = df_2['macd_line'].shift(days)
    for days in (1, 2, 3):
        df_2[f'macd_sig_line_shift_{days}d'] = df_2['macd_signal_line'].shift(days)

    # Rolling minimum of the histogram
    for window in (2, 3, 5, 7, 35, 70, 140):
        df_2[f'macd_hist_{window}d_min'] = df_2['macd_hist'].rolling(window).min()

    # 70-day minimum shifted by one and two windows
    df_2['macd_hist_70d_min_shift_1'] = df_2['macd_hist_70d_min'].shift(70)
    df_2['macd_hist_70d_min_shift_2'] = df_2['macd_hist_70d_min'].shift(140)

    # Yesterday's short-window minimum and its change since then
    for window in (3, 5, 7):
        df_2[f'macd_hist_{window}d_min_shift'] = df_2[f'macd_hist_{window}d_min'].shift(1)
    for window in (3, 5, 7):
        df_2[f'macd_hist_{window}d_min_diff'] = (df_2[f'macd_hist_{window}d_min']
                                                 - df_2[f'macd_hist_{window}d_min_shift'])

    # Rolling std / mean of the histogram
    for window in (35, 70, 140):
        df_2[f'macd_hist_std_{window}'] = df_2['macd_hist'].rolling(window).std()
    for window in (35, 70, 140):
        df_2[f'macd_hist_avg_{window}'] = df_2['macd_hist'].rolling(window).mean()

    # Growth: average step between the value `window` days ago and yesterday
    for source in ('hist', 'line'):
        for window in (3, 5, 7):
            series = df_2[f'macd_{source}']
            df_2[f'macd_{source}_{window}d_min_coef'] = (-series.shift(window) + series.shift(1)) / (window - 1)

    # Lagged 3-day growth
    for source in ('hist', 'line'):
        for days in (1, 2, 3):
            df_2[f'macd_{source}_3d_min_coef_shift_{days}'] = df_2[f'macd_{source}_3d_min_coef'].shift(days)


def _add_volatility_features(df_2):
    """Add Bollinger Band columns and the Awesome Oscillator in place."""
    # NOTE(review): `n=` / `ndev=` are kept from the original; newer releases
    # of `ta` may expect different keywords -- confirm against the installed version.
    indicator_bb = ta.volatility.BollingerBands(close=df_2["close_price"], n=14, ndev=2)

    df_2['bb_bbm'] = indicator_bb.bollinger_mavg()
    df_2['bb_bbh'] = indicator_bb.bollinger_hband()
    df_2['bb_bbl'] = indicator_bb.bollinger_lband()
    # Upper band sits `ndev` (2) deviations above the mean, so halve the gap
    df_2['bb_std'] = (df_2['bb_bbh'] - df_2['bb_bbm']) / 2
    df_2['bb_bbl_diff_std'] = (df_2['close_price'] - df_2['bb_bbl']) / df_2['bb_std']

    oscillator = ta.momentum.AwesomeOscillatorIndicator(high=df_2['high_price'], low=df_2['low_price'])
    df_2['sma_oscillator'] = oscillator.ao()


def _add_trade_signals(df_2):
    """Add the four buy-signal columns and the sell-signal column in place.

    Each column holds ``True`` where its rule fires and NaN elsewhere.
    Comparisons against NaN are False, so rows with missing inputs never fire.
    """
    below_upper_half_band = (df_2['bb_bbm'] + df_2['bb_std']) > df_2['close_price']
    macd_above_signal = df_2['macd_line'] > df_2['macd_signal_line']
    hist_not_falling = df_2['macd_hist'] >= df_2['macd_hist_1d_shift']

    df_2['buy_signal?1'] = _signal_column(
        (df_2['rsi_bins'] < df_2['rsi_35_max_bin'])
        & (df_2['rsi_bins'] > df_2['rsi_bins_shift_2d'])
        & hist_not_falling
        & (df_2['macd_line_3d_min_coef'] > 0)
        & (df_2['macd_line_3d_min_coef'] > df_2['macd_line_5d_min_coef'])
        & macd_above_signal
        & below_upper_half_band
    )

    df_2['buy_signal?2'] = _signal_column(
        below_upper_half_band
        & macd_above_signal
        & (df_2['macd_line'] < df_2['macd_sig_line_shift_3d'])
    )

    df_2['buy_signal?3'] = _signal_column(
        (df_2['macd_hist_1d_shift'] > df_2['macd_hist_3d_shift'])
        & (df_2['rsi_bins'] > df_2['rsi_bins_shift_3d'])
        & (df_2['macd_hist'] > df_2['macd_hist_3d_shift'])
        & (df_2['macd_hist_2d_shift'] > df_2['macd_hist_3d_shift'])
        & (df_2['macd_hist_3d_shift'] > (df_2['macd_hist_avg_35'] - (3 * df_2['macd_hist_std_35'])))
        & (df_2['macd_hist_3d_shift'] < (df_2['macd_hist_avg_35'] - (1.75 * df_2['macd_hist_std_35'])))
        & (df_2['macd_hist'] < 0)
    )

    df_2['buy_signal?4'] = _signal_column(
        (df_2['stationary_sma_10d_zscore_shift_2'] <= (0.8 * df_2['stationary_sma_10d_zscore_min_all_shift_2']))
        & (df_2['moving_7d_return'] > 1)
        & (df_2['rsi_bins'] < df_2['rsi_175_max_bin'])
        & (df_2['rsi_bins'] > df_2['rsi_175_min_bin'])
        & hist_not_falling
    )

    # Sell when any one of the exit rules fires
    df_2['sell_signal?'] = _signal_column(
        (
            (df_2['daily_return'] < df_2['moving_60d_min_return'])
            & (df_2['daily_return'] < 1)
        )
        | (df_2['rsi_bins'] >= df_2['rsi_35_max_bin'])
        | (df_2['stationary_sma_20d_zscore'] > (df_2['stationary_sma_20d_zscore_max'] * 0.9))
        | (df_2['low_price'] <= df_2['low_min_d10'])
    )


def _add_future_returns(df_2):
    """Return ``df_2`` left-merged with forward-looking return columns.

    Both frames carry ``close_price``, so the merge yields ``close_price_x``
    (from ``df_2``) and ``close_price_y`` (from the helper frame).
    """
    # Newest first, so shift/rolling look forward in time
    future = df_2[['timestamp', 'close_price']].sort_values(by=['timestamp'], ascending=False)
    future['next_day_price'] = future['close_price'].shift(1)
    future['next_day_return'] = future['next_day_price'] / future['close_price']
    for horizon in (3, 5, 7, 14, 21):
        future[f'next_{horizon}d_return'] = (
            future['next_day_return'].rolling(window=horizon).apply(np.prod, raw=True)
        )

    return pd.merge(df_2, future, on='timestamp', how='left')


def run_indicators_2(df
                   , results_list
                   , symbol
                   , NaN = np.nan
                   , min_max_rows = [5, 10, 15, 20, 25, 30, 60, 90, 120]):
    """Compute indicators and buy/sell signals for one symbol.

    Parameters
    ----------
    df : pandas.DataFrame
        Price rows for one or more symbols. The code reads the columns
        ``symbol, timestamp, high_price, low_price, open_price, close_price``.
        The frame is no longer modified; all work happens on a copy.
    results_list : list
        Output container; exactly one DataFrame is appended per call.
    symbol : str
        Value of ``df['symbol']`` to process.
    NaN : scalar
        Fill value for the placeholder columns ``low_is_min_7`` and
        ``low_is_min_14``.
    min_max_rows : list of int
        Look-back windows for the rolling min/max price columns. The sell rule
        reads ``low_min_d10``, so 10 must be among them.

    Returns
    -------
    None
        With 14 or fewer rows for ``symbol``, an empty frame (input columns
        plus ``low_is_min_7, low_is_min_14, momentum_rsi, index``) is
        appended. Otherwise the appended frame holds the rows at positions 15
        and later with all indicator, signal and future-return columns.
    """
    # Copy so neither the caller's frame nor a filtered view of it is written to
    df_2 = df[df['symbol'] == symbol].copy()

    if len(df_2) <= 14:
        # Too little history: append a zero-row placeholder frame
        empty_df = df.assign(low_is_min_7=NaN, low_is_min_14=NaN, momentum_rsi=0, index=0).iloc[:0]
        results_list.append(empty_df)
        return None

    # Placeholder columns (never filled by the current rules)
    df_2['low_is_min_7'] = NaN
    df_2['low_is_min_14'] = NaN

    _add_price_features(df_2, min_max_rows)
    _add_sma_features(df_2)
    _add_rsi_features(df_2)
    _add_macd_features(df_2)
    _add_volatility_features(df_2)

    # Drop the warm-up rows: positions 0-14 are removed, and the positional
    # counter stays behind as the 'index' column.
    df_2 = df_2.reset_index(drop=True).reset_index()
    df_2 = df_2[df_2['index'] > 14].copy()

    # Create group numbers / temporary index
    df_2['fake_index'] = range(len(df_2))

    _add_trade_signals(df_2)
    df_2 = _add_future_returns(df_2)

    results_list.append(df_2)
    return None

In [208]:
def run_indicators_list(stocks_list, results_list, df):
    """Run ``run_indicators_2`` for every entry of ``stocks_list``.

    Parameters
    ----------
    stocks_list : sequence
        Items processed one at a time; each is passed to ``run_indicators_2``
        as its third argument.
    results_list : list-like
        Collector handed through unchanged to ``run_indicators_2``.
    df : pandas.DataFrame
        Frame handed through unchanged to ``run_indicators_2``.

    Prints the completed fraction after each item and ``Done`` at the end.
    """
    n_items = len(stocks_list)
    # Count from 1 so the printed value is the share already processed.
    for position, ticker in enumerate(stocks_list, start=1):
        run_indicators_2(df, results_list, ticker)
        print(position / n_items)
    print('Done')

In [7]:
def create_charts_1(df, symbol, field='close_price_x', include_sell=True):
    """Plot ``field`` for one symbol and overlay the buy/sell signal markers.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``symbol``, ``field``, ``buy_signal?1`` to
        ``buy_signal?3`` and ``sell_signal?``.
    symbol : str
        Value of the ``symbol`` column to chart; also used in the title.
    field : str
        Column drawn as the base line and used as the height of every marker.
    include_sell : bool
        Sell markers are drawn only when this compares equal to ``True``.
    """
    symbol_rows = df[df['symbol'] == symbol]

    # Base line
    symbol_rows[field].plot(figsize=(16,8), color='blue', alpha=0.3, label=field)

    # One marker layer per buy signal: (flag column, marker colour, legend label)
    buy_layers = (
        ('buy_signal?1', 'green', 'Buy Signal 1'),
        ('buy_signal?2', 'orange', 'Buy Signal 2'),
        ('buy_signal?3', 'blue', 'Buy Signal 3'),
    )
    for flag_column, marker_color, legend_label in buy_layers:
        flagged = symbol_rows[symbol_rows[flag_column] == True]
        flagged[field].plot(linestyle='', marker='^', color=marker_color, alpha=0.5, markersize=9, label=legend_label)

    if include_sell == True:
        sells = symbol_rows[symbol_rows['sell_signal?'] == True]
        sells[field].plot(linestyle='', marker='o', color='red', alpha=0.5, markersize=7, label='Sell Signal')

    chart_title = symbol + ' - Buy/Sell Signals using ' + field
    plt.title(chart_title, fontsize=15)
    plt.legend()

    plt.show();
              

In [8]:
def create_charts_2(df, symbol, field='close_price_x', include_sell=True):
    """Plot ``field`` with the ``bb_bbl``/``bb_bbh`` bands and signal markers.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``symbol``, ``field``, ``bb_bbl``, ``bb_bbh``,
        ``buy_signal?1`` to ``buy_signal?3`` and ``sell_signal?``.
    symbol : str
        Value of the ``symbol`` column to chart; also used in the title.
    field : str
        Column drawn as the base line and used as the height of every marker.
    include_sell : bool
        Sell markers are drawn only when this compares equal to ``True``.
    """
    symbol_rows = df[df['symbol'] == symbol]

    # Base line followed by the two band columns (both bands in red)
    symbol_rows[field].plot(figsize=(16,8), color='blue', alpha=0.3, label=field)
    for band_column in ('bb_bbl', 'bb_bbh'):
        symbol_rows[band_column].plot(figsize=(16,8), color='red', alpha=0.3)

    # One marker layer per buy signal: (flag column, marker colour, legend label)
    buy_layers = (
        ('buy_signal?1', 'green', 'Buy Signal 1'),
        ('buy_signal?2', 'orange', 'Buy Signal 2'),
        ('buy_signal?3', 'blue', 'Buy Signal 3'),
    )
    for flag_column, marker_color, legend_label in buy_layers:
        flagged = symbol_rows[symbol_rows[flag_column] == True]
        flagged[field].plot(linestyle='', marker='^', color=marker_color, alpha=0.5, markersize=9, label=legend_label)

    if include_sell == True:
        sells = symbol_rows[symbol_rows['sell_signal?'] == True]
        sells[field].plot(linestyle='', marker='o', color='red', alpha=0.5, markersize=7, label='Sell Signal')

    chart_title = symbol + ' - Buy/Sell Signals using Bollinger Bands'
    plt.title(chart_title, fontsize=15)
    plt.legend()

    plt.show();

In [9]:
def create_charts_3(df, symbol, include_sell=True):
    """Plot the MACD line and signal line for one symbol with signal markers.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``symbol``, ``macd_signal_line``, ``macd_line``,
        ``buy_signal?1`` to ``buy_signal?3`` and ``sell_signal?``.
    symbol : str
        Value of the ``symbol`` column to chart; also used in the title.
    include_sell : bool
        Sell markers are drawn only when this compares equal to ``True``.

    Every marker is placed at the ``macd_signal_line`` value of its row.
    """
    symbol_rows = df[df['symbol'] == symbol]

    # The two MACD series
    symbol_rows['macd_signal_line'].plot(figsize=(16,8), color='blue', alpha=0.3, label='MACD Signal Line')
    symbol_rows['macd_line'].plot(figsize=(16,8), color='red', alpha=0.3, label='MACD Line')

    # One marker layer per buy signal: (flag column, marker colour, legend label)
    buy_layers = (
        ('buy_signal?1', 'green', 'Buy Signals 1'),
        ('buy_signal?2', 'orange', 'Buy Signals 2'),
        ('buy_signal?3', 'blue', 'Buy Signals 3'),
    )
    for flag_column, marker_color, legend_label in buy_layers:
        flagged = symbol_rows[symbol_rows[flag_column] == True]
        flagged['macd_signal_line'].plot(linestyle='', marker='^', color=marker_color, alpha=0.5, markersize=9, label=legend_label)

    if include_sell == True:
        sells = symbol_rows[symbol_rows['sell_signal?'] == True]
        sells['macd_signal_line'].plot(linestyle='', marker='^', color='red', markersize=10, label='Sell Signals')

    chart_title = symbol + ' - Buy/Sell Signals using MACD'
    plt.title(chart_title, fontsize=15)
    plt.legend()

    plt.show();

In [99]:
def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of at most ``n`` items.

    The final slice is shorter when ``len(lst)`` is not a multiple of ``n``;
    an empty ``lst`` yields nothing.
    """
    yield from (lst[start:start + n] for start in range(0, len(lst), n))

## Update Screener and Prices

In [54]:
# Finviz screener query sorted by volume; presumably average volume > 300k and
# a positive 52-week performance (filter codes not verified here).
screener_url = 'https://finviz.com/screener.ashx?v=111&f=sh_avgvol_o300,ta_perf_52wup&ft=4&o=volume'
# Alternative without the 52-week performance filter:
#screener_url = 'https://finviz.com/screener.ashx?v=111&f=sh_avgvol_o300&ft=4&o=volume'
my_stocks = finviz_pull(screener_url)


In [201]:
# Split the screener rows into batches of 200, one batch per pool task
lists = chunks(my_stocks, 200)
my_stocks_lists = list(lists)

### Run All Prices

In [202]:
if __name__ == "__main__":
    # `mp` is multiprocessing.dummy (see the import cell), i.e. a thread pool.
    # Keep at least one worker: cpu_count() - 2 is <= 0 on 1-2 core machines
    # and Pool() rejects that with ValueError.
    pool = mp.Pool(processes = max(1, cpu_count() - 2))
    manager = mp.Manager()

    # Shared collector that every get_prices task receives as its second argument
    L = manager.list()

    l = my_stocks_lists

    # Keep the AsyncResult handles: when they are discarded, an exception
    # raised inside a task is never surfaced.
    pending = [pool.apply_async(get_prices, args=(n, L, '2017-01-01')) for n in l]
    pool.close()
    pool.join()

    # Report failed batches without aborting the batches that succeeded.
    for batch_no, result in enumerate(pending, start=1):
        try:
            result.get()
        except Exception as exc:
            print('Price batch ' + str(batch_no) + ' of ' + str(len(pending)) + ' failed: ' + repr(exc))

Running Yahoo Finance Query: 1 of 200 - UCTT - 0.5%
Running Yahoo Finance Query: 1 of 200 - VHT - 0.5%
Running Yahoo Finance Query: 1 of 188 - LEN - 0.53%
Running Yahoo Finance Query: 1 of 200 - AKG - 0.5%
Running Yahoo Finance Query: 1 of 200 - EL - 0.5%
Running Yahoo Finance Query: 2 of 200 - GCAP - 1.0%
Running Yahoo Finance Query: 2 of 188 - DHI - 1.06%
Running Yahoo Finance Query: 2 of 200 - ZYXI - 1.0%
Running Yahoo Finance Query: 2 of 200 - TSG - 1.0%
Running Yahoo Finance Query: 2 of 200 - ARE - 1.0%
Running Yahoo Finance Query: 3 of 200 - TRI - 1.5%
Running Yahoo Finance Query: 3 of 200 - SPAQ - 1.5%
Running Yahoo Finance Query: 3 of 188 - HALO - 1.6%
Running Yahoo Finance Query: 3 of 200 - AUPH - 1.5%
Running Yahoo Finance Query: 4 of 200 - RBA - 2.0%
Running Yahoo Finance Query: 3 of 200 - SEM - 1.5%
Running Yahoo Finance Query: 4 of 188 - GSK - 2.13%Running Yahoo Finance Query: 4 of 200 - CPAH - 2.0%

Running Yahoo Finance Query: 4 of 200 - FATE - 2.0%
Running Yahoo Finance

### Create Dataframe with all prices

In [203]:
# Stack everything the get_prices tasks collected in L into one frame
df = pd.concat(L)

In [118]:
###### Run all Prices
#df = get_prices(my_stocks, '2017-01-01', interval='1d', pre_post = 'false')

### Update last 5 days

In [226]:
# Start date of the refresh window: five calendar days before today.
# The import cell binds the datetime class as `dt`; the bare name `datetime`
# is not defined there, so `datetime.today()` fails on a fresh kernel.
previous_5days = (dt.today() - timedelta(days=5)).strftime("%Y-%m-%d")
#df_today = get_prices(my_stocks, previous_5days, interval='1h', pre_post = 'false')

if __name__ == "__main__":
    # `mp` is multiprocessing.dummy (see the import cell), i.e. a thread pool.
    # Keep at least one worker: cpu_count() - 2 is <= 0 on 1-2 core machines
    # and Pool() rejects that with ValueError.
    pool = mp.Pool(processes = max(1, cpu_count() - 2))
    manager = mp.Manager()

    # Shared collector that every get_prices task receives as its second argument
    prev_5days_prices = manager.list()

    l = my_stocks_lists

    # Keep the AsyncResult handles: when they are discarded, an exception
    # raised inside a task is never surfaced.
    pending = [pool.apply_async(get_prices, args=(n, prev_5days_prices, previous_5days, '1h')) for n in l]
    pool.close()
    pool.join()

    # Report failed batches without aborting the batches that succeeded.
    for batch_no, result in enumerate(pending, start=1):
        try:
            result.get()
        except Exception as exc:
            print('Price batch ' + str(batch_no) + ' of ' + str(len(pending)) + ' failed: ' + repr(exc))

Running Yahoo Finance Query: 1 of 200 - UCTT - 0.5%
Running Yahoo Finance Query: 1 of 200 - EL - 0.5%
Running Yahoo Finance Query: 1 of 200 - VHT - 0.5%
Running Yahoo Finance Query: 1 of 188 - LEN - 0.53%
Running Yahoo Finance Query: 3 of 200 - SPAQ - 1.5%
Running Yahoo Finance Query: 4 of 200 - CPAH - 2.0%
Running Yahoo Finance Query: 5 of 200 - AEY - 2.5%
Running Yahoo Finance Query: 6 of 200 - FXH - 3.0%
Running Yahoo Finance Query: 7 of 200 - IBDM - 3.5%
Running Yahoo Finance Query: 8 of 200 - QTEC - 4.0%
Running Yahoo Finance Query: 9 of 200 - FXL - 4.5%
Running Yahoo Finance Query: 10 of 200 - XT - 5.0%
Running Yahoo Finance Query: 11 of 200 - MESO - 5.5%
Running Yahoo Finance Query: 12 of 200 - AGN - 6.0%
Running Yahoo Finance Query: 13 of 200 - PAPR - 6.5%
Running Yahoo Finance Query: 14 of 200 - PACQ - 7.0%
Running Yahoo Finance Query: 15 of 200 - CBLI - 7.5%
Running Yahoo Finance Query: 16 of 200 - BOND - 8.0%
Running Yahoo Finance Query: 17 of 200 - RACE - 8.5%
Running Yahoo

In [227]:
# Append the refreshed rows to the history and keep, per symbol and
# `just_date`, only the row with the latest timestamp
df_today = pd.concat(prev_5days_prices)
df = (
    pd.concat([df, df_today])
    .sort_values(['symbol', 'timestamp'])
    .drop_duplicates(subset=['symbol', 'just_date'], keep='last')
)

### Run Indicators

In [204]:
# Keep only the first field of each screener row (the value later passed to
# the indicator functions as the symbol)
stocks_only = [i[0] for i in my_stocks]

In [205]:
# Split the symbols into batches of 200, one batch per pool task
lists = chunks(stocks_only, 200)
my_stocks_symbols = list(lists)

In [206]:
if __name__ == "__main__":
    # `mp` is multiprocessing.dummy (see the import cell), i.e. a thread pool.
    # Keep at least one worker: cpu_count() - 1 is 0 on a single-core machine
    # and Pool() rejects that with ValueError.
    pool = mp.Pool(processes=max(1, cpu_count() - 1))
    manager = mp.Manager()

    # Shared collector that every run_indicators_list task appends to
    df1s = manager.list()

    l = my_stocks_symbols

    # Keep the AsyncResult handles: when they are discarded, an exception
    # raised inside a task is never surfaced.
    pending = [pool.apply_async(run_indicators_list, args=(n, df1s, df)) for n in l]
    pool.close()
    pool.join()

    # Report failed batches without aborting the batches that succeeded.
    for batch_no, result in enumerate(pending, start=1):
        try:
            result.get()
        except Exception as exc:
            print('Indicator batch ' + str(batch_no) + ' of ' + str(len(pending)) + ' failed: ' + repr(exc))

0.0050.0050.005319148936170213


0.005
0.005
0.01
0.01
0.010638297872340425
0.01
0.01
0.015
0.015
0.015957446808510637
0.015
0.015
0.02
0.02
0.02127659574468085
0.02
0.02
0.025
0.025
0.026595744680851064
0.025
0.025
0.03
0.03
0.031914893617021274
0.03
0.03
0.035
0.035
0.035
0.03723404255319149
0.035
0.04
0.04
0.0425531914893617
0.04
0.04
0.045
0.045
0.047872340425531915
0.045
0.045
0.05
0.05
0.05319148936170213
0.05
0.05
0.055
0.055
0.05851063829787234
0.055
0.055
0.06
0.06382978723404255
0.06
0.06
0.06
0.065
0.065
0.06914893617021277
0.065
0.065
0.07
0.07
0.07446808510638298
0.07
0.07
0.075
0.075
0.0797872340425532
0.075
0.075
0.08
0.08
0.0851063829787234
0.08
0.08
0.085
0.085
0.085
0.09042553191489362
0.085
0.09
0.09
0.09
0.09574468085106383
0.09
0.095
0.095
0.095
0.095
0.10106382978723404
0.1
0.1
0.1
0.1
0.10638297872340426
0.105
0.105
0.105
0.11170212765957446
0.105
0.11
0.11
0.11702127659574468
0.11
0.11
0.115
0.115
0.12234042553191489
0.115
0.115
0.12
0.1276595744680851
0.12
0.12

In [217]:

# For each symbol, run the strategy and create a dataframe with results
#for idx, symbol in enumerate(my_stocks):

#    print(symbol[0] + ' - ' + str(round((idx / length) * 100, 2) ) + '% // ' + str(round(time.time() - start_time, 1)) + ' seconds')
    
#    df_1 = run_indicators_2(df, symbol[0])
#    df1s.append(df_1)

#print('Done')

Select Number of days to look back

In [209]:
# Stack the per-symbol frames collected in df1s into one frame
clean_data = pd.concat(df1s)
# Optional filter (disabled): keep only rows from 2017 onwards
#clean_data = clean_data[clean_data['year'] >= 2017]

In [210]:
# Look-back window, in calendar days, used to compute min_date below
days = 3

In [211]:
# Cut-off: latest timestamp in the data minus the look-back window;
# the signal cells below only show rows newer than this
min_date = clean_data['timestamp'].max() - timedelta(days=days)
min_date

Timestamp('2020-05-08 13:30:00')

In [212]:
# Resolve the Desktop folder from the current user's home directory instead of
# a path hard-coded to one account, so the cell also runs on other machines.
from pathlib import Path

trades_path = Path.home() / 'Desktop' / 'trades.feather'
clean_data.reset_index().to_feather(trades_path)

See if there are any buys
### Signal 1

In [213]:
relevant_columns = ['timestamp', 'symbol','name', 'industry', 'next_7d_return', 'next_14d_return', 'next_21d_return']
group_keys = ['symbol', 'name', 'industry', 'year']
return_columns = ['next_7d_return', 'next_14d_return', 'next_21d_return']

# Rows where signal 1 fired (computed once, reused below)
is_buy = clean_data['buy_signal?1'] == True

# Per symbol and year: number of signals and mean forward returns.
# .copy() so that adding `year` does not write into a slice of clean_data.
metrics = clean_data[is_buy][relevant_columns].copy()
metrics['year'] = pd.DatetimeIndex(metrics['timestamp']).year
# Select the columns before aggregating so only the needed ones are processed
metrics_a = metrics.groupby(group_keys)['timestamp'].count().reset_index()
metrics_b = metrics.groupby(group_keys)[return_columns].mean().reset_index()
metrics = pd.merge(metrics_a, metrics_b, on=group_keys)

# Symbols with a mean 14d and 21d return above 1 in some year from 2019 on
list_of_good = set(metrics[(metrics['next_14d_return'] > 1) & (metrics['next_21d_return'] > 1) & (metrics['year'] >= 2019)]['symbol'])

# Signals newer than min_date (the look-back window set by `days`)
recent_buys = clean_data[(clean_data['timestamp'] > min_date) & is_buy]
recent_good_buys = recent_buys[recent_buys['symbol'].isin(list_of_good)]

print('Only with past record')
display(recent_good_buys[['timestamp', 'symbol', 'next_7d_return', 'next_14d_return', 'next_21d_return', 'close_price_x', 'low_min_d30']])
print('Everything')
display(recent_buys[['timestamp', 'symbol', 'next_7d_return', 'next_14d_return', 'next_21d_return']])

# Yearly track record of the recent symbols that passed the filter
last_days_symbol = set(recent_good_buys['symbol'])
display(metrics[metrics['symbol'].isin(last_days_symbol)].sort_values('symbol'))

Only with past record


Unnamed: 0,timestamp,symbol,next_7d_return,next_14d_return,next_21d_return,close_price_x,low_min_d30
828,2020-05-11 13:30:00,NTES,,,,356.109985,300.929993
828,2020-05-11 13:30:00,SEAC,,,,2.86,2.42
828,2020-05-11 13:30:00,CP,,,,229.869995,205.259995
828,2020-05-11 13:30:00,AOM,,,,38.27,35.75
828,2020-05-11 13:30:00,LRCX,,,,262.230011,213.289993
828,2020-05-11 13:30:00,MKSI,,,,100.57,73.599998
828,2020-05-11 13:30:00,CG,,,,24.58,18.85
828,2020-05-11 13:30:00,KLAC,,,,168.779999,125.559998
828,2020-05-11 13:30:00,MOAT,,,,49.41,41.400002
828,2020-05-11 13:30:00,RETA,,,,164.309998,126.099998


Everything


Unnamed: 0,timestamp,symbol,next_7d_return,next_14d_return,next_21d_return
828,2020-05-11 13:30:00,NTES,,,
828,2020-05-11 13:30:00,SEAC,,,
828,2020-05-11 13:30:00,CP,,,
828,2020-05-11 13:30:00,AOM,,,
828,2020-05-11 13:30:00,LRCX,,,
828,2020-05-11 13:30:00,BGNE,,,
828,2020-05-11 13:30:00,MKSI,,,
828,2020-05-11 13:30:00,CG,,,
828,2020-05-11 13:30:00,KLAC,,,
828,2020-05-11 13:30:00,MOAT,,,


Unnamed: 0,symbol,name,industry,year,timestamp,next_7d_return,next_14d_return,next_21d_return
203,AOM,iShares Core Moderate Allocation ETF,Financial,2017,1,1.006165,1.008309,1.001608
204,AOM,iShares Core Moderate Allocation ETF,Financial,2018,9,1.000581,0.995095,0.997439
205,AOM,iShares Core Moderate Allocation ETF,Financial,2019,1,1.010017,1.019477,1.033667
206,AOM,iShares Core Moderate Allocation ETF,Financial,2020,3,1.005349,1.032933,1.040845
663,CG,The Carlyle Group Inc.,Financial,2017,1,1.077859,1.109489,1.16545
664,CG,The Carlyle Group Inc.,Financial,2018,6,0.996187,0.997818,0.975712
665,CG,The Carlyle Group Inc.,Financial,2020,4,0.956689,1.010695,1.006239
771,CP,Canadian Pacific Railway Limited,Industrials,2020,1,,,
770,CP,Canadian Pacific Railway Limited,Industrials,2019,2,1.041011,1.080144,1.094759
768,CP,Canadian Pacific Railway Limited,Industrials,2017,3,1.038626,1.049264,1.094492


### Signal 2

In [214]:
group_keys = ['symbol', 'year', 'name', 'industry']
return_columns = ['next_7d_return', 'next_14d_return', 'next_21d_return']

# Rows where signal 2 fired (computed once, reused below)
is_buy = clean_data['buy_signal?2'] == True

# Per symbol and year: number of signals and mean forward returns.
# .copy() so that adding `year` does not write into a slice of clean_data.
metrics = clean_data[is_buy][['timestamp', 'symbol', 'name', 'industry', 'next_7d_return', 'next_14d_return', 'next_21d_return']].copy()
metrics['year'] = pd.DatetimeIndex(metrics['timestamp']).year
# Select the columns before aggregating so only the needed ones are processed
metrics_a = metrics.groupby(group_keys)['timestamp'].count().reset_index()
metrics_b = metrics.groupby(group_keys)[return_columns].mean().reset_index()
metrics = pd.merge(metrics_a, metrics_b, on=['symbol', 'name', 'industry', 'year'])

# Symbols with a mean 14d return above 1 and a mean 21d return above 1.05
# in some year from 2018 on
list_of_good = set(metrics[(metrics['next_14d_return'] > 1) & (metrics['next_21d_return'] > 1.05) & (metrics['year'] >= 2018)]['symbol'])

# Not read in this cell; kept because it was defined here originally
# (TODO confirm nothing else uses it, then drop).
max_index = clean_data['index'].max()

# Signals newer than min_date (the look-back window set by `days`)
recent_buys = clean_data[(clean_data['timestamp'] > min_date) & is_buy]

print('Only with past record')
display(recent_buys[recent_buys['symbol'].isin(list_of_good)][['timestamp', 'symbol', 'next_7d_return', 'next_14d_return', 'next_21d_return', 'close_price_x']])
print('Everything')
display(recent_buys[['timestamp', 'symbol', 'next_7d_return', 'next_14d_return', 'next_21d_return']])

# Yearly track record of every symbol with a recent signal (not only the good ones)
last_days_symbol = set(recent_buys['symbol'])
display(metrics[metrics['symbol'].isin(last_days_symbol)].sort_values('symbol'))

Only with past record


Unnamed: 0,timestamp,symbol,next_7d_return,next_14d_return,next_21d_return,close_price_x
828,2020-05-11 13:30:00,NTES,,,,356.109985
828,2020-05-11 13:30:00,LRCX,,,,262.230011
828,2020-05-11 13:30:00,CVM,,,,15.44
828,2020-05-11 13:30:00,ITW,,,,161.889999
828,2020-05-11 13:30:00,VIG,,,,113.699997


Everything


Unnamed: 0,timestamp,symbol,next_7d_return,next_14d_return,next_21d_return
828,2020-05-11 13:30:00,STX,,,
828,2020-05-11 13:30:00,NTES,,,
828,2020-05-11 13:30:00,UUP,,,
828,2020-05-11 13:30:00,VTEB,,,
828,2020-05-11 13:30:00,LRCX,,,
828,2020-05-11 13:30:00,BGNE,,,
828,2020-05-11 13:30:00,CVM,,,
828,2020-05-11 13:30:00,ITW,,,
828,2020-05-11 13:30:00,VIG,,,


Unnamed: 0,symbol,year,name,industry,timestamp,next_7d_return,next_14d_return,next_21d_return
359,BGNE,2017,"BeiGene, Ltd.",Healthcare,1,1.080032,0.970221,0.978995
360,BGNE,2018,"BeiGene, Ltd.",Healthcare,1,0.96276,0.999047,1.217326
361,BGNE,2019,"BeiGene, Ltd.",Healthcare,2,0.934148,0.953531,0.963371
362,BGNE,2020,"BeiGene, Ltd.",Healthcare,4,0.898412,0.92805,1.010492
730,CVM,2017,CEL-SCI Corporation,Healthcare,2,0.90798,1.001551,1.018122
731,CVM,2018,CEL-SCI Corporation,Healthcare,3,0.94512,0.944779,0.989235
732,CVM,2019,CEL-SCI Corporation,Healthcare,1,1.146377,1.094203,1.118841
733,CVM,2020,CEL-SCI Corporation,Healthcare,2,0.951262,1.302002,1.187119
1415,ITW,2020,Illinois Tool Works Inc.,Industrials,2,1.062596,1.106629,1.161758
1414,ITW,2019,Illinois Tool Works Inc.,Industrials,1,0.995542,1.013644,0.987774


### Signal 3

In [215]:
group_keys = ['symbol','name', 'industry', 'year']
return_columns = ['next_7d_return', 'next_14d_return', 'next_21d_return']

# Rows where signal 3 fired (computed once, reused below)
is_buy = clean_data['buy_signal?3'] == True

# Per symbol and year: number of signals and mean forward returns.
# .copy() so that adding `year` does not write into a slice of clean_data.
metrics = clean_data[is_buy][['timestamp', 'symbol','name', 'industry', 'next_7d_return', 'next_14d_return', 'next_21d_return']].copy()
metrics['year'] = pd.DatetimeIndex(metrics['timestamp']).year
# Select the columns before aggregating so only the needed ones are processed
metrics_a = metrics.groupby(group_keys)['timestamp'].count().reset_index()
metrics_b = metrics.groupby(group_keys)[return_columns].mean().reset_index()
metrics = pd.merge(metrics_a, metrics_b, on=group_keys)

# Symbols with a mean 14d and 21d return above 1 in some year from 2018 on
list_of_good = set(metrics[(metrics['next_14d_return'] > 1) & (metrics['next_21d_return'] > 1) & (metrics['year'] >= 2018)]['symbol'])

# Not read in this cell; kept because it was defined here originally
# (TODO confirm nothing else uses it, then drop).
max_index = clean_data['index'].max()

# Signals newer than min_date (the look-back window set by `days`)
recent_buys = clean_data[(clean_data['timestamp'] > min_date) & is_buy]

print('Only with past record')
display(recent_buys[recent_buys['symbol'].isin(list_of_good)][['timestamp', 'symbol', 'next_7d_return', 'next_14d_return', 'next_21d_return', 'close_price_x']])
print('Everything')
display(recent_buys[['timestamp', 'symbol', 'next_7d_return', 'next_14d_return', 'next_21d_return']])

# Yearly track record of every symbol with a recent signal (not only the good ones)
last_days_symbol = set(recent_buys['symbol'])
display(metrics[metrics['symbol'].isin(last_days_symbol)].sort_values('symbol'))


Only with past record


Unnamed: 0,timestamp,symbol,next_7d_return,next_14d_return,next_21d_return,close_price_x
828,2020-05-11 13:30:00,ARWR,,,,36.299999
828,2020-05-11 13:30:00,FN,,,,61.549999
828,2020-05-11 13:30:00,ACAD,,,,50.889999
828,2020-05-11 13:30:00,VIPS,,,,17.219999
828,2020-05-11 13:30:00,HUBG,,,,46.060001
828,2020-05-11 13:30:00,KLIC,,,,22.75
828,2020-05-11 13:30:00,NVAX,,,,24.5


Everything


Unnamed: 0,timestamp,symbol,next_7d_return,next_14d_return,next_21d_return
828,2020-05-11 13:30:00,ARWR,,,
828,2020-05-11 13:30:00,FN,,,
828,2020-05-11 13:30:00,ACAD,,,
828,2020-05-11 13:30:00,OSG,,,
828,2020-05-11 13:30:00,VIPS,,,
828,2020-05-11 13:30:00,HUBG,,,
828,2020-05-11 13:30:00,KLIC,,,
828,2020-05-11 13:30:00,NVAX,,,


Unnamed: 0,symbol,name,industry,year,timestamp,next_7d_return,next_14d_return,next_21d_return
26,ACAD,ACADIA Pharmaceuticals Inc.,Healthcare,2017,3,1.029228,1.020032,0.927837
27,ACAD,ACADIA Pharmaceuticals Inc.,Healthcare,2018,1,1.01378,0.892913,0.847638
28,ACAD,ACADIA Pharmaceuticals Inc.,Healthcare,2019,4,1.147321,1.121661,1.097918
29,ACAD,ACADIA Pharmaceuticals Inc.,Healthcare,2020,2,1.188723,1.2313,1.430667
285,ARWR,"Arrowhead Pharmaceuticals, Inc.",Healthcare,2018,5,1.023895,1.061174,1.064986
286,ARWR,"Arrowhead Pharmaceuticals, Inc.",Healthcare,2019,1,0.937563,0.998998,1.101169
287,ARWR,"Arrowhead Pharmaceuticals, Inc.",Healthcare,2020,1,,,
1228,FN,Fabrinet,Technology,2020,4,0.901018,0.877877,0.894276
1227,FN,Fabrinet,Technology,2019,4,1.05118,1.016366,1.023493
1226,FN,Fabrinet,Technology,2018,2,1.064449,1.029961,1.122399


### Signal 4

In [216]:
group_keys = ['symbol','name', 'industry', 'year']
return_columns = ['next_7d_return', 'next_14d_return', 'next_21d_return']

# Rows where signal 4 fired (computed once, reused below)
is_buy = clean_data['buy_signal?4'] == True

# Per symbol and year: number of signals and mean forward returns.
# .copy() so that adding `year` does not write into a slice of clean_data.
metrics = clean_data[is_buy][['timestamp', 'symbol','name', 'industry', 'next_7d_return', 'next_14d_return', 'next_21d_return']].copy()
metrics['year'] = pd.DatetimeIndex(metrics['timestamp']).year
# Select the columns before aggregating so only the needed ones are processed
metrics_a = metrics.groupby(group_keys)['timestamp'].count().reset_index()
metrics_b = metrics.groupby(group_keys)[return_columns].mean().reset_index()
metrics = pd.merge(metrics_a, metrics_b, on=group_keys)

# Symbols with a mean 14d and 21d return above 1 in some year from 2018 on
list_of_good = set(metrics[(metrics['next_14d_return'] > 1) & (metrics['next_21d_return'] > 1) & (metrics['year'] >= 2018)]['symbol'])

# Not read in this cell; kept because it was defined here originally
# (TODO confirm nothing else uses it, then drop).
max_index = clean_data['index'].max()

# Signals newer than min_date (the look-back window set by `days`)
recent_buys = clean_data[(clean_data['timestamp'] > min_date) & is_buy]

print('Only with past record')
display(recent_buys[recent_buys['symbol'].isin(list_of_good)][['timestamp', 'symbol', 'next_7d_return', 'next_14d_return', 'next_21d_return', 'close_price_x']])
print('Everything')
display(recent_buys[['timestamp', 'symbol', 'next_7d_return', 'next_14d_return', 'next_21d_return']])

# Yearly track record of every symbol with a recent signal (not only the good ones)
last_days_symbol = set(recent_buys['symbol'])
display(metrics[metrics['symbol'].isin(last_days_symbol)].sort_values('symbol'))


Only with past record


Unnamed: 0,timestamp,symbol,next_7d_return,next_14d_return,next_21d_return,close_price_x
828,2020-05-11 13:30:00,AKAM,,,,101.889999
828,2020-05-11 13:30:00,CTMX,,,,14.46
828,2020-05-11 13:30:00,SHM,,,,49.27
828,2020-05-11 13:30:00,AHPI,,,,11.72
828,2020-05-11 13:30:00,MUB,,,,112.629997
828,2020-05-11 13:30:00,AON,,,,192.009995
828,2020-05-11 13:30:00,SUB,,,,106.769997
446,2020-05-11 13:30:00,DWSH,,,,25.65
828,2020-05-11 13:30:00,WLTW,,,,200.0
828,2020-05-11 13:30:00,HSY,,,,132.949997


Everything


Unnamed: 0,timestamp,symbol,next_7d_return,next_14d_return,next_21d_return
828,2020-05-11 13:30:00,AKAM,,,
828,2020-05-11 13:30:00,CTMX,,,
828,2020-05-11 13:30:00,SHM,,,
828,2020-05-11 13:30:00,AHPI,,,
828,2020-05-11 13:30:00,MUB,,,
828,2020-05-11 13:30:00,AON,,,
828,2020-05-11 13:30:00,SUB,,,
446,2020-05-11 13:30:00,DWSH,,,
828,2020-05-11 13:30:00,WLTW,,,
828,2020-05-11 13:30:00,HSY,,,


Unnamed: 0,symbol,name,industry,year,timestamp,next_7d_return,next_14d_return,next_21d_return
117,AHPI,"Allied Healthcare Products, Inc.",Healthcare,2018,19,1.193382,1.12531,1.154204
118,AHPI,"Allied Healthcare Products, Inc.",Healthcare,2019,9,0.994382,0.982579,0.932628
119,AHPI,"Allied Healthcare Products, Inc.",Healthcare,2020,5,5.906903,4.2197,5.051275
130,AKAM,"Akamai Technologies, Inc.",Technology,2017,1,1.00181,1.021354,1.188382
131,AKAM,"Akamai Technologies, Inc.",Technology,2018,21,1.00521,1.003051,1.015895
132,AKAM,"Akamai Technologies, Inc.",Technology,2019,21,1.004187,1.031965,1.043098
133,AKAM,"Akamai Technologies, Inc.",Technology,2020,6,0.989047,0.974959,0.905066
225,AON,Aon Plc,Financial,2020,5,1.039356,1.091124,1.092492
224,AON,Aon Plc,Financial,2019,16,1.008215,1.028589,1.058955
223,AON,Aon Plc,Financial,2018,14,1.021478,1.038141,1.02155


## Charts

In [None]:
# Symbol to inspect in every chart below
your_symbol = 'AON'

# Default field with signals, then the RSI bins with signals
create_charts_1(df=clean_data, symbol=your_symbol, include_sell=True)
create_charts_1(df=clean_data, symbol=your_symbol, field='rsi_bins')
# Bollinger-band view and MACD view
create_charts_2(df=clean_data, symbol=your_symbol)
create_charts_3(df=clean_data, symbol=your_symbol)

## Backtest signals with exit strategies

In [None]:
def _label_trade_groups(df, boundary):
    """Assign a trade-group id to every row of ``df`` and derive per-group stats.

    A row where ``boundary`` is true starts a new group named after that row's
    ``uuid``; the rows after it inherit the id by forward fill. Writes the
    columns ``buy_group_backtest``, ``buy_group_backtest_holdings`` (1-based
    position inside the group, ordered by ``timestamp``) and
    ``buy_group_backtest_cum_return`` (running product of ``daily_return``
    inside the group) into ``df`` and returns it.
    """
    df['buy_group_backtest'] = df['uuid'].where(boundary).ffill()
    df['buy_group_backtest_holdings'] = df.groupby('buy_group_backtest')['timestamp'].rank(method='first')
    df['buy_group_backtest_cum_return'] = df.groupby('buy_group_backtest')['daily_return'].cumprod()
    return df


def _true_or_nan(condition):
    """Return a Series holding ``True`` where ``condition`` is true and NaN elsewhere."""
    return pd.Series(np.nan, index=condition.index, dtype=object).mask(condition, True)


def backtest(df, buy_signal_column, support_column, stop_loss, max_holding_days, max_drop, money_per_trade, max_invested):
    """Group rows into trades and return the last row of every trade.

    The frame is segmented twice. The first pass starts a new group at every
    buy signal, sell signal or support break and derives the exit flags from
    it; the second pass re-segments with those exit flags added and with buy
    signals ignored on dates whose invested total exceeds ``max_invested``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``fake_index``, ``symbol``, ``industry``,
        ``timestamp``, ``just_date``, ``daily_return``, ``low_price``,
        ``sell_signal?``, ``buy_signal_column`` and ``support_column``.
        The frame passed in is not modified.
    buy_signal_column : str
        Column whose ``True`` rows open a trade.
    support_column : str
        Column compared with ``low_price``; ``low_price <= support`` starts a
        new group.
    stop_loss : float
        A first-pass cumulative return at or below this value starts a new group.
    max_holding_days : int
        The row whose first-pass position in its group equals this value
        starts a new group.
    max_drop : float
        A fall of at least this much in cumulative return versus the previous
        row (except on the first row of a group) starts a new group.
    money_per_trade : number
        Amount booked on every row that belongs to a buy group.
    max_invested : number
        Upper bound for the per-date sum of ``money_per_trade`` at which a buy
        signal may still open a trade.

    Returns
    -------
    (returns, df) : tuple of pandas.DataFrame
        ``returns`` holds the last row of each buy group with the columns
        ``timestamp``, ``symbol``, ``industry``,
        ``buy_group_backtest_cum_return`` and ``buy_group_backtest_holdings``;
        ``df`` is the working copy with all helper columns.

    Notes
    -----
    * The row that triggers an exit starts the next group, so its own
      ``daily_return`` is not part of the closed trade's cumulative return.
    * ``shift(1)`` and the forward fill run over the whole frame in row order,
      not per symbol.
    """
    # Private copy: the caller's frame (often a filtered slice) stays untouched
    # and no write goes through a chained, in-place Series operation.
    df = df.copy()
    df['uuid'] = df['fake_index'].astype(str) + df['symbol']

    # ---- Pass 1: groups from buy / sell / support-break rows ----------------
    is_buy = df[buy_signal_column] == True
    is_sell = df['sell_signal?'] == True
    broke_support = df['low_price'] <= df[support_column]
    _label_trade_groups(df, is_buy | is_sell | broke_support)

    # Exit flag: cumulative return fell by at least max_drop since the previous row
    df['buy_group_backtest_cum_return_shift'] = df['buy_group_backtest_cum_return'].shift(1)
    df['buy_group_backtest_cum_return_diff'] = df['buy_group_backtest_cum_return_shift'] - df['buy_group_backtest_cum_return']
    df['max_drop_limit'] = _true_or_nan((df['buy_group_backtest_cum_return_diff'] >= max_drop)
                                        & (df['buy_group_backtest_holdings'] != 1))

    # Exit flag: cumulative return at or below the stop-loss level
    df['loss_threshold'] = _true_or_nan(df['buy_group_backtest_cum_return'] <= stop_loss)

    # Exit flag: maximum holding period reached
    df['holding_threshold'] = _true_or_nan(df['buy_group_backtest_holdings'] == max_holding_days)

    # Money booked on every row of a group that contains a buy signal
    groups = set(df[is_buy]['buy_group_backtest'])
    df['initial_buy'] = np.where(df['buy_group_backtest'].isin(groups), money_per_trade, np.nan)
    df['today_value_of_investment'] = df['initial_buy'] * df['buy_group_backtest_cum_return']

    # Total booked per date, attached to every row of that date
    total_day = df.groupby('just_date')['initial_buy'].sum().reset_index()
    total_day.columns = ['just_date', 'daily_invested']
    df = pd.merge(df, total_day, on ='just_date', how='left')

    # ---- Pass 2: re-segment with the exit flags and the investment cap ------
    # The merge produced a new index, so the masks are rebuilt from the columns.
    is_buy = df[buy_signal_column] == True
    boundary = (
        (is_buy & (df['daily_invested'] <= max_invested))
        | (df['sell_signal?'] == True)
        | (df['low_price'] <= df[support_column])
        | (df['holding_threshold'] == True)
        | (df['loss_threshold'] == True)
        | (df['max_drop_limit'] == True)
    )
    _label_trade_groups(df, boundary)

    # Groups that contain at least one buy-signal row
    buy_groups = set(df[is_buy]['buy_group_backtest'])

    # Last row of every such group
    g = df[df['buy_group_backtest'].isin(buy_groups)].groupby('buy_group_backtest')
    returns = g.tail(1)[['timestamp','symbol', 'industry', 'buy_group_backtest_cum_return', 'buy_group_backtest_holdings']]

    return returns, df

In [None]:
def money_maker(data_frame, signal_name, returns, money_per_trade, print_which='all'):
    """Plot money invested, gains and trade counts for one backtested signal.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Second value returned by ``backtest``; needs ``signal_name``,
        ``buy_group_backtest``, ``buy_group_backtest_cum_return``,
        ``timestamp`` and ``just_date``.
    signal_name : str
        Buy-signal column; printed as a heading and used to select buy groups.
    returns : pandas.DataFrame
        Per-trade results with the columns ``date`` and ``return``. A
        ``money_back`` column is added to it in place.
    money_per_trade : number
        Amount invested per trade.
    print_which : str
        ``'all'`` or one of ``'1'``-``'4'``. Each number enables the plots
        guarded by it below; under ``'1'`` and ``'2'`` the last plot is drawn
        without ``plt.show()`` so that, with ``'all'``, the next plot lands on
        the same axes.
    """
    # Rows that belong to a group opened by this signal
    buy_groups = set(data_frame[data_frame[signal_name] == True]['buy_group_backtest'])
    in_buy_group = data_frame['buy_group_backtest'].isin(buy_groups)

    # .copy(): the derived columns go into a private frame, not a slice of data_frame
    plot_me = data_frame[in_buy_group].copy()
    plot_me['money_invested'] = (plot_me['buy_group_backtest_cum_return'] * money_per_trade)
    plot_me['money_return'] = plot_me['money_invested'] - money_per_trade

    print(signal_name)

    # Select the column before summing so only that column is aggregated
    if print_which in ('all', '1'):
        # Money Invested
        plot_me.groupby('timestamp')['money_invested'].sum().plot(figsize=(14,6), color='blue', alpha=0.5, title='Money Invested')
        plt.show();

        # Money Gains daily
        plot_me.groupby('timestamp')['money_return'].sum().plot(figsize=(14,6), color='blue', alpha=0.5, title='Gains per Trade', label='Gains per Trade')

    # Cum sum of gains
    returns['money_back'] = (returns['return'] * money_per_trade) - money_per_trade
    gains_df = returns.groupby('date')['money_back'].sum().reset_index()
    gains_df['cum_sum'] = gains_df['money_back'].cumsum()
    gains_df.set_index('date', inplace=True)

    if print_which in ('all', '2'):
        gains_df['cum_sum'].plot(figsize=(14,6), color='green', alpha=0.5, title='Gains - Per Trade and Cum Sum', label = 'Cum Sum of Gains')
        plt.legend()
        plt.show()

    # Signal rows inside buy groups: one row per trade opened
    no_trades = data_frame[(data_frame[signal_name] == True) & in_buy_group]

    if print_which in ('all', '2'):
        no_trades.groupby('just_date')['buy_group_backtest'].count().plot(figsize=(14,6), color='blue', alpha=0.5, title='Number of Trades per Day', label = 'Daily Trades')

    # Running total of trades per timestamp (same rows as no_trades)
    cumsum_trades = no_trades.groupby('timestamp')['buy_group_backtest'].count().reset_index()
    cumsum_trades['cum_sum'] = cumsum_trades['buy_group_backtest'].cumsum()
    cumsum_trades.set_index('timestamp', inplace=True)

    if print_which in ('all', '3'):
        cumsum_trades['cum_sum'].plot(figsize=(14,6), color='green', alpha=0.5, title='Number of Trades - Daily and Cum Sum', label='Cum Sum of Trades')
        plt.legend()
        plt.show();

    if print_which in ('all', '4'):
        # Cumulative gain per cumulative trade; the division aligns the two
        # series on their index (trade end date vs. signal timestamp)
        gain_trade = gains_df['cum_sum'] / cumsum_trades['cum_sum']
        gain_trade.plot(figsize=(14,6), color='green', alpha=0.5, title='Ratio of Cum Sum Gain / Cum Sum Trades',)
        plt.show();

### Backtest signal 1

In [None]:
# Backtest signal 1 on rows after 2017 and summarise the trade returns
returns, data_frame = backtest(
    clean_data[clean_data['year'] > 2017],
    buy_signal_column='buy_signal?1',
    support_column='low_min_d10',
    stop_loss=0.98,
    max_holding_days=30,
    max_drop=0.05,
    money_per_trade=100,
    max_invested=5000,
)
returns.columns = ['date','symbol', 'industry', 'return', 'days']

# Return distribution per industry
g = sns.catplot(x="industry", y="return", kind="violin", data=returns)
g.fig.set_size_inches(15,8);

# Summary statistics of the return by holding length
returns.groupby('days')['return'].describe()

In [None]:
# All money/trade plots for the signal 1 backtest above
money_maker(data_frame=data_frame, signal_name='buy_signal?1', returns=returns, money_per_trade=100)

In [None]:
# Grid over the stop-loss level (tolerance) and the max-drop exit for signal 1;
# each heatmap cell is the mean trade return of one combination
heatmap = []
for tolerance in [0.9, 0.92, 0.94, 0.96, 0.98, 0.99]:
    for max_drop in [0.02, 0.04, 0.06, 0.08, 0.1]:
        returns, data_frame = backtest(clean_data[clean_data['year'] > 2018], 'buy_signal?1', 'low_min_d30', tolerance, 30, max_drop, 100, 5000)
        returns.columns = ['date', 'symbol', 'industry', 'return', 'days']
        heatmap.append([tolerance, max_drop, returns['return'].mean()])

to_plot = pd.DataFrame(heatmap, columns=['tolerance', 'max_drop', 'return'])
pivot = to_plot.pivot(index='tolerance', columns='max_drop', values='return')
plt.subplots(figsize=(10,7))
sns.heatmap(pivot, annot=True, fmt="g", cmap='viridis')
plt.show()

### Backtest signal 2

In [None]:
# Backtest signal 2 on rows after 2017 and summarise the trade returns
returns, data_frame = backtest(clean_data[clean_data['year'] > 2017], 'buy_signal?2', 'low_min_d10',0.98, 30, 0.05, 100, 5000)
returns.columns = ['date', 'symbol', 'industry', 'return', 'days']
g = sns.catplot(x="industry", y="return", kind="violin", data=returns)
g.fig.set_size_inches(15,8);
# Select `return` before aggregating: averaging the whole frame would also try
# to average the string and datetime columns, which newer pandas rejects.
h = returns.groupby('industry')['return'].mean().plot()
returns.groupby('industry')['return'].describe()

In [None]:
# All money/trade plots for the signal 2 backtest above
money_maker(data_frame=data_frame, signal_name='buy_signal?2', returns=returns, money_per_trade=100)

In [None]:
# Grid over the stop-loss level (tolerance) and the max-drop exit for signal 2;
# each heatmap cell is the mean trade return of one combination
heatmap = []
for tolerance in [0.9, 0.92, 0.94, 0.96, 0.98, 0.99]:
    for max_drop in [0.02, 0.04, 0.06, 0.08, 0.1]:
        # backtest needs money_per_trade and max_invested as well and returns
        # (returns, data_frame); the returns frame has five columns.
        returns, data_frame = backtest(clean_data[clean_data['year'] > 2018], 'buy_signal?2', 'low_min_d30', tolerance, 30, max_drop, 100, 5000)
        returns.columns = ['date', 'symbol', 'industry', 'return', 'days']
        heatmap.append([tolerance, max_drop, returns['return'].mean()])

to_plot = pd.DataFrame(heatmap, columns=['tolerance', 'max_drop', 'return'])
pivot = to_plot.pivot(index='tolerance', columns='max_drop', values='return')
plt.subplots(figsize=(10,7))
sns.heatmap(pivot, annot=True, fmt="g", cmap='viridis')
plt.show()

### Backtest signal 3

In [None]:
# Backtest signal 3 on rows after 2017 and summarise the trade returns
returns, data_frame = backtest(clean_data[clean_data['year'] > 2017], 'buy_signal?3', 'low_min_d10',0.98, 30, 0.05, 100, 5000)
returns.columns = ['date', 'symbol', 'industry', 'return', 'days']
g = sns.catplot(x="industry", y="return", kind="violin", data=returns)
g.fig.set_size_inches(15,8);
# Select `return` before aggregating: averaging the whole frame would also try
# to average the string and datetime columns, which newer pandas rejects.
h = returns.groupby('industry')['return'].mean().plot()
returns.groupby('industry')['return'].describe()

In [None]:
# All money/trade plots for the signal 3 backtest above
money_maker(data_frame=data_frame, signal_name='buy_signal?3', returns=returns, money_per_trade=100)

In [None]:
# Grid over the stop-loss level (tolerance) and the max-drop exit for signal 3;
# each heatmap cell is the mean trade return of one combination
heatmap = []
for tolerance in [0.9, 0.92, 0.94, 0.96, 0.98, 0.99]:
    for max_drop in [0.02, 0.04, 0.06, 0.08, 0.1]:
        # backtest needs money_per_trade and max_invested as well, and the
        # returns frame has five columns (the first one is the trade date).
        returns, data_frame = backtest(clean_data[clean_data['year'] > 2018], 'buy_signal?3', 'low_min_d30', tolerance, 30, max_drop, 100, 5000)
        returns.columns = ['date', 'symbol', 'industry', 'return', 'days']
        heatmap.append([tolerance, max_drop, returns['return'].mean()])

to_plot = pd.DataFrame(heatmap, columns=['tolerance', 'max_drop', 'return'])
pivot = to_plot.pivot(index='tolerance', columns='max_drop', values='return')
plt.subplots(figsize=(10,7))
sns.heatmap(pivot, annot=True, fmt="g", cmap='viridis')
plt.show()

### Backtest signal 4

In [None]:
# Backtest signal 4 on rows after 2017 and summarise the returns by holding length
returns, data_frame = backtest(clean_data[clean_data['year'] > 2017], 'buy_signal?4', 'low_min_d10',0.98, 30, 0.05, 100, 5000)
returns.columns = ['date', 'symbol', 'industry', 'return', 'days']
g = sns.catplot(x="days", y="return", kind="violin", data=returns)
g.fig.set_size_inches(15,8);
# Select `return` before aggregating: averaging the whole frame would also try
# to average the string and datetime columns, which newer pandas rejects.
h = returns.groupby('days')['return'].mean().plot()
returns.groupby('days')['return'].describe()

In [None]:
# All money/trade plots for the signal 4 backtest above
money_maker(data_frame=data_frame, signal_name='buy_signal?4', returns=returns, money_per_trade=100)

In [None]:
# Grid over the stop-loss level (tolerance) and the max-drop exit for signal 4;
# each heatmap cell is the mean trade return of one combination
heatmap = []
for tolerance in [0.9, 0.92, 0.94, 0.96, 0.98, 0.99]:
    for max_drop in [0.02, 0.04, 0.06, 0.08, 0.1]:
        returns, data_frame = backtest(clean_data[clean_data['year'] > 2018], 'buy_signal?4', 'low_min_d30', tolerance, 30, max_drop,100,5000)
        returns.columns = ['date', 'symbol', 'industry', 'return', 'days']
        heatmap.append([tolerance, max_drop, returns['return'].mean()])

to_plot = pd.DataFrame(heatmap, columns=['tolerance', 'max_drop', 'return'])
pivot = to_plot.pivot(index='tolerance', columns='max_drop', values='return')
plt.subplots(figsize=(10,7))
sns.heatmap(pivot, annot=True, fmt="g", cmap='viridis')
plt.show()

In [None]:


# Same exit settings for all four signals, on rows after 2018
exit_settings = dict(
    support_column='low_min_d30',
    stop_loss=0.98,
    max_holding_days=30,
    max_drop=0.05,
    money_per_trade=100,
    max_invested=5000,
)
trade_columns = ['date', 'symbol', 'industry', 'return', 'days']

# Signal 1: backtest, then the gain-per-trade ratio plot ('4')
returns1, data_frame1 = backtest(clean_data[clean_data['year'] > 2018], buy_signal_column='buy_signal?1', **exit_settings)
returns1.columns = list(trade_columns)
money_maker(data_frame1, 'buy_signal?1', returns1, money_per_trade=100, print_which='4')

# Signal 2
returns2, data_frame2 = backtest(clean_data[clean_data['year'] > 2018], buy_signal_column='buy_signal?2', **exit_settings)
returns2.columns = list(trade_columns)
money_maker(data_frame2, 'buy_signal?2', returns2, money_per_trade=100, print_which='4')

# Signal 3
returns3, data_frame3 = backtest(clean_data[clean_data['year'] > 2018], buy_signal_column='buy_signal?3', **exit_settings)
returns3.columns = list(trade_columns)
money_maker(data_frame3, 'buy_signal?3', returns3, money_per_trade=100, print_which='4')

# Signal 4
returns4, data_frame4 = backtest(clean_data[clean_data['year'] > 2018], buy_signal_column='buy_signal?4', **exit_settings)
returns4.columns = list(trade_columns)
money_maker(data_frame4, 'buy_signal?4', returns4, money_per_trade=100, print_which='4')

In [None]:
# Plot group '2' (cumulative gains) for each signal, reusing the backtests above
for frame, signal, trade_returns in (
    (data_frame1, 'buy_signal?1', returns1),
    (data_frame2, 'buy_signal?2', returns2),
    (data_frame3, 'buy_signal?3', returns3),
    (data_frame4, 'buy_signal?4', returns4),
):
    money_maker(frame, signal, trade_returns, 100, '2')

In [None]:
# Peek at the first rows of the combined indicator frame
clean_data.head()

In [None]:
def compare_years(max_drop_list, tolerance_list, years_list, signal_name, your_df,
                  backtest_extra_args=(100, 5000)):
    """Show one tolerance x max_drop heatmap of mean backtest return per year.

    For each year in ``years_list`` the rows of ``your_df`` whose ``'year'``
    column equals that year are backtested once per (tolerance, max_drop)
    pair. The mean of the resulting ``'return'`` column is drawn as an
    annotated heatmap (rows: tolerance, columns: max_drop). The year is
    printed before its figure and also used in the figure title.

    Parameters
    ----------
    max_drop_list : iterable
        Values passed to ``backtest`` as its sixth positional argument;
        they become the heatmap columns.
    tolerance_list : iterable
        Values passed to ``backtest`` as its fourth positional argument;
        they become the heatmap rows.
    years_list : iterable
        Years to compare; one heatmap is produced per entry.
    signal_name : str
        Name of the signal column handed to ``backtest``.
    your_df : pandas.DataFrame
        Frame with at least a ``'year'`` column plus whatever ``backtest``
        needs.
    backtest_extra_args : tuple, optional
        Trailing positional arguments forwarded to ``backtest`` after
        ``max_drop``. The default ``(100, 5000)`` mirrors the other
        ``backtest`` calls in this notebook; their meaning is defined by
        ``backtest`` itself.

    Returns
    -------
    None
        The function only prints and plots.
    """
    for year in years_list:
        print(year)
        grid_results = []
        for tolerance in tolerance_list:
            for max_drop in max_drop_list:
                # Same calling convention as the other backtest call sites in
                # this notebook: the result is a (returns, data_frame) pair
                # and the returns frame has five columns, starting with the date.
                returns, _ = backtest(your_df[your_df['year'] == year], signal_name,
                                      'low_min_d30', tolerance, 30, max_drop,
                                      *backtest_extra_args)
                returns.columns = ['date', 'symbol', 'industry', 'return', 'days']
                grid_results.append([tolerance, max_drop, returns['return'].mean()])

        to_plot = pd.DataFrame(grid_results, columns=['tolerance', 'max_drop', 'return'])
        pivot = to_plot.pivot(index='tolerance', columns='max_drop', values='return')
        ax = sns.heatmap(pivot, annot=True, fmt="g", cmap='viridis')
        # Label the figure so it can be read without the printed year above it.
        ax.set_title('{} - {}'.format(signal_name, year))
        plt.show()

In [None]:
# Per-year (2017-2020) heatmaps of mean backtest return for 'buy_signal?1'
# over the tolerance / max_drop grid listed below.
compare_years(max_drop_list=[0.01, 0.02, 0.03, 0.04, 0.05],
              tolerance_list=[0.9, 0.93, 0.96, 0.99],
              years_list=[2017, 2018, 2019, 2020],
              signal_name='buy_signal?1',
              your_df=clean_data)

In [None]:
# Per-year (2017-2020) heatmaps of mean backtest return for 'buy_signal?2'
# over the tolerance / max_drop grid listed below.
compare_years(max_drop_list=[0.01, 0.02, 0.03, 0.04, 0.05],
              tolerance_list=[0.9, 0.93, 0.96, 0.99],
              years_list=[2017, 2018, 2019, 2020],
              signal_name='buy_signal?2',
              your_df=clean_data)

In [None]:
# Per-year (2017-2020) heatmaps of mean backtest return for 'buy_signal?3'
# over the tolerance / max_drop grid listed below.
compare_years(max_drop_list=[0.01, 0.02, 0.03, 0.04, 0.05],
              tolerance_list=[0.9, 0.93, 0.96, 0.99],
              years_list=[2017, 2018, 2019, 2020],
              signal_name='buy_signal?3',
              your_df=clean_data)

In [None]:
# Per-year (2017-2020) heatmaps of mean backtest return for 'buy_signal?4'
# over the tolerance / max_drop grid listed below.
compare_years(max_drop_list=[0.01, 0.02, 0.03, 0.04, 0.05],
              tolerance_list=[0.9, 0.93, 0.96, 0.99],
              years_list=[2017, 2018, 2019, 2020],
              signal_name='buy_signal?4',
              your_df=clean_data)

In [None]:
# Keep only the rows where the first buy signal is set, then display the
# average of their 'next_21d_return' column.
to_plot = clean_data.loc[clean_data['buy_signal?1'] == True]
to_plot['next_21d_return'].mean()

In [None]:
# Histogram of the 'macd_hist_3d_min_coef' column for the rows selected in to_plot.
to_plot.loc[:, 'macd_hist_3d_min_coef'].plot.hist()

In [None]:
# Flag candidate buy rows for a single symbol (AAPL) and overlay them on the
# close price.
# .copy() makes `test` an independent frame, so adding a column below never
# writes into (or silently misses) a slice of clean_data.
test = clean_data[clean_data['symbol'] == 'AAPL'].copy()

# Active rule: the shifted 10-day z-score equals the 'stationary_sma_10d_zscore_min'
# column and the 2-day moving return is above 1.
buy_reno_condition = (
    (test['stationary_sma_10d_zscore_shift'] == test['stationary_sma_10d_zscore_min'])
    & (test['moving_2d_return'] > 1)
)
# Alternative conditions tried earlier, kept for reference:
#   (test['stationary_sma_10d_zscore_shift_2'] <= test['stationary_sma_10d_zscore_shift'])
#   (test['stationary_sma_10d_zscore'] > test['stationary_sma_10d_zscore_shift'])
#   (test['stationary_sma_10d_zscore'] < test['stationary_sma_10d_zscore_max'])
#   (test['stationary_sma_10d_zscore_shift_2'] <= (0.8 * test['stationary_sma_10d_zscore_min_all_shift_2']))
#   & (df_2['stationary_sma_10d_zscore_shift_2'] <= (0.8 * df_2['stationary_sma_10d_zscore_min_all_shift_2']))
#   & (test['rsi_bins'] < test['rsi_175_max_bin'])
#   & (test['rsi_bins'] > test['rsi_175_min_bin'])
#   & (test['macd_hist'] >= test['macd_hist_1d_shift'])

# True where the rule holds, NaN everywhere else. Building the column in one
# assignment replaces the previous "fill with NaN, then mask(..., inplace=True)
# on a column selection" pattern, which does not update the frame under pandas
# Copy-on-Write and relied on the np.NaN alias removed in NumPy 2.0.
test['buy_reno'] = buy_reno_condition.where(buy_reno_condition)

test['close_price_x'].plot(figsize=(16,8), color='blue', alpha=0.5)
test[test['buy_reno'] == True]['close_price_x'].plot(linestyle='', marker='^', color='green', alpha = 0.5, markersize = 9, label='Buy Signal 1')
#test['stationary_sma_10d_zscore'].plot(figsize=(16,8), color='blue', alpha=0.5)
#test['stationary_sma_10d_zscore_shift'].plot(figsize=(16,8), color='red', alpha=0.5)
#test['sma_20d'].plot(figsize=(16,8), color='red', alpha=0.5)

In [None]:
# Entry filter shared by both counts. The MACD-histogram condition remains
# disabled, as before:
#   & (test['macd_hist'] >= test['macd_hist_1d_shift'])
entry_filter_21d = (
    (test['stationary_sma_10d_zscore_shift_2'] <= (0.8 * test['stationary_sma_10d_zscore_min_all_shift_2']))
    & (test['moving_7d_return'] > 1)
    & (test['rsi_bins'] < test['rsi_175_max_bin'])
    & (test['rsi_bins'] > test['rsi_175_min_bin'])
)

# Number of non-null 'index' values among filtered rows whose
# next_21d_return is above 1 ...
print(test.loc[entry_filter_21d & (test['next_21d_return'] > 1), 'index'].count())

# ... and among filtered rows whose next_21d_return is below 1.
print(test.loc[entry_filter_21d & (test['next_21d_return'] < 1), 'index'].count())


In [None]:
# MACD line with the filtered rows overlaid: green markers where
# next_14d_return is above 1, red markers where it is below 1.
test['macd_line'].plot(figsize=(16,8), color='blue', alpha=0.5)

# Entry filter shared by both overlays. The MACD-histogram condition remains
# disabled, as before:
#   & (test['macd_hist'] >= test['macd_hist_1d_shift'])
macd_entry_filter = (
    (test['stationary_sma_10d_zscore_shift_2'] <= (0.8 * test['stationary_sma_10d_zscore_min_all_shift_2']))
    & (test['moving_7d_return'] > 1)
    & (test['rsi_bins'] < test['rsi_175_max_bin'])
    & (test['rsi_bins'] > test['rsi_175_min_bin'])
)

test.loc[macd_entry_filter & (test['next_14d_return'] > 1), 'macd_line'].plot(
    linestyle='', marker='^', color='green', alpha=0.5, markersize=9, label='Buy Signal 1')

test.loc[macd_entry_filter & (test['next_14d_return'] < 1), 'macd_line'].plot(
    linestyle='', marker='^', color='red', alpha=0.5, markersize=9, label='Buy Signal 1')


In [None]:
# Close price with the filtered rows overlaid: green markers where
# next_14d_return is above 1, red markers where it is below 1.
test['close_price_x'].plot(figsize=(16,8), color='blue', alpha=0.5)
#test['sma_10d'].plot(figsize=(16,8), color='green', alpha=0.5)
#test['sma_20d'].plot(figsize=(16,8), color='red', alpha=0.5)
#test[(test['low_min_d10'] > test['low_min_d10_shift1_10']) 
    #& (test['low_min_d10_shift1_10'] > test['low_min_d10_shift2_20'])
    #& (test['stationary_sma_100d_zscore'] <= (0.8 * test['stationary_sma_100d_zscore_min_all']))
    #& (test['sma_10d'] > test['sma_20d']) ]['close_price_x'].plot(linestyle='', marker='^', color='green', alpha = 0.5, markersize = 9, label='Buy Signal 1')
    #& (test['sma_10d_shift'] < test['sma_20d_shift'])

# Entry filter shared by both overlays.
close_entry_filter = (
    (test['stationary_sma_10d_zscore_shift_2'] <= (0.8 * test['stationary_sma_10d_zscore_min_all_shift_2']))
    & (test['moving_7d_return'] > 1)
    & (test['rsi_35_min_bin_low'] < test['rsi_bins'])
    & (test['rsi_bins'] < 7)
    & (test['macd_hist'] >= test['macd_hist_1d_shift'])
)

test.loc[close_entry_filter & (test['next_14d_return'] > 1), 'close_price_x'].plot(
    linestyle='', marker='^', color='green', alpha=0.5, markersize=9, label='Buy Signal 1')

test.loc[close_entry_filter & (test['next_14d_return'] < 1), 'close_price_x'].plot(
    linestyle='', marker='^', color='red', alpha=0.5, markersize=9, label='Buy Signal 1')


In [None]:
# Plot the RSI bin series, then print how many filtered rows ended with
# next_14d_return above 1 and how many ended below 1.
test['rsi_bins'].plot(figsize=(16,8), color='blue', alpha=0.5)
#test['sma_10d'].plot(figsize=(16,8), color='green', alpha=0.5)
#test['sma_20d'].plot(figsize=(16,8), color='red', alpha=0.5)
#test[(test['low_min_d10'] > test['low_min_d10_shift1_10']) 
    #& (test['low_min_d10_shift1_10'] > test['low_min_d10_shift2_20'])
    #& (test['stationary_sma_100d_zscore'] <= (0.8 * test['stationary_sma_100d_zscore_min_all']))
    #& (test['sma_10d'] > test['sma_20d']) ]['close_price_x'].plot(linestyle='', marker='^', color='green', alpha = 0.5, markersize = 9, label='Buy Signal 1')
    #& (test['sma_10d_shift'] < test['sma_20d_shift'])

# Conditions common to both counts.
rsi_band_filter = (
    (test['stationary_sma_10d_zscore_shift_2'] <= (0.8 * test['stationary_sma_10d_zscore_min_all_shift_2']))
    & (test['moving_7d_return'] > 1)
    & (test['rsi_bins'] < test['rsi_175_max_bin'])
    & (test['rsi_bins'] > test['rsi_175_min_bin'])
)

print(test.loc[rsi_band_filter & (test['next_14d_return'] > 1), 'index'].count())

# NOTE(review): only this second count also requires rsi_bins < 7, so the two
# numbers are not taken over the same set of rows - confirm this is intended.
print(test.loc[rsi_band_filter
               & (test['rsi_bins'] < 7)
               & (test['next_14d_return'] < 1), 'index'].count())

In [None]:
# 'sma_10d_coef' series with the filtered rows overlaid: green markers where
# next_14d_return is above 1, red markers where it is below 1.
test['sma_10d_coef'].plot(figsize=(16,8), color='blue', alpha=0.5)

# Entry filter shared by both overlays.
sma_coef_entry_filter = (
    (test['stationary_sma_10d_zscore_shift_2'] <= (0.8 * test['stationary_sma_10d_zscore_min_all_shift_2']))
    & (test['moving_7d_return'] > 1)
    & (test['rsi_bins'] < 7)
)

test.loc[sma_coef_entry_filter & (test['next_14d_return'] > 1), 'sma_10d_coef'].plot(
    linestyle='', marker='^', color='green', alpha=0.5, markersize=9, label='Buy Signal 1')

test.loc[sma_coef_entry_filter & (test['next_14d_return'] < 1), 'sma_10d_coef'].plot(
    linestyle='', marker='^', color='red', alpha=0.5, markersize=9, label='Buy Signal 1')


In [None]:
# Rebind every large working name to 0 in one statement (presumably to let the
# kernel release the frames they referred to). The names stay defined.
clean_data = df_1 = test = data_frame = 0
data_frame1 = data_frame2 = data_frame3 = data_frame4 = 0

In [76]:


# Names that IPython itself puts in the namespace, plus this list's own name,
# so that none of them show up in the report below.
ipython_vars = ['In', 'Out', 'exit', 'quit', 'get_ipython', 'ipython_vars']

# Build (name, size) pairs for every global that does not start with '_',
# does not share its name with a loaded module and is not listed above,
# then sort them from largest to smallest.
# NOTE(review): sys.getsizeof reports only what each object accounts for
# itself; objects referenced by plain containers are not included.
sorted([(x, sys.getsizeof(globals().get(x))) for x in dir() if not x.startswith('_') and x not in sys.modules and x not in ipython_vars], key=lambda x: x[1], reverse=True)

[('clean_data', 2463489991),
 ('df', 27830886),
 ('df_today', 8902027),
 ('df_1', 2521452),
 ('metrics', 989641),
 ('metrics_b', 928521),
 ('metrics_a', 867273),
 ('list_of_good', 33000),
 ('df1s', 9032),
 ('my_stocks', 7992),
 ('last_days_symbol', 2280),
 ('BeautifulSoup', 2008),
 ('Session', 1472),
 ('ThreadPool', 1064),
 ('Process', 896),
 ('timedelta', 400),
 ('create_charts_1', 144),
 ('create_charts_2', 144),
 ('create_charts_3', 144),
 ('create_dataframe', 144),
 ('finviz_pull', 144),
 ('get_prices', 144),
 ('mean', 144),
 ('median', 144),
 ('mp_handler', 144),
 ('run_indicators_2', 144),
 ('stdev', 144),
 ('yahoo_finance_query', 144),
 ('min_date', 136),
 ('relevant_columns', 128),
 ('processes', 104),
 ('symbol', 96),
 ('data', 88),
 ('mp', 88),
 ('np', 88),
 ('pd', 88),
 ('plt', 88),
 ('sns', 88),
 ('Manager', 72),
 ('Pool', 72),
 ('cpu_count', 72),
 ('L', 64),
 ('manager', 64),
 ('p', 64),
 ('p1', 64),
 ('previous_5days', 59),
 ('max_index', 32),
 ('days', 28),
 ('i', 28),
 