In [None]:
import pandas, json, numpy, requests, os, datetime, statistics, pytz, tweepy, sqlite3, time, re, random, matplotlib.pyplot as plt, sklearn, statsmodels.api as sm
from bs4 import BeautifulSoup
from scipy.signal import find_peaks

This script finds the stock prices and moving-average values on the dates just before the MA-30 and MA-60 lines intercept (cross).
The average difference (as a percentage) between the two moving averages on those dates will be used in the scoring of the pattern.

In [None]:
### Loading stock prices df and pattern dates json:
def moving_average(symbol, company_name, plot='no', moving_average_range=(30, 60), ma_type='sma'):
    """Locate the days on which the short moving average crosses the long one.

    The daily prices are read from
    ``<cwd>\\Daily Stock Prices\\<symbol> - <company_name>.csv`` (needs a
    ``Date`` column and a ``Close`` column).  Two tables are produced and also
    written to ``<cwd>\\Patterns and Prices\\``:

    * "Below": the short average is below the long one today, at or above it
      on the next row, and its mean over the 10 preceding rows was also below.
    * "Above": the mirror image (short average above today, at or below on the
      next row, 10-row mean above).

    Args:
        symbol: Ticker symbol; used in the input file name and the ``Symbol``
            column.
        company_name: Company name; used in the input/output file names and
            the ``Name`` column.
        plot: ``'yes'`` draws the close price, both averages and the detected
            intercepts with the module-level ``plt``; anything else skips it.
        moving_average_range: ``[short_window, long_window]`` in rows.  The
            output column names keep the literal ``30``/``60`` regardless.
        ma_type: ``'sma'`` for a simple rolling mean, ``'ema'`` for an
            exponentially weighted mean (``adjust=False``).

    Returns:
        ``(combined_below_df, combined_above_df)``.

    Raises:
        ValueError: if ``ma_type`` is neither ``'sma'`` nor ``'ema'``.
        OSError: if the price file cannot be read.
    """
    short_window, long_window = moving_average_range[0], moving_average_range[1]
    prior_days = 10  # look-back used to confirm which side the short MA came from

    price_df = pandas.read_csv(
        os.getcwd() + "\\Daily Stock Prices\\" + symbol + ' - ' + company_name + '.csv',
        index_col='Date',
        parse_dates=True,
    ).sort_index(ascending=True)
    close = price_df.Close

    if ma_type == 'sma':
        ma_30 = close.rolling(window=short_window).mean()
        ma_60 = close.rolling(window=long_window).mean()
    elif ma_type == 'ema':
        ma_30 = close.ewm(span=short_window, adjust=False).mean()
        ma_60 = close.ewm(span=long_window, adjust=False).mean()
    else:
        raise ValueError("ma_type must be 'sma' or 'ema', got %r" % (ma_type,))

    if plot == 'yes':
        plt.plot(price_df.index, close, color='yellow')
        plt.plot(price_df.index, ma_30, color='black')
        plt.plot(price_df.index, ma_60, color='blue')

    # Work on plain arrays so every lookup is positional and independent of
    # the index labels.
    dates = price_df.index
    close_values = close.to_numpy()
    short_values = ma_30.to_numpy()
    long_values = ma_60.to_numpy()

    below = _new_intercept_record('Down')
    above = _new_intercept_record('Up')

    # The long SMA has no value before `long_window` rows, and the look-back
    # needs `prior_days` more.  Stop one row early because each step also
    # inspects the following row.
    first_day = max(short_window, long_window) + prior_days
    for x in range(first_day, len(dates) - 1):
        current_short, current_long = short_values[x], long_values[x]
        next_short, next_long = short_values[x + 1], long_values[x + 1]
        prior_short = short_values[x - prior_days:x].mean()
        prior_long = long_values[x - prior_days:x].mean()

        # These tests find historical intercepts only; any NaN makes the
        # comparisons False, so incomplete windows never register.
        if current_short < current_long and next_short >= next_long and prior_short < prior_long:
            record = below
        elif current_short > current_long and next_short <= next_long and prior_short > prior_long:
            record = above
        else:
            continue

        row = (
            dates[x], dates[x + 1],
            close_values[x], close_values[x + 1],
            current_short, prior_short,
            current_long, prior_long,
        )
        for column, value in zip(record, row):
            record[column].append(value)

    if plot == 'yes':
        plt.plot(below['Before_Intercept_Dates'], below['Before_Intercept_Stock_Price'], '|', color='black')
        plt.plot(above['Before_Intercept_Dates'], above['Before_Intercept_Stock_Price'], '+', color='black')
        plt.show()

    combined_below_df = _intercept_frame(below, 'Below', symbol, company_name)
    combined_above_df = _intercept_frame(above, 'Above', symbol, company_name)

    output_dir = os.getcwd() + '\\Patterns and Prices\\'
    os.makedirs(output_dir, exist_ok=True)
    combined_below_df.to_csv(output_dir + company_name + ' - Below Interception.csv')
    combined_above_df.to_csv(output_dir + company_name + ' - Above Interception.csv')

    return combined_below_df, combined_above_df


def _new_intercept_record(trend):
    """Return an ordered ``{column name: []}`` mapping for *trend* ('Down' or 'Up')."""
    short_name = 'Moving_Average_30_' + trend + '_Before_Intercept_MA_Value'
    long_name = 'Moving_Average_60_' + trend + '_Before_Intercept_MA_Value'
    columns = [
        'Before_Intercept_Dates',
        'After_Intercept_Dates',
        'Before_Intercept_Stock_Price',
        'After_Intercept_Stock_Price',
        short_name,
        short_name + '_Avg_10_Days_Prior',
        long_name,
        long_name + '_Avg_10_Days_Prior',
    ]
    return {column: [] for column in columns}


def _intercept_frame(record, direction, symbol, company_name):
    """Turn a filled intercept record into a DataFrame tagged with its origin."""
    frame = pandas.DataFrame(record)
    frame['Direction'] = direction
    frame['Symbol'] = symbol
    frame['Name'] = company_name
    return frame

In [None]:
# Load the tab-separated NYSE and AMEX listings and merge them into one table.
NYSE_csv = pandas.read_csv('NYSE.txt', sep="\t", header=0).set_index('Symbol')

AMEX_csv = pandas.read_csv('AMEX.txt', sep="\t", header=0).set_index('Symbol')

stock_exchange_ticks_and_names = pandas.merge(NYSE_csv.reset_index(), AMEX_csv.reset_index(), how='outer')
stock_exchange_ticks_and_names.to_csv('merged_NYSE_AMEX.csv')
stock_exchange_ticks_and_names_copy = stock_exchange_ticks_and_names.copy().dropna()

# regex1 matches any single special character; regex2 matches the text "Cl ".
# Raw string with the brackets escaped: same character set as before, without
# the invalid-escape and nested-set warnings.
regex1 = re.compile(r'[@_!#$%^&*()<>?/\|}{~:\[\].]')
regex2 = re.compile('Cl ')

# Keep a listing only when its description matches neither pattern and its
# symbol contains no special character.
description_is_clean = stock_exchange_ticks_and_names_copy['Description'].map(
    lambda text: regex1.search(text) is None and regex2.search(text) is None
).astype(bool)
symbol_is_clean = stock_exchange_ticks_and_names_copy['Symbol'].map(
    lambda text: regex1.search(text) is None
).astype(bool)

# Select all surviving rows at once instead of growing the frame cell by cell.
stock_exchange_ticks_and_names_removed = stock_exchange_ticks_and_names_copy.loc[
    description_is_clean & symbol_is_clean, ['Symbol', 'Description']
]

# The three indices are appended by hand; their symbols start with '.', which
# the special-character filter above would have rejected.
stock_indices_df = pandas.DataFrame({'Description': ['S&P', 'Dow', "Nasdaq"], 'Symbol': ['.INX', '.DJI', ".IXIC"]})

stocks_and_names_with_indices = pandas.concat([stock_exchange_ticks_and_names_removed, stock_indices_df])
# Round-trip through the index to put 'Symbol' first and renumber the rows.
stocks_and_names_with_indices = stocks_and_names_with_indices.set_index('Symbol')
stocks_and_names_with_indices = stocks_and_names_with_indices.reset_index()
stocks_and_names_with_indices.to_csv('merged_NYSE_AMEX_removed_intercept_pattern.csv')

In [None]:
### Looping:
# Run the intercept search for every listed symbol and gather the per-company
# tables; they are concatenated once at the end instead of on every iteration.
below_frames = []
above_frames = []

for x, y in stocks_and_names_with_indices.iterrows():
    print(y['Symbol'])

    try:
        combined_below_df, combined_above_df = moving_average(y['Symbol'], y['Description'], plot='no', moving_average_range=[30, 60], ma_type='sma')
    except Exception as error:
        # Best effort: a symbol that cannot be processed (e.g. no price file)
        # is skipped.  Without the `continue` the previous symbol's frames
        # would be appended a second time.
        print('Skipping', y['Symbol'], '-', error)
        continue

    below_frames.append(combined_below_df)
    above_frames.append(combined_above_df)

# pandas.concat raises on an empty list, so fall back to an empty frame.
total_below_df = pandas.concat(below_frames) if below_frames else pandas.DataFrame()
total_above_df = pandas.concat(above_frames) if above_frames else pandas.DataFrame()

total_below_df.to_csv('All Patterns and Prices - Below.csv')
total_above_df.to_csv('All Patterns and Prices - Above.csv')
    

In [None]:
### Need to obtain the average percentage of distance between before MA-30 and MA-60 values:
total_below_df = pandas.read_csv('All Patterns and Prices - Below.csv')
total_above_df = pandas.read_csv('All Patterns and Prices - Above.csv')

minimum = [0, 10, 50, 100, 200]
maximum = [10, 50, 100, 200, 99999]

below_ma_30 = total_below_df.Moving_Average_30_Down_Before_Intercept_MA_Value
below_ma_60 = total_below_df.Moving_Average_60_Down_Before_Intercept_MA_Value
above_ma_30 = total_above_df.Moving_Average_30_Up_Before_Intercept_MA_Value
above_ma_60 = total_above_df.Moving_Average_60_Up_Before_Intercept_MA_Value

# The per-company files do not depend on the price range, so read each one
# once here instead of once per range.  Every entry holds the non-zero
# (down MA-30, down MA-60, up MA-30, up MA-60) values of one company.
per_stock_ma_values = []
for x, y in stocks_and_names_with_indices.iterrows():
    try:
        temp_below_std_df = pandas.read_csv(os.getcwd() + '\\Patterns and Prices\\' + y['Description'] + ' - Below Interception.csv')
        temp_above_std_df = pandas.read_csv(os.getcwd() + '\\Patterns and Prices\\' + y['Description'] + ' - Above Interception.csv')
    except (OSError, ValueError):
        # Missing or unreadable file: this company has no pattern tables.
        continue

    ma_columns = (
        temp_below_std_df.Moving_Average_30_Down_Before_Intercept_MA_Value,
        temp_below_std_df.Moving_Average_60_Down_Before_Intercept_MA_Value,
        temp_above_std_df.Moving_Average_30_Up_Before_Intercept_MA_Value,
        temp_above_std_df.Moving_Average_60_Up_Before_Intercept_MA_Value,
    )
    # `!= 0` keeps NaN entries; the nan-aware statistics below ignore them.
    per_stock_ma_values.append(tuple(column[column != 0].to_numpy(dtype=float) for column in ma_columns))

for mini, maxi in zip(minimum, maximum):
    # Ratio of the two averages for every intercept whose MA-30 value lies in
    # [mini, maxi); NaN values fail both comparisons and are left out.
    below_in_range = (below_ma_30 >= mini) & (below_ma_30 < maxi)
    above_in_range = (above_ma_30 >= mini) & (above_ma_30 < maxi)
    below_intercept_percentage_difference_list = (below_ma_30[below_in_range] / below_ma_60[below_in_range]).tolist()
    above_intercept_percentage_difference_list = (above_ma_60[above_in_range] / above_ma_30[above_in_range]).tolist()

    below_intercept_percentage_difference = 1 - (numpy.nanmean(below_intercept_percentage_difference_list))
    above_intercept_percentage_difference = 1 - (numpy.nanmean(above_intercept_percentage_difference_list))

    ### Need to obtain the standard deviation as well: (This gets the std of individual stocks)
    below_intercept_std_percentage_list = []
    above_intercept_std_percentage_list = []

    for down_30_list, down_60_list, up_30_list, up_60_list in per_stock_ma_values:
        # A company counts towards this range when the mean of its MA-30
        # values lies in it (an empty list gives NaN and is skipped).
        if mini <= numpy.nanmean(down_30_list) < maxi:
            below_intercept_std_percentage_list.append((numpy.nanstd(down_30_list) / numpy.nanstd(down_60_list)))

        if mini <= numpy.nanmean(up_30_list) < maxi:
            above_intercept_std_percentage_list.append((numpy.nanstd(up_60_list) / numpy.nanstd(up_30_list)))

    below_intercept_std_percentage = 1 - (numpy.nanmean(below_intercept_std_percentage_list))
    above_intercept_std_percentage = 1 - (numpy.nanmean(above_intercept_std_percentage_list))

    lowest_below_intercept_percentage_difference = below_intercept_percentage_difference + below_intercept_std_percentage
    highest_above_intercept_percentage_difference = above_intercept_percentage_difference + above_intercept_std_percentage

    print('Price Range: ', mini, '-', maxi)
    print('Lowest Below Intercept Percentage Difference :', lowest_below_intercept_percentage_difference * 100, "%")
    print('Highest Above Intercept Percentage Difference :', highest_above_intercept_percentage_difference * 100, "%")
    print('Average Below Intercept Percentage Difference :', below_intercept_percentage_difference * 100, "%")
    print('Average Above Intercept Percentage Difference :', above_intercept_percentage_difference * 100, "%")
                                         