In [31]:
# polygon_functions - python script with all the polygon helper functions
from polygon_functions import *

In [32]:
import pandas as pd

In [33]:
move_database = pd.read_csv('data/move_data_with_fundamentals.csv', index_col = 0)

In [34]:
move_database = move_database.sort_values(by = '1', ascending = False).reset_index(drop = True)

In [36]:
# Prepare databases
basic_database = move_database[['0', 
                                '1']]

In [37]:
basic_database.columns = ['Stock', 'Date']

In [38]:
basic_database['Date'] = [pd.Timestamp(year = pd.Timestamp(date).year,
                                       month = pd.Timestamp(date).month,
                                       day = pd.Timestamp(date).day,
                                       hour = 9,
                                       minute = 30,
                                       tz = 'America/New_York') for date in basic_database['Date']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  basic_database['Date'] = [pd.Timestamp(year = pd.Timestamp(date).year,


In [40]:
basic_fundamentals = move_database[['Market Capitalization',
 'Current Liabilities',
 'Shares',
 'Cash',
 'Operating Expenses',
 'Current Ratio',
 'PE Ratio',
 'Earnings/Share',
 'Debt/Equity',
 'Debt',
 'Current Debt',
 'Interest Expense',
 'Revenue',
 'Price/Sales',]]

In [42]:
offering_data = move_database[['Num. Active ATMs (all time)',
 'Num. Active 424s (all time)',
 'Num. Active 424B5s (past year)',
 'Num. Active 424B3s (past year)',
 'Shares Raised in the Past Year',
 'Dollars Raised in the Past Year',
 'Current Offering (shares)',
 'Current Offering (cash)',
 'ATM Present']]

offering_data['Current Offering (cash)'].loc[offering_data[offering_data['Current Offering (cash)'] < 1000].index] = 0.0
offering_data['ATM Present'].loc[offering_data[offering_data['Current Offering (cash)'] < 1000].index] = False
offering_data['Current Offering (shares)'].loc[offering_data[offering_data['Current Offering (shares)'] < 1000].index] = 0.0

In [76]:
def get_morning_low(stock, date):
    try:
        date = pd.Timestamp(date)
        date = pd.Timestamp(year = date.year,
                            month = date.month, 
                            day = date.day,
                            hour = 9,
                            minute = 30,
                            tz = 'America/New_York')
        morning_date = pd.Timestamp(year = date.year,
                            month = date.month, 
                            day = date.day,
                            hour = 12,
                            minute = 0,
                            tz = 'America/New_York')
        stock_data = get_data_for_stock(stock, date)
        stock_data.index = [pd.Timestamp(ts).tz_localize('America/New_York').tz_convert('America/New_York') for ts in stock_data.index]
        morning_LOD = stock_data[(stock_data.index >= date) & (stock_data.index <= morning_date)]['Low'].min()
        open_price = stock_data.loc[date, 'Close']
        return (morning_LOD - open_price) / open_price
    except:
        return np.nan

In [77]:
morning_LODs = Parallel(12, 'loky', verbose = 10)(delayed(get_morning_low)(stock, date) for stock, date in move_database[['0', '1']].values.tolist())

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   1 tasks      | elapsed:    0.2s
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.2s
[Parallel(n_jobs=12)]: Done  17 tasks      | elapsed:    0.5s
[Parallel(n_jobs=12)]: Done  26 tasks      | elapsed:    0.7s
[Parallel(n_jobs=12)]: Done  37 tasks      | elapsed:    0.9s
[Parallel(n_jobs=12)]: Done  48 tasks      | elapsed:    1.1s
[Parallel(n_jobs=12)]: Done  61 tasks      | elapsed:    1.5s
[Parallel(n_jobs=12)]: Done  74 tasks      | elapsed:    1.7s
[Parallel(n_jobs=12)]: Done  89 tasks      | elapsed:    2.3s
[Parallel(n_jobs=12)]: Done 104 tasks      | elapsed:    3.1s
[Parallel(n_jobs=12)]: Done 121 tasks      | elapsed:    3.6s
[Parallel(n_jobs=12)]: Done 138 tasks      | elapsed:    4.0s
[Parallel(n_jobs=12)]: Done 157 tasks      | elapsed:    4.5s
[Parallel(n_jobs=12)]: Done 176 tasks      | elapsed:    4.9s
[Parallel(n_jobs=12)]: Done 197 tasks      | elapsed:  

In [54]:
price_action_database = move_database[['open_price', 'pm_open', 'hi_price', 'Prior Day Price Action Trend', 'Premarket Move Change Classification', 'Premarket Move Time Classification']]

In [78]:
price_action_database['LOD move'] = morning_LODs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price_action_database['LOD move'] = morning_LODs


In [56]:
price_action_database.columns = ['Open Price', 'PM Beginning Price', 'PM High Price', 'Prior Day Price Action Trend', 'Premarket Move Change Classification', 'Premarket Move Time Classification']

In [64]:
uncommon_fundamentals_DB = move_database[[ 'Shares Outstanding',
 'Float Shares',
 'Float Percentage',
 'IO%',
 'Insider%',
 'Months Left',
 'Accumulated Deficit',
 'Net Working Capital']]

In [81]:
basic_database.to_csv('data/base_data.csv')
basic_fundamentals.to_csv('data/basic_fundamentals.csv')
offering_data.to_csv('data/offering_data.csv')
price_action_database.to_csv('data/PA_database.csv')
uncommon_fundamentals_DB.to_csv('data/uncommon_fundamentals_database.csv')

In [72]:
def get_data_for_stock1d(stock, start_date, end_date, tz_origin = 'America/Chicago', tz_to_convert = 'America/New_York', token = 'AKMZJGOJ8KO8NB5P32VG'):
    try:
        start_date = pd.Timestamp(pd.Timestamp(start_date).date())
        end_date = pd.Timestamp(pd.Timestamp(end_date).date())
        request = 'https://api.polygon.io/v2/aggs/ticker/{}/range/1/day/{}/{}?unadjusted=true&sort=asc&apiKey={}'.format(stock.upper(), str(start_date.date()), str(end_date.date()), token)
        raw_data = json.loads(requests.get(request).text)
        raw_data = pd.DataFrame(raw_data['results'])
        raw_data['Timestamp'] = [pd.Timestamp(datetime.datetime.fromtimestamp(t/1000)) for t in raw_data['t']]
        raw_data['Hour'] = [date.hour for date in raw_data['Timestamp']]
        raw_data['Minute'] = [date.minute for date in raw_data['Timestamp']]
        raw_data = raw_data[['v', 'o', 'c', 'h', 'l', 'Timestamp']]
        raw_data.columns = ['Volume', 'Open', 'Close', 'High', 'Low', 'Ts']
        raw_data = raw_data.set_index('Ts')
        return raw_data
    except Exception as e:
#         print(e)

#         print('No data found for {} on {}'.format(stock, start_date))
        pass
def get_60d_high(stock, date):
    date = pd.Timestamp(date)
    return get_data_for_stock1d(stock, date - pd.Timedelta('1 day'), date + pd.Timedelta('1 day'))['High'].max()
def get_dump_data(i):
# for i in move_data.index:
    try:
        stock = move_database.loc[i, '0']
        date = move_database.loc[i, '1']
        date = pd.Timestamp(date)
        date = pd.Timestamp(year = date.year,
                             month = date.month,
                             day = date.day,
                            hour = 9, 
                            minute = 30,
                            tz = 'America/New_York')
        stock_data = get_data_for_stock1d(stock, date - pd.Timedelta('10000 days'), date)
        stock_data['Premarket Change'] = (stock_data['Open'] - stock_data['Close'].shift(1)) / stock_data['Close'].shift(1)
        stock_data['Candle Move'] = (stock_data['Close'] - stock_data['Open']) / stock_data['Open']
        dumps = stock_data[stock_data['Premarket Change'] > 0.1]
        failure_rate = len(dumps[dumps['Candle Move'] < 0]) / len(dumps)
        try:
            average_dump = dumps[dumps['Candle Move'] < 0]['Candle Move'].mean()
        except:
            average_dump = np.nan
        try:
            average_success = dumps[dumps['Candle Move'] > 0]['Candle Move'].mean()
        except:
            average_success = np.nan
        splits = get_stock_splits(stock, date)
        return {'Failure Rate': failure_rate,
                'Average Dump': average_dump,
                'Average Success': average_success,
                '# Dumps': len(dumps),
                'Splits': splits}
    except Exception as e:
        print(e)
        return {'Failure Rate': np.nan,
                'Average Dump': np.nan,
                'Average Success': np.nan,
                '# Dumps': 0,
                'Splits': np.nan}
dump_data = Parallel(12, 'loky', verbose = 10)(delayed(get_dump_data)(i) for i in move_database.index)

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   1 tasks      | elapsed:    0.7s
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    1.1s
[Parallel(n_jobs=12)]: Done  17 tasks      | elapsed:    1.6s
[Parallel(n_jobs=12)]: Done  26 tasks      | elapsed:    2.2s
[Parallel(n_jobs=12)]: Done  37 tasks      | elapsed:    2.7s
[Parallel(n_jobs=12)]: Done  48 tasks      | elapsed:    3.2s
[Parallel(n_jobs=12)]: Done  61 tasks      | elapsed:    4.1s
[Parallel(n_jobs=12)]: Done  74 tasks      | elapsed:    4.8s
[Parallel(n_jobs=12)]: Done  89 tasks      | elapsed:    5.7s
[Parallel(n_jobs=12)]: Done 104 tasks      | elapsed:    6.5s
[Parallel(n_jobs=12)]: Done 121 tasks      | elapsed:    7.4s
[Parallel(n_jobs=12)]: Done 138 tasks      | elapsed:    8.3s
[Parallel(n_jobs=12)]: Done 157 tasks      | elapsed:    9.3s
[Parallel(n_jobs=12)]: Done 176 tasks      | elapsed:   10.2s
[Parallel(n_jobs=12)]: Done 197 tasks      | elapsed:  

In [73]:
dump_data = pd.DataFrame(dump_data)

In [82]:
dump_data.to_csv('data/individual_historical_fades.csv')