# Pedro - Short Queeze Predictor

In [8]:
# Import necessary libraries
import pandas as pd
import glob
from pathlib import Path
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import os
import hvplot.pandas

In [9]:
# Load environment variables
load_dotenv('alpaca.env')

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Initialize Alpaca API
api = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2"
)

In [12]:
# Load and preprocess short data
file_paths = glob.glob('Resources/2023*.xlsx')
dfs = [pd.read_excel(fp, engine='openpyxl') for fp in file_paths]

combined_df = pd.concat(dfs, ignore_index=True)
combined_df.rename(columns={'ShortSqueeze.com Short Interest Data': 'Company Name'}, inplace=True)

# Define columns to drop
columns_to_drop = ['Total Short Interest', 'Days to Cover', 'Performance (52-wk)', 'Short: Prior Mo', '% Change Mo/Mo', 
                   'Shares: Float', 'Avg. Daily Vol.', 'Shares: Outstanding', 'Short Squeeze Rankingâ„¢', '% from 52-wk High', 
                   '(abs)', '% from 200 day MA', '(abs).1', '% from 50 day MA', '(abs).2', '% Insider Ownership', 
                   '% Institutional Ownership']

columns_to_drop = [col for col in columns_to_drop if col in combined_df.columns]
combined_df.drop(columns_to_drop, axis=1, inplace=True)

combined_df['Short % of Float'] = pd.to_numeric(combined_df['Short % of Float'], errors='coerce')
combined_df = combined_df[combined_df['Short % of Float'] >= 17]
combined_df['Market Cap'] = pd.to_numeric(combined_df['Market Cap'], errors='coerce')
combined_df = combined_df[combined_df['Market Cap'] >= 300000000]

# Define date mapping dictionary
date_mapping = {
    'JanA': '01-11', 'JanB': '01-25',
    'FebA': '02-09', 'FebB': '02-27',
    'MarA': '03-09', 'MarB': '03-24',
    'AprA': '04-12', 'AprB': '04-25',
    'MayA': '05-09', 'MayB': '05-24',
    'JunA': '06-09', 'JunB': '06-27'
}

combined_df['Record Date'] = combined_df['Record Date'].str.replace(r'(\d{4})-(\w+)', lambda m: f'{m.group(1)}-{date_mapping[m.group(2)]}')
combined_df.reset_index(drop=True, inplace=True)
combined_df.sort_values('Record Date', inplace=True)



In [27]:
# Load insider trading data
insider_df = pd.read_csv(Path("Resources/InsiderTrading.csv"))

# Remove dollar symbols and convert 'Total Amount' to numeric
insider_df['Total Amount'] = insider_df['Total Amount'].replace({'\$': '', ',': ''}, regex=True).astype(float)

# Convert 'Date' to datetime
insider_df['Date'] = pd.to_datetime(insider_df['Date'])

# Merge common symbols from both dataframes
common_symbols = pd.merge(combined_df, insider_df, on='Symbol')['Symbol'].drop_duplicates().values
combined_df = combined_df[combined_df['Symbol'].isin(common_symbols)]
insider_df = insider_df[insider_df['Symbol'].isin(common_symbols)]
combined_df.rename(columns={'Record Date': 'Date'}, inplace=True)
combined_df['Date'] = pd.to_datetime(combined_df['Date'])
combined_df.sort_values('Date', inplace=True)

# Convert 'Total Amount' to numeric and filter insider trading data for trades over $1MM
insider_df['Total Amount'] = pd.to_numeric(insider_df['Total Amount'], errors='coerce')
insider_df = insider_df[insider_df['Total Amount'] >= 1000000]

In [28]:
# Get closing prices for each stock symbol
tickers = common_symbols.tolist()
start_date = pd.Timestamp("2023-01-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2023-06-30", tz="America/New_York").isoformat()

data_dict = {ticker: api.get_bars(ticker, "1Day", start=start_date, end=end_date).df for ticker in tickers}

# Calculate the change in close price 5 days after each instance of insider trading
for ticker in tickers:
    ticker_data = data_dict[ticker]
    ticker_data['Close'] = ticker_data['close']  # Ensure column name consistency
    insider_trading_dates = insider_df.loc[insider_df['Symbol'] == ticker, 'Date']

    # Calculate close price after 5 days for each instance of insider trading
    for trade_date in insider_trading_dates:
        trade_date_plus_5 = trade_date + pd.DateOffset(days=5)
        if trade_date_plus_5 in ticker_data.index:
            close_price_on_trade_date = ticker_data.loc[trade_date, 'Close']
            close_price_5_days_after = ticker_data.loc[trade_date_plus_5, 'Close']

            # Filter out instances where the close price did not increase
            if close_price_5_days_after <= close_price_on_trade_date:
                insider_df.drop(insider_df[(insider_df['Symbol'] == ticker) & (insider_df['Date'] == trade_date)].index, inplace=True)

In [46]:
# Create a dataframe to hold the results
result_df = pd.DataFrame(columns=['Symbol', '% Short Float', 'Insider Trading Amount', 'Close Price day 1', 'Close Price day 5', 'Return', 'Sector', 'Industry'])

for ticker in tickers:
    ticker_data = data_dict[ticker]
    ticker_data['Close'] = ticker_data['close']
    insider_trading_dates = insider_df.loc[insider_df['Symbol'] == ticker, 'Date'].dt.date  # Convert insider trading dates to the same format as ticker data dates
    insider_trading_dates = pd.to_datetime(insider_trading_dates)  # Convert insider trading dates to datetime format
    short_percent = combined_df.loc[combined_df['Symbol'] == ticker, 'Short % of Float'].values[0]
    insider_trading_amount = insider_df.loc[insider_df['Symbol'] == ticker, 'Total Amount'].values[0]
    sector = combined_df.loc[combined_df['Symbol'] == ticker, 'Sector'].values[0]
    industry = combined_df.loc[combined_df['Symbol'] == ticker, 'Industry'].values[0]
    print(f"Processing ticker {ticker}:")
    print(f"Insider trading dates for {ticker}: {insider_trading_dates}")
    print(f"Available dates in ticker data: {ticker_data.index}")

    for trade_date in insider_trading_dates:
        print(f"Processing trade date: {trade_date}")
        trade_date_plus_5 = trade_date + pd.DateOffset(days=5)
        if trade_date_plus_5 in ticker_data.index:
            close_price_on_trade_date = ticker_data.loc[trade_date, 'Close']
            close_price_5_days_after = ticker_data.loc[trade_date_plus_5, 'Close']
            if close_price_5_days_after > close_price_on_trade_date:
                return_percent = ((close_price_5_days_after - close_price_on_trade_date) / close_price_on_trade_date) * 100
                result_df = result_df.append({'Symbol': ticker,
                                              '% Short Float': short_percent,
                                              'Insider Trading Amount': insider_trading_amount,
                                              'Close Price day 1': close_price_on_trade_date,
                                              'Close Price day 5': close_price_5_days_after,
                                              'Return': return_percent,
                                              'Sector': sector,
                                              'Industry': industry}, ignore_index=True)
                
result_df.head(1)

Processing ticker RILY:
Insider trading dates for RILY: 76   2023-02-08
Name: Date, dtype: datetime64[ns]
Available dates in ticker data: DatetimeIndex(['2023-01-03 05:00:00+00:00', '2023-01-04 05:00:00+00:00',
               '2023-01-05 05:00:00+00:00', '2023-01-06 05:00:00+00:00',
               '2023-01-09 05:00:00+00:00', '2023-01-10 05:00:00+00:00',
               '2023-01-11 05:00:00+00:00', '2023-01-12 05:00:00+00:00',
               '2023-01-13 05:00:00+00:00', '2023-01-17 05:00:00+00:00',
               ...
               '2023-06-16 04:00:00+00:00', '2023-06-20 04:00:00+00:00',
               '2023-06-21 04:00:00+00:00', '2023-06-22 04:00:00+00:00',
               '2023-06-23 04:00:00+00:00', '2023-06-26 04:00:00+00:00',
               '2023-06-27 04:00:00+00:00', '2023-06-28 04:00:00+00:00',
               '2023-06-29 04:00:00+00:00', '2023-06-30 04:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='timestamp', length=124, freq=None)
Processing trade date: 2023-

Unnamed: 0,Symbol,% Short Float,Insider Trading Amount,Close Price day 1,Close Price day 5,Return,Sector,Industry
