# Pedro - Short Squeeze Predictor

### 1. Short Data

In [None]:
import pandas as pd
import glob

# Collect every 2023 short-interest workbook; sorting makes the concat order reproducible
file_paths = sorted(glob.glob('Resources/Short_Data/2023*.xlsx'))

# Fail with a clear message instead of pandas' "No objects to concatenate"
if not file_paths:
    raise FileNotFoundError("No files match 'Resources/Short_Data/2023*.xlsx'")

# Read each workbook into its own DataFrame
dfs = []
for file_path in file_paths:
    df = pd.read_excel(file_path, engine='openpyxl')
    dfs.append(df)

# Stack all workbooks into a single DataFrame
combined_df = pd.concat(dfs, ignore_index=True)

# Rename the 'ShortSqueeze.com Short Interest Data' column
combined_df.rename(columns={'ShortSqueeze.com Short Interest Data': 'Company Name'}, inplace=True)

# Columns that are not needed for the analysis
columns_to_drop = ['Total Short Interest', 'Days to Cover', 'Performance (52-wk)', 'Short: Prior Mo', '% Change Mo/Mo', 'Shares: Float',
                   'Avg. Daily Vol.', 'Shares: Outstanding', 'Short Squeeze Ranking™', '% from 52-wk High', '(abs)',
                   '% from 200 day MA', '(abs).1', '% from 50 day MA', '(abs).2', '% Insider Ownership', '% Institutional Ownership']

# Only drop the columns that are actually present in the dataframe
columns_to_drop = [col for col in columns_to_drop if col in combined_df.columns]

combined_df.drop(columns_to_drop, axis=1, inplace=True)

# Convert the two filter columns to numeric; unparseable values become NaN
combined_df['Short % of Float'] = pd.to_numeric(combined_df['Short % of Float'], errors='coerce')
combined_df['Market Cap'] = pd.to_numeric(combined_df['Market Cap'], errors='coerce')

# Keep rows with Short % of Float >= 17 and Market Cap >= 300,000,000.
# NaN fails both comparisons, so unparseable rows are dropped as well.
# .copy() makes the result an independent frame, so the column assignment
# below does not raise SettingWithCopyWarning.
keep_rows = (combined_df['Short % of Float'] >= 17) & (combined_df['Market Cap'] >= 300000000)
combined_df = combined_df[keep_rows].copy()

# Map the half-month codes used in 'Record Date' to month-day strings
date_mapping = {
    'JanA': '01-11', 'JanB': '01-25',
    'FebA': '02-09', 'FebB': '02-27',
    'MarA': '03-09', 'MarB': '03-24',
    'AprA': '04-12', 'AprB': '04-25',
    'MayA': '05-09', 'MayB': '05-24',
    'JunA': '06-09', 'JunB': '06-27'
}

# regex=True is required: a callable replacement is only accepted for regex
# patterns, and pandas >= 2.0 defaults to regex=False (which raises ValueError).
# A code missing from date_mapping raises KeyError.
combined_df['Record Date'] = combined_df['Record Date'].str.replace(
    r'(\d{4})-(\w+)',
    lambda m: f'{m.group(1)}-{date_mapping[m.group(2)]}',
    regex=True,
)

# Sort by 'Record Date' first, then reset the index so it follows the sorted order
combined_df.sort_values('Record Date', inplace=True)
combined_df.reset_index(drop=True, inplace=True)

# Display the combined dataframe
combined_df

### 2. Insider Trading Data

In [None]:
from pathlib import Path
# Load the insider-trading export and preview the first rows
insider_df = pd.read_csv(Path("Resources") / "InsiderTrading.csv")
insider_df.head()

In [None]:
# Symbols present in both datasets: inner-join on the key column only,
# then keep the first occurrence of each symbol
common_symbols = (
    combined_df[['Symbol']]
    .merge(insider_df[['Symbol']], on='Symbol')['Symbol']
    .drop_duplicates()
)
print(common_symbols)

In [None]:
# Restrict both datasets to the symbols they have in common.
# .copy() makes each result an independent frame, so the later in-place
# edits (renaming / converting the date columns) do not raise
# SettingWithCopyWarning.
combined_df = combined_df[combined_df['Symbol'].isin(common_symbols)].copy()
insider_df = insider_df[insider_df['Symbol'].isin(common_symbols)].copy()

In [None]:
# Rename 'Record Date' to 'Date', parse it as datetime and order chronologically
combined_df = (
    combined_df
    .rename(columns={'Record Date': 'Date'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
)

In [None]:
# Preview the short-interest frame without the descriptive columns.
# The result is displayed only; combined_df itself is not modified.
# errors='ignore' is needed because the first cell already removes
# '% Insider Ownership' and '% Institutional Ownership' whenever they exist,
# so a strict drop would raise KeyError here.
combined_df.drop(
    columns=['Company Name', '% Insider Ownership', '% Institutional Ownership', 'Market Cap', 'Exchange', 'Sector', 'Industry', 'Price'],
    errors='ignore',
)

In [None]:
# Preview the first five insider-trading rows
insider_df.head(5)

In [None]:
# Preview the insider frame without the listed detail columns
# (displayed only; insider_df itself is left unchanged)
insider_detail_columns = ['Insider', 'Share Price', 'Num. of Shares', 'Relation', 'Num. of Insiders', 'Transaction']
insider_df.drop(columns=insider_detail_columns)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Colour used for each symbol in the left-hand scatter plot
symbol_mapping = {'HPK': 'b', 'RILY': 'g', 'DISH': 'r', 'SAVA': 'c', 'KSS': 'm', 'FATE': 'y', 'EVA': 'k', 'LAZR': 'orange', 'ZUMZ': 'purple', 'RCUS': 'brown'}

# Parse 'Date' as datetime and order chronologically. assign() builds a new
# frame instead of writing into a filtered slice, which avoids
# SettingWithCopyWarning.
insider_df = insider_df.assign(Date=pd.to_datetime(insider_df['Date'])).sort_values('Date')

# Create two subplots side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Left subplot: insider transaction amounts (in millions) per symbol.
# Symbols missing from symbol_mapping are drawn in gray rather than raising KeyError.
for symbol, data in insider_df.groupby('Symbol'):
    ax1.scatter(
        data['Date'],
        data['Total Amount'] / 1000000,
        color=symbol_mapping.get(symbol, 'gray'),
        label=symbol,
        alpha=0.5,
    )

# Title and labels for the left subplot
ax1.set_title('Investment by Symbol')
ax1.set_xlabel('Date')
ax1.set_ylabel('Total Amount (Million Dollars)')

# Legend for the left subplot
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

# Group the short-interest dataframe by Symbol
combined_grouped = combined_df.groupby('Symbol')

# Right subplot: Short % of Float per symbol
for symbol, data in combined_grouped:
    ax2.scatter(data['Date'], data['Short % of Float'], label=symbol)

# Title and labels for the right subplot
ax2.set_title('Short % of Float by Symbol')
ax2.set_xlabel('Date')
ax2.set_ylabel('Short % of Float')

# Legend for the right subplot
ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

# Rotate the x tick labels on both subplots. Styling the existing tick labels
# keeps the automatic date formatter; set_xticklabels(get_xticklabels()) would
# freeze the label text without fixing the tick positions.
for axes in (ax1, ax2):
    axes.tick_params(axis='x', labelrotation=45)
    plt.setp(axes.get_xticklabels(), ha='right')

# Adjust the spacing between subplots
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
import os
import alpaca_trade_api as tradeapi
from dotenv import load_dotenv
import hvplot.pandas

# Load environment variables from the local 'alpaca.env' file
load_dotenv('alpaca.env')

# Read the Alpaca key pair from the environment (None when a variable is unset)
alpaca_api_key, alpaca_secret_key = (
    os.getenv(variable) for variable in ("ALPACA_API_KEY", "ALPACA_SECRET_KEY")
)

# REST client for the v2 Alpaca API
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version="v2")

In [None]:
# Tickers to download price bars for
tickers = ['FATE', 'LAZR', 'SAVA', 'RILY', 'ZUMZ', 'DISH', 'HPK', 'KSS', 'EVA', 'RCUS']

# Bar size and the ISO-formatted New York-time window to request
timeframe = "1Day"
start_date, end_date = (
    pd.Timestamp(day, tz="America/New_York").isoformat()
    for day in ("2023-01-01", "2023-06-30")
)

# Bars for each ticker, keyed by ticker symbol
data_dict = {}
for ticker in tickers:
    data_dict[ticker] = api.get_bars(ticker, timeframe, start=start_date, end=end_date).df

In [None]:
# One overlay figure per ticker: closing price and Short % of Float on the
# left axis, insider transaction amounts on a second (right) axis.
for ticker in tickers:
    # Rows for this ticker from the insider and short-interest frames
    insider = insider_df[insider_df['Symbol'] == ticker]
    float_short = combined_df[combined_df['Symbol'] == ticker]

    # Create a figure and axes
    fig, ax1 = plt.subplots()

    # Left axis: closing price line plus the Short % of Float points
    ax1.plot(data_dict[ticker]['close'], color='blue', label='close')
    ax1.scatter(float_short['Date'], float_short['Short % of Float'], color='green', marker='o', label='Short % of Float')

    # ax1 carries both series, so label it once for both; setting two labels
    # in sequence would leave only the last one visible.
    ax1.set_ylabel('Close / Short % of Float')

    # Rotate the date labels on ax1 explicitly. After twinx() the current axes
    # is the twin, whose x-axis is hidden, so plt.xticks(...) would have no
    # visible effect.
    ax1.tick_params(axis='x', labelrotation=45)
    plt.setp(ax1.get_xticklabels(), ha='right')

    # Right axis: insider 'Total Amount' per transaction
    ax2 = ax1.twinx()
    ax2.scatter(insider['Date'], insider['Total Amount'], color='red', marker='o', label='Insider Trading')
    ax2.set_ylabel('Total Amount')

    # Set the title and legends (both placed outside the plot area)
    ax1.set_title(f'{ticker} Data Overlay')
    ax1.legend(loc='upper left', bbox_to_anchor=(1.8, 1))
    ax2.legend(loc='upper right', bbox_to_anchor=(1.8, 1))

    # Show the plot
    plt.tight_layout()
    plt.show()