In [116]:
import time
import pandas as pd
import numpy as np
from datetime import date
import yfinance as yf

# Notebook-wide display settings: show up to 100 rows and render floats
# with thousands separators and two decimals.
pd.options.display.max_rows = 100
pd.set_option('display.float_format', '{:,.2f}'.format)

# Question 1

In [37]:
# Withdrawn-IPO listing: read_html returns every table found on the page,
# and the first one is the table used below.
url = "https://stockanalysis.com/ipos/withdrawn/"
withdrawn_tables = pd.read_html(url)
df = withdrawn_tables[0]
df.head()

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered
0,ODTX,"Odyssey Therapeutics, Inc.",-,-
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000
2,AURN,"Aurion Biotech, Inc.",-,-
3,ROTR,"PHI Group, Inc.",-,-
4,ONE,One Power Company,-,-


In [38]:
def create_company_class(df):
    """Add a ``Company Class`` column derived from ``Company Name``.

    Each name is lower-cased and tested against the rules below in order;
    the first rule that matches decides the class, and names matching no
    rule (or missing names) are labelled ``"Other"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column ``Company Name``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with ``Company Class`` added
        (the frame is modified in place and also returned).
    """
    names = df['Company Name'].str.lower()

    # Ordered (pattern, label) rules. The abbreviations are matched as whole
    # words: an unescaped "." is a regex wildcard, so "inc." used to hit
    # "zinc mines" / "princeton" and to miss a name ending in a bare "inc".
    rules = [
        (r"acquisition|corporation|\bcorp\b", "Acq.Corp"),
        (r"\binc\b|incorporated", "Inc"),
        (r"group", "Group"),
        (r"\bltd\b|limited", "Limited"),
        (r"holdings", "Holdings"),
    ]

    # na=False keeps the masks strictly boolean when a name is missing,
    # which np.select requires.
    cond = [
        names.str.contains(pattern, regex=True, na=False).to_numpy(dtype=bool)
        for pattern, _ in rules
    ]
    choices = [label for _, label in rules]

    df["Company Class"] = np.select(cond, choices, default="Other")
    return df

In [58]:
def create_avg_price(data):
    """Return a copy of ``data`` with an ``Avg. Price`` column.

    ``Price Range`` is split on "-" (the column in the returned copy holds
    the resulting list of parts), each part is stripped of "$", thousands
    separators and whitespace, and the mean of the parts is stored in
    ``Avg. Price``. Rows whose range cannot be parsed (e.g. a bare "-" or
    a missing value) get NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a string column ``Price Range``; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``Price Range`` split into lists and a float
        ``Avg. Price`` column.
    """
    df = data.copy()
    df['Price Range'] = df['Price Range'].str.split("-")

    def compute_mean(parts):
        # Only parsing failures are swallowed: ValueError for empty or
        # non-numeric parts, TypeError/AttributeError for missing or
        # non-string cells. Anything else propagates.
        try:
            numbers = [
                float(part.replace("$", "").replace(",", "").strip())
                for part in parts
            ]
        except (TypeError, ValueError, AttributeError):
            return np.nan
        if not numbers:
            return np.nan
        return float(np.mean(numbers))

    df['Avg. Price'] = df['Price Range'].apply(compute_mean)
    return df


In [54]:
# Label every company with its class.
df = df.pipe(create_company_class)

In [59]:
# Derive the average offer price from the textual price range.
df = df.pipe(create_avg_price)

In [60]:
# Preview the first ten rows with the two derived columns.
df.head(n=10)

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class,Avg. Price
0,ODTX,"Odyssey Therapeutics, Inc.","[, ]",-,Inc,
1,UNFL,"Unifoil Holdings, Inc.","[$3.00 , $4.00]",2000000,Inc,3.5
2,AURN,"Aurion Biotech, Inc.","[, ]",-,Inc,
3,ROTR,"PHI Group, Inc.","[, ]",-,Inc,
4,ONE,One Power Company,"[, ]",-,Other,
5,HPOT,The Great Restaurant Development Holdings Limited,"[$4.00 , $6.00]",1400000,Limited,5.0
6,CABR,"Caring Brands, Inc.",[$4.00],750000,Inc,4.0
7,SQVI,"Sequoia Vaccines, Inc.","[$8.00 , $10.00]",2775000,Inc,9.0
8,SNI,Shenni Holdings Limited,"[$4.00 , $6.00]",3000000,Limited,5.0
9,KMCM,Key Mining Corp.,[$2.25],4444444,Acq.Corp,2.25


In [65]:
# Non-numeric share counts (such as "-") become NaN, so the product is NaN
# whenever either the price or the share count is missing.
shares_numeric = pd.to_numeric(df['Shares Offered'], errors='coerce')
df['Shares Offered'] = shares_numeric
df['Withdrawn Value'] = df['Avg. Price'] * shares_numeric

In [67]:
# Number of non-missing values per column.
df.notna().sum()

Symbol             100
Company Name       100
Price Range        100
Shares Offered      72
Company Class      100
Avg. Price          73
Withdrawn Value     71
dtype: int64

In [76]:
# Total withdrawn value per company class, largest first.
class_totals = df.groupby("Company Class", as_index=False)['Withdrawn Value'].sum()
agg_df = class_totals.sort_values('Withdrawn Value', ascending=False)
# Express the totals in millions.
agg_df['Withdrawn Value'] = agg_df['Withdrawn Value'] / 1e6
agg_df

Unnamed: 0,Company Class,Withdrawn Value
0,Acq.Corp,4111.85
2,Inc,2257.16
4,Other,752.07
3,Limited,549.73
1,Group,33.79


# Question 2

In [77]:
# 2024 IPO listing: the first table on the page is the one used.
url = "https://stockanalysis.com/ipos/2024/"
ipo_tables = pd.read_html(url)
df_sharpe = ipo_tables[0]
df_sharpe.head()

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
0,"Dec 31, 2024",ONEG,OneConstruction Group Limited,$4.00,$4.15,3.75%
1,"Dec 27, 2024",PHH,"Park Ha Biological Technology Co., Ltd.",$4.00,$21.91,447.75%
2,"Dec 23, 2024",HIT,"Health In Tech, Inc.",$4.00,$0.59,-85.31%
3,"Dec 23, 2024",TDAC,Translational Development Acquisition Corp.,$10.00,$10.27,2.70%
4,"Dec 20, 2024",RANG,Range Capital Acquisition Corp.,$10.00,$10.20,2.00%


In [78]:
# Parse the textual IPO dates; format='mixed' infers the format per element.
ipo_dates = pd.to_datetime(df_sharpe['IPO Date'], format='mixed')
df_sharpe['IPO Date'] = ipo_dates

In [82]:
# Keep IPOs dated strictly before 2024-06-01, then show the shape together
# with the number of distinct symbols.
is_before_june = df_sharpe['IPO Date'] < '2024-06-01'
df_sharpe_subset = df_sharpe.loc[is_before_june].copy()
df_sharpe_subset.shape, df_sharpe_subset['Symbol'].nunique()

((77, 6), 77)

In [129]:
# Check the date range covered by the filtered IPOs.
df_sharpe_subset.loc[:, 'IPO Date'].describe()

count                               77
mean     2024-03-17 13:42:51.428571392
min                2024-01-09 00:00:00
25%                2024-02-07 00:00:00
50%                2024-03-22 00:00:00
75%                2024-04-22 00:00:00
max                2024-05-31 00:00:00
Name: IPO Date, dtype: object

In [85]:
# Distinct symbols, in order of first appearance, as a plain Python list.
unique_symbols = df_sharpe_subset['Symbol'].unique()
tickers = unique_symbols.tolist()

In [88]:

def _add_price_features(history, ticker):
    """Return a copy of one ticker's daily price history with derived columns.

    Parameters
    ----------
    history : pandas.DataFrame
        Daily prices indexed by a DatetimeIndex, with at least the columns
        ``Close``, ``High`` and ``Low``.
    ticker : str
        Symbol stored in the ``Ticker`` column.

    Returns
    -------
    pandas.DataFrame
        ``history`` plus calendar fields, growth ratios, moving averages,
        rolling volatility and the 30-row-ahead target flag.
    """
    prices = history.copy()

    # Identification and calendar fields taken from the index.
    prices['Ticker'] = ticker
    prices['Year'] = prices.index.year
    prices['Month'] = prices.index.month
    prices['Weekday'] = prices.index.weekday
    prices['Date'] = prices.index.date

    # Historical growth ratios. The horizon is a number of ROWS (trading
    # days), so e.g. growth_252d is Close divided by Close 252 rows earlier.
    for window in [1, 3, 7, 30, 90, 252, 365]:
        prices['growth_' + str(window) + 'd'] = prices['Close'] / prices['Close'].shift(window)
    prices['growth_future_30d'] = prices['Close'].shift(-30) / prices['Close']

    # Technical indicators: 10- and 20-row simple moving averages.
    prices['SMA10'] = prices['Close'].rolling(10).mean()
    prices['SMA20'] = prices['Close'].rolling(20).mean()
    prices['growing_moving_average'] = np.where(prices['SMA10'] > prices['SMA20'], 1, 0)
    prices['high_minus_low_relative'] = (prices.High - prices.Low) / prices['Close']

    # 30d rolling volatility : https://ycharts.com/glossary/terms/rolling_vol_30
    # NOTE(review): this is the rolling std of the Close PRICE (not of
    # returns) scaled by sqrt(252) - confirm this is the intended measure.
    prices['volatility'] = prices['Close'].rolling(30).std() * np.sqrt(252)

    # What we want to predict: did the price grow over the next 30 rows?
    prices['is_positive_growth_30d_future'] = np.where(prices['growth_future_30d'] > 1, 1, 0)

    return prices


# Collect one frame per ticker and concatenate once at the end, instead of
# re-copying the accumulated frame on every iteration.
ticker_frames = []

for i, ticker in enumerate(tickers):
    print(i, ticker)

    historyPrices = yf.Ticker(ticker).history(period="max", interval="1d")

    # sleep 1 sec between downloads - not to overload the API server
    time.sleep(1)

    # An empty result cannot be feature-engineered (its index may not be a
    # DatetimeIndex), so report it and move on.
    if historyPrices.empty:
        print(f"  no price history returned for {ticker}; skipped")
        continue

    ticker_frames.append(_add_price_features(historyPrices, ticker))

stocks_df = pd.concat(ticker_frames, ignore_index=True) if ticker_frames else pd.DataFrame()

0 NAKA
1 BOW
2 HDL
3 RFAI
4 JDZG
5 RAY
6 BTOC
7 ZK
8 GPAT
9 PAL
10 SVCO
11 NNE
12 CCIX
13 VIK
14 ZONE
15 LOAR
16 MRX
17 RBRK
18 NCI
19 MFI
20 YYGH
21 TRSG
22 CDTG
23 CTRI
24 IBTA
25 MTEN
26 SUPX
27 TWG
28 ULS
29 PACS
30 MNDR
31 CTNM
32 MAMO
33 ZBAO
34 BOLD
35 MMA
36 UBXG
37 IBAC
38 AUNA
39 BKHA
40 LOBO
41 RDDT
42 ALAB
43 INTJ
44 RYDE
45 LGCL
46 SMXT
47 VHAI
48 DYCQ
49 CHRO
50 UMAC
51 HLXB
52 MGX
53 TBBB
54 TELO
55 KYTX
56 PMNT
57 AHR
58 LEGT
59 ANRO
60 GUTS
61 AS
62 FBLG
63 AVBP
64 BTSG
65 HAO
66 CGON
67 YIBO
68 JL
69 SUGP
70 JVSA
71 KSPI
72 CCTG
73 PSBD
74 SYNX
75 SDHC
76 ROMA


In [90]:
# Presumably the annual risk-free rate - TODO confirm.
risk_free_rate = 0.045
# NOTE(review): growth_252d is a price ratio (1.0 means unchanged), so the
# rate is subtracted from a ratio rather than from a return - confirm this
# is the intended definition.
excess_growth = stocks_df['growth_252d'] - risk_free_rate
stocks_df['Sharpe'] = excess_growth / stocks_df['volatility']

In [125]:
# One row per ticker for the single trading day 2025-06-06.
is_target_day = stocks_df['Date'] == date(2025, 6, 6)
trading_day = stocks_df.loc[is_target_day].copy()
trading_day.shape

(77, 27)

In [126]:
# How many tickers have a defined Sharpe / 252-row growth on that day.
trading_day[['Sharpe', 'growth_252d']].notna().sum()

Sharpe         73
growth_252d    73
dtype: int64

In [122]:
# Summary statistics of both metrics across tickers.
trading_day.loc[:, ['Sharpe', 'growth_252d']].describe()

Unnamed: 0,Sharpe,growth_252d
count,73.0,73.0
mean,0.28,1.23
std,0.51,1.48
min,-0.08,0.02
25%,0.04,0.29
50%,0.08,0.76
75%,0.29,1.45
max,2.84,8.1


In [133]:
# Ten tickers with the highest Sharpe value (NaN rows sort last).
ranked_by_sharpe = trading_day.sort_values(by="Sharpe", ascending=False)
top_10_sharpe = ranked_by_sharpe.head(10)[['Ticker']]

In [134]:
# Ten tickers with the highest 252-row growth ratio (NaN rows sort last).
ranked_by_growth = trading_day.sort_values(by="growth_252d", ascending=False)
top_10_growth_yoy = ranked_by_growth.head(10)[['Ticker']]

In [136]:
# Tickers present in both top-10 lists.
set(top_10_sharpe['Ticker']) & set(top_10_growth_yoy['Ticker'])

{'JL'}

In [137]:
# Tickers in the Sharpe top 10 that are not in the growth top 10.
set(top_10_sharpe['Ticker']) - set(top_10_growth_yoy['Ticker'])

{'BKHA', 'DYCQ', 'HLXB', 'IBAC', 'INTJ', 'JVSA', 'LEGT', 'MNDR', 'TRSG'}

# Question 3