Question 1: [IPO] Withdrawn IPOs by Company Type

In [7]:
import io
import ssl
import time
import urllib.request
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

import numpy as np
import pandas as pd

In [177]:
def get_table(url):
    """Download ``url`` and return the first HTML table on the page.

    Parameters
    ----------
    url : str
        Address of the page to fetch.

    Returns
    -------
    pandas.DataFrame or None
        The first table parsed by ``pd.read_html``. ``None`` is returned (after
        printing a short message) when the request fails, when no table can be
        parsed, or when any other error occurs; no exception is propagated.
    """
    # NOTE(review): this context disables TLS certificate verification, so the
    # server's identity is not checked. Prefer ssl.create_default_context() once
    # the local certificate store is confirmed to work.
    context = ssl._create_unverified_context()

    # Set up headers to mimic a browser request
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        req = urllib.request.Request(url, headers=headers)

        # Read the whole body and close the response before doing anything else,
        # so the connection is not held open during the delay below.
        with urllib.request.urlopen(req, context=context) as response:
            html = response.read()

        # Small delay to be respectful to the server
        time.sleep(1)

        # Hand pandas a file-like object rather than the raw bytes
        tables = pd.read_html(io.BytesIO(html))

        if len(tables) > 0:
            return tables[0]

        print("No tables found in the webpage")
        return None

    except HTTPError as e:
        print(f"HTTP Error: {e.code} - {e.reason}")
        return None
    except URLError as e:
        print(f"URL Error: {e.reason}")
        return None
    except Exception as e:
        # Deliberate best-effort: any other failure (including pandas finding
        # no tables) is reported and turned into None.
        print(f"An error occurred: {str(e)}")
        return None




In [None]:
# URL for withdrawn IPOs
url = "https://stockanalysis.com/ipos/withdrawn/"

# Fetch the data
df = get_table(url)

# get_table reports every failure by returning None. Stop here with a clear
# message instead of letting df.head() fail with an AttributeError.
if df is None:
    raise RuntimeError(f"No data retrieved from {url}")

print(f'total number of withdrawn IPOs: {len(df)}')

# Display the first few rows of the DataFrame
df.head()

total number of withdrawn IPOs: 100


Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered
0,ODTX,"Odyssey Therapeutics, Inc.",-,-
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000
2,AURN,"Aurion Biotech, Inc.",-,-
3,ROTR,"PHI Group, Inc.",-,-
4,ONE,One Power Company,-,-


In [159]:
# Classify companies based on their names without using numpy
def classify_company(name):
    """Map a company name to a class label using case-insensitive substring rules.

    The rules are tried in order and the first match wins, so a name containing
    several keywords gets the label of the earliest rule. Names matching no
    rule are labelled "Other".
    """
    lowered = name.lower()

    # (substring to look for, label to return) -- order defines precedence
    rules = (
        ("acquisition", "Acq.Corp"),
        ("inc.", "Inc"),
        ("group", "Group"),
        ("holdings", "Holdings"),
        ("ltd", "Ltd"),
        ("limited", "Ltd"),
    )

    for needle, label in rules:
        if needle in lowered:
            return label
    return "Other"



In [160]:
# Label every company by running classify_company over its name
result = df["Company Name"].map(classify_company)

# Store the labels as a new column of the DataFrame
df["Company Class"] = result

# Show the distinct labels that were produced
set(df["Company Class"].tolist())

{'Acq.Corp', 'Group', 'Holdings', 'Inc', 'Ltd', 'Other'}

In [106]:
# Parse the "Price Range" column to extract average price
def parse_price_range(price_range):
    """Convert a price or price-range string into a single average price.

    Accepted forms are a single price ("$4.00") or two prices separated by a
    dash ("$3.00 - $4.00", with or without spaces around the dash). Dollar
    signs and thousands separators are ignored.

    Parameters
    ----------
    price_range : object
        Raw cell value; anything that is not a string is treated as missing.

    Returns
    -------
    float
        The price itself for a single value, the mean of both ends for a
        range, or NaN when the value cannot be parsed (for example "-").
    """
    if not isinstance(price_range, str):
        return np.nan

    cleaned = price_range.replace("$", "").replace(",", "")
    parts = [part.strip() for part in cleaned.split("-")]

    # Only "one number" or "two numbers" are meaningful. Every piece must be a
    # plain non-negative decimal, so the float() call below cannot fail.
    if len(parts) > 2:
        return np.nan
    if not all(part.replace(".", "", 1).isdecimal() for part in parts):
        return np.nan

    return float(np.mean([float(part) for part in parts]))

# Average price per row; values that could not be parsed (NaN) are stored as 0
df["Price Avg"] = df["Price Range"].map(parse_price_range).fillna(0)

df.head(8)

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class,Price Avg,Shares Offered Clean
0,ODTX,"Odyssey Therapeutics, Inc.",-,-,Inc,0.0,0.0
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000,Inc,3.5,2000000.0
2,AURN,"Aurion Biotech, Inc.",-,-,Inc,0.0,0.0
3,ROTR,"PHI Group, Inc.",-,-,Inc,0.0,0.0
4,ONE,One Power Company,-,-,Other,0.0,0.0
5,HPOT,The Great Restaurant Development Holdings Limited,$4.00 - $6.00,1400000,Holdings,5.0,1400000.0
6,CABR,"Caring Brands, Inc.",$4.00,750000,Inc,4.0,750000.0
7,SQVI,"Sequoia Vaccines, Inc.",$8.00 - $10.00,2775000,Inc,9.0,2775000.0


In [None]:
# Parse the "Shares Offered" column to clean and convert it to float.
# Every entry is converted to text first, so this works whether the column was
# read as strings or as numbers (the .str accessor only exists on string data).
# Anything that is not a number after removing commas, such as "-", becomes 0.
df["Shares Offered Clean"] = (
    pd.to_numeric(
        df["Shares Offered"].astype(str).str.replace(",", "", regex=False),
        errors="coerce",
    )
    .astype(float)
    .fillna(0)
)


In [169]:
# Value of each withdrawn offering: cleaned share count times average price
df["Withdraw Value"] = df["Shares Offered Clean"].mul(df["Price Avg"])

# Rows where both a share count and a price were available
positive_value_rows = df.loc[df["Withdraw Value"].gt(0)]
print(f'Total Record Withdraw Value greater than 0: {len(positive_value_rows)}')
positive_value_rows

Total Record Withdraw Value greater than 0: 71


Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class,Price Avg,Shares Offered Clean,Withdraw Value
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000,Inc,3.5,2000000.0,7000000.0
5,HPOT,The Great Restaurant Development Holdings Limited,$4.00 - $6.00,1400000,Holdings,5.0,1400000.0,7000000.0
6,CABR,"Caring Brands, Inc.",$4.00,750000,Inc,4.0,750000.0,3000000.0
7,SQVI,"Sequoia Vaccines, Inc.",$8.00 - $10.00,2775000,Inc,9.0,2775000.0,24975000.0
8,SNI,Shenni Holdings Limited,$4.00 - $6.00,3000000,Holdings,5.0,3000000.0,15000000.0
...,...,...,...,...,...,...,...,...
92,DPAC,Deep Space Acquisition Corp. I,$10.00,21000000,Acq.Corp,10.0,21000000.0,210000000.0
93,GIF,"GigCapital6, Inc.",$10.00,20000000,Inc,10.0,20000000.0,200000000.0
94,HYIV,Haymaker Acquisition Corp. IV,$10.00,26100000,Acq.Corp,10.0,26100000.0,261000000.0
97,IFIT,iFIT Health & Fitness Inc.,$18.00 - $21.00,30769231,Inc,19.5,30769231.0,600000004.5


In [175]:
# Total 'Withdraw Value' per 'Company Class', largest total first
withdrawn_value_by_class = (
    df.groupby("Company Class")["Withdraw Value"]
    .sum()
    .sort_values(ascending=False)
)

# Same totals rendered as dollar strings for display
withdrawn_value_by_class_formatted = withdrawn_value_by_class.map("${:,.2f}".format)

# Display the results
print("Withdrawn Value by Company Class:")
print(withdrawn_value_by_class_formatted)

# Largest total across the classes
top_total = withdrawn_value_by_class.max()
print(f"\nHighest Withdrawn Value by Company Class: ${top_total:,.2f}")



Withdrawn Value by Company Class:
Company Class
Acq.Corp    $4,021,000,000.00
Inc         $2,257,164,204.85
Other         $767,919,999.00
Ltd           $321,734,584.75
Holdings      $303,000,000.50
Group          $33,787,500.00
Name: Withdraw Value, dtype: object

Highest Withdrawn Value by Company Class: $4,021,000,000.00


Question 2: [IPO] Median Sharpe Ratio for 2024 IPOs (First 5 Months)


In [545]:
url = "https://stockanalysis.com/ipos/2024/"

# Fetch the data
ipo_data = get_table(url)

# get_table returns None on any failure. Fail here with a clear message rather
# than in a later cell that indexes into ipo_data.
if ipo_data is None:
    raise RuntimeError(f"No data retrieved from {url}")


In [546]:
# Parse the textual IPO dates so they can be compared against a cutoff date
ipo_data["IPO Date cleaned"] = pd.to_datetime(ipo_data["IPO Date"])

# Keep IPOs dated before 2024-06-01 whose "IPO Price" is not the '-' placeholder.
# .copy() makes the result an independent DataFrame: later cells add columns to
# it, which on a plain filtered slice triggers SettingWithCopyWarning.
ipo_data_cleaned = ipo_data[
    (ipo_data["IPO Date cleaned"] < "2024-06-01") & (ipo_data["IPO Price"] != '-')
].copy()


In [547]:
# Report how many rows the date/price filter kept
rows_before = len(ipo_data)
rows_after = len(ipo_data_cleaned)
print(f'Total number of IPOs before cleaning: {rows_before}')
print(f'Total number of IPOs after cleaning: {rows_after}')

Total number of IPOs before cleaning: 225
Total number of IPOs after cleaning: 75


In [551]:
# Calculate growth and volatility for a given ticker
def calc_growth(ticker, data, period_days):
    """Return the price ratio ``period_days`` rows after a ticker's first valid price.

    Parameters
    ----------
    ticker : str
        Column of ``data`` to use.
    data : pandas.DataFrame
        Price table with one column per ticker.
    period_days : int
        Number of rows to move forward from the first valid price.

    Returns
    -------
    float or None
        ``price[period_days] / price[0]`` counted from the first valid row, or
        ``None`` when the ticker has no valid price or fewer than
        ``period_days + 1`` rows from that point on.
    """
    prices = data[ticker]
    first_valid = prices.first_valid_index()
    if first_valid is None:
        # Column is entirely missing: nothing to measure
        return None

    r = prices[data.index >= first_valid]
    try:
        # .iloc is explicitly positional. A bare integer key on a date-indexed
        # Series relies on a deprecated positional fallback.
        return r.iloc[period_days] / r.iloc[0]
    except IndexError:
        # Not enough history for the requested period
        return None

def calc_voltatility(ticker, data, volatility_date):
    """Return a ticker's 30-row rolling volatility on ``volatility_date``.

    The value is the rolling standard deviation (window of 30 rows) of the
    ticker's column, taken from its first valid row onward and multiplied by
    ``sqrt(252)``.

    Parameters
    ----------
    ticker : str
        Column of ``data`` to use.
    data : pandas.DataFrame
        Price table with one column per ticker.
    volatility_date : str or datetime-like
        Index value at which the rolling figure is read.

    Returns
    -------
    float
        The scaled rolling standard deviation on that date. NaN when the ticker
        has no valid price, when the date is not in the index, or when fewer
        than 30 rows are available by that date.
    """
    prices = data[ticker]
    first_valid = prices.first_valid_index()
    if first_valid is None:
        return np.nan

    tr = prices[data.index >= first_valid]
    r = tr.rolling(30).std() * np.sqrt(252)

    on_date = r[r.index == volatility_date]
    if on_date.empty:
        # The date has no row (e.g. a non-trading day): report missing
        # instead of raising IndexError.
        return np.nan
    return on_date.iloc[0]

In [549]:
import yfinance as yf

# Map each company name to its ticker symbol for the filtered IPOs
index_tickers = {
    company: symbol
    for company, symbol in zip(ipo_data_cleaned['Company Name'], ipo_data_cleaned['Symbol'])
}

# Download the full daily history for those tickers and keep the 'Close' data
tickers = list(index_tickers.values())
downloaded = yf.download(tickers=tickers, interval='1d', period='max')
history_data = downloaded['Close']

[*********************100%***********************]  75 of 75 completed


In [None]:
# Display the downloaded 'Close' data (indexed by ticker column in the helper functions)
history_data

In [None]:
# Date on which the rolling volatility of every ticker is read
voltatility_date = '2025-06-06'

# Growth ratio N rows after each ticker's first valid price, one column per horizon
for days in (1, 3, 7, 30, 90, 252):
    ipo_data_cleaned[f'growth_{days}d'] = ipo_data_cleaned['Symbol'].apply(
        calc_growth, args=(history_data, days)
    )

# Rolling volatility of each ticker on voltatility_date
ipo_data_cleaned['volatility'] = ipo_data_cleaned['Symbol'].apply(
    calc_voltatility, args=(history_data, voltatility_date)
)

# Ratio of 252-row growth to volatility, stored as 'Sharpe Ratio'.
# NOTE(review): the raw growth ratio is divided directly; no risk-free rate is
# subtracted first -- confirm this matches the intended Sharpe definition.
ipo_data_cleaned['Sharpe Ratio'] = ipo_data_cleaned['growth_252d'].div(ipo_data_cleaned['volatility'])

In [None]:

# Summary statistics of the 252-row growth ratio
ipo_data_cleaned['growth_252d'].describe()

count    71.000000
mean      0.900859
std       0.892906
min       0.038947
25%       0.229211
50%       0.634667
75%       1.136392
max       4.849711
Name: growth_252d, dtype: float64

In [None]:
# Summary statistics of the 'Sharpe Ratio' column (the 50% row is the median)
ipo_data_cleaned['Sharpe Ratio'].describe()

count    71.000000
mean      0.271087
std       0.532052
min       0.006326
25%       0.039845
50%       0.084900
75%       0.228232
max       2.961827
Name: Sharpe Ratio, dtype: float64

In [None]:
# Order the IPOs from highest to lowest 252-row growth and show the first ten
ipo_data_sort_by_growth252d = ipo_data_cleaned.sort_values('growth_252d', ascending=False)

ipo_data_sort_by_growth252d.iloc[:10]

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return,IPO Date cleaned,growth_1d,growth_3d,growth_7d,growth_30d,growth_90d,growth_252d,volatility,Sharpe Ratio
159,"May 8, 2024",NNE,NANO Nuclear Energy Inc.,$4.00,$34.25,756.63%,2024-05-08,0.973025,0.795761,0.755299,3.030828,2.387283,4.849711,57.122852,0.0849
198,"Feb 14, 2024",UMAC,"Unusual Machines, Inc.",$4.00,$7.98,99.50%,2024-02-14,1.155116,0.830033,0.955446,0.577558,0.455446,3.617162,11.681615,0.309646
189,"Mar 21, 2024",RDDT,"Reddit, Inc.",$34.00,$115.03,238.32%,2024-03-21,0.911975,1.290841,1.000198,0.924663,1.206384,2.486717,111.575768,0.022287
164,"Apr 25, 2024",MRX,Marex Group plc,$19.00,$39.82,109.58%,2024-04-25,1.000526,1.014218,1.008425,1.053712,1.337162,2.388211,29.466621,0.081048
209,"Feb 1, 2024",AS,"Amer Sports, Inc.",$13.00,$37.74,190.31%,2024-02-01,1.115672,1.129105,1.112687,1.129105,1.067911,2.362687,86.819697,0.027214
205,"Feb 7, 2024",AHR,"American Healthcare REIT, Inc.",$12.00,$35.90,199.17%,2024-02-07,0.987897,0.994705,1.049168,1.072617,1.141479,2.273312,21.657313,0.104967
212,"Jan 26, 2024",BTSG,"BrightSpring Health Services, Inc.",$13.00,$21.89,68.39%,2024-01-26,0.96,1.001818,1.03,0.84,1.05,2.159091,37.062851,0.058255
173,"Apr 18, 2024",MTEN,Mingteng International Corporation Inc.,$4.00,$12.00,200.00%,2024-04-18,0.959459,1.081081,1.121622,1.527027,1.435135,2.043243,40.703293,0.050198
163,"Apr 25, 2024",LOAR,Loar Holdings Inc.,$28.00,$83.92,199.71%,2024-04-25,1.018648,1.072131,1.047131,1.210246,1.453484,1.978484,86.076,0.022985
165,"Apr 25, 2024",RBRK,"Rubrik, Inc.",$32.00,$88.86,177.69%,2024-04-25,1.027027,0.865405,0.942432,0.914324,0.881892,1.911892,164.768721,0.011603


In [None]:
# Order the IPOs from highest to lowest 'Sharpe Ratio' and show the first ten
ipo_data_sort_by_sharpe_ratio = ipo_data_cleaned.sort_values('Sharpe Ratio', ascending=False)

ipo_data_sort_by_sharpe_ratio.iloc[:10]

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return,IPO Date cleaned,growth_1d,growth_3d,growth_7d,growth_30d,growth_90d,growth_252d,volatility,Sharpe Ratio
187,"Mar 21, 2024",BKHA,Black Hawk Acquisition Corporation,$10.00,$10.64,6.40%,2024-03-21,1.0,0.998026,1.0,1.001974,1.00691,1.04541,0.352961,2.961827
218,"Jan 19, 2024",JVSA,JVSPAC Acquisition Corp.,$10.00,$10.85,8.50%,2024-01-19,1.000499,1.000997,1.002991,1.005982,1.014955,1.065803,0.502601,2.120573
206,"Feb 6, 2024",LEGT,Legato Merger Corp. III,$10.00,$10.63,6.33%,2024-02-06,1.000995,1.000995,1.000995,1.004975,1.010945,1.037811,0.517665,2.004795
185,"Mar 26, 2024",IBAC,IB Acquisition Corp.,$10.00,$10.43,4.30%,2024-03-26,0.99499,0.995992,1.001002,0.998998,1.006012,1.038076,0.610592,1.700114
199,"Feb 9, 2024",HLXB,Helix Acquisition Corp. II,$10.00,$10.83,8.30%,2024-02-09,1.035329,1.02159,1.011776,1.00687,1.000981,1.059863,0.904682,1.171531
196,"Feb 21, 2024",DYCQ,DT Cloud Acquisition Corporation,$10.00,$10.83,8.30%,2024-02-21,1.001485,1.00198,1.00198,1.006931,1.018317,1.058416,1.046983,1.01092
222,"Jan 12, 2024",SYNX,Silynxcom Ltd.,$4.00,$1.70,-56.00%,2024-01-12,1.01108,0.980609,0.798061,0.989474,0.700831,1.177285,1.368371,0.860355
180,"Apr 2, 2024",MAMO,Massimo Group,$4.50,$2.18,-51.56%,2024-04-02,1.136213,1.299003,1.335548,1.365449,1.227575,0.893688,1.506475,0.593231
215,"Jan 25, 2024",YIBO,Planet Image International Limited,$4.00,$1.40,-65.00%,2024-01-25,0.97491,0.953405,0.727599,0.913978,0.827957,1.842294,3.99195,0.461502
175,"Apr 16, 2024",TWG,Top Wealth Group Holding Limited,$4.00,$0.14,-96.40%,2024-04-16,1.069364,1.50289,0.872832,0.589595,0.456647,0.079191,0.232289,0.340915


In [None]:
# Build one long DataFrame with daily prices and derived features for every ticker.
# Each ticker's frame is collected in a list and concatenated once at the end,
# which avoids re-copying the accumulated data on every iteration.
ticker_frames = []

for ticker in tickers:
    # Work with stock prices
    ticker_obj = yf.Ticker(ticker)
    historyPrices = ticker_obj.history(period="max", interval="1d")

    # sleep 1 sec between downloads - not to overload the API server
    time.sleep(1)

    # Skip tickers for which no price history came back
    if historyPrices.empty:
        continue

    close = historyPrices['Close']

    # Calendar features taken from the date index
    historyPrices['Ticker'] = ticker
    historyPrices['Year'] = historyPrices.index.year
    historyPrices['Month'] = historyPrices.index.month
    historyPrices['Weekday'] = historyPrices.index.weekday
    historyPrices['Date'] = historyPrices.index.date

    # Historical returns: today's close relative to the close N rows earlier
    for days in [1, 3, 7, 30, 90, 252]:
        historyPrices['growth_' + str(days) + 'd'] = close / close.shift(days)

    # Forward return over the next 30 rows (the shift now matches the column name;
    # it was previously computed with a 5-row shift)
    historyPrices['growth_future_30d'] = close.shift(-30) / close

    # Future growth after holding 1..12 months, a month being 21 rows.
    # The loop previously iterated over the bare integer 12, which raises TypeError.
    for months in range(1, 13):
        historyPrices['future_growth_' + str(months) + 'm'] = close.shift(-months * 21) / close

    # Technical indicators
    # SimpleMovingAverage 10 days and 20 days
    historyPrices['SMA10'] = close.rolling(10).mean()
    historyPrices['SMA20'] = close.rolling(20).mean()
    historyPrices['growing_moving_average'] = np.where(historyPrices['SMA10'] > historyPrices['SMA20'], 1, 0)
    historyPrices['high_minus_low_relative'] = (historyPrices.High - historyPrices.Low) / close

    # 30d rolling volatility : https://ycharts.com/glossary/terms/rolling_vol_30
    historyPrices['volatility'] = close.rolling(30).std() * np.sqrt(252)

    # what we want to predict
    historyPrices['is_positive_growth_30d_future'] = np.where(historyPrices['growth_future_30d'] > 1, 1, 0)

    ticker_frames.append(historyPrices)

# One row per (ticker, day); the date is kept in the 'Date' column, so the index is reset
if ticker_frames:
    stocks_df = pd.concat(ticker_frames, ignore_index=True)
else:
    stocks_df = pd.DataFrame({'A': []})

0 BOW
1 HDL
2 RFAI
3 JDZG
4 RAY
5 BTOC
6 ZK
7 GPAT
8 PAL
9 SVCO
10 NNE
11 CCIX
12 VIK
13 ZONE
14 LOAR
15 MRX
16 RBRK
17 NCI
18 MFI
19 YYGH
20 TRSG
21 CDTG
22 CTRI
23 IBTA
24 MTEN
25 TWG
26 ULS
27 PACS
28 MNDR
29 CTNM
30 MAMO
31 ZBAO
32 BOLD
33 MMA
34 UBXG
35 IBAC
36 AUNA
37 BKHA
38 LOBO
39 RDDT
40 ALAB
41 INTJ
42 RYDE
43 LGCL
44 SMXT
45 VHAI
46 DYCQ
47 CHRO
48 UMAC
49 HLXB
50 MGX
51 TBBB
52 TELO
53 KYTX
54 PMNT
55 AHR
56 LEGT
57 ANRO
58 GUTS
59 AS
60 FBLG
61 AVBP
62 BTSG
63 HAO
64 CGON
65 YIBO
66 JL
67 SUGP
68 JVSA
69 KSPI
70 CCTG
71 PSBD
72 SYNX
73 SDHC
74 ROMA


In [455]:
# Keep only the rows of stocks_df whose 'Date' is 2025-06-06
on_check_date = pd.to_datetime(stocks_df['Date']) == '2025-06-06'
check_df = stocks_df[on_check_date]


In [511]:
# Summary statistics of the 252-row growth ratio on the check date
check_df['growth_252d'].describe()

count    71.000000
mean      1.152897
std       1.406017
min       0.024970
25%       0.293422
50%       0.758065
75%       1.362736
max       8.097413
Name: growth_252d, dtype: float64

In [515]:
# NOTE(review): no cell visible here creates a 'Sharpe' column on stocks_df or
# check_df, so this cell would raise KeyError unless the column is added
# elsewhere -- confirm where it comes from and how it is computed.
# print the descriptive statistics of the Sharpe ratio
print(check_df['Sharpe'].describe())

#print the median of the Sharpe ratio
print(f"Median Sharpe Ratio: {check_df['Sharpe'].median()}")

count    71.000000
mean      0.301597
std       0.529685
min      -0.079677
25%       0.041215
50%       0.083768
75%       0.335681
max       2.835668
Name: Sharpe, dtype: float64
Median Sharpe Ratio: 0.0837682300329437


Question 3: [IPO] ‘Fixed Months Holding Strategy’


In [522]:
# Find the first date on which a ticker has a valid price
def first_trading_date(ticker, data):
    """Return the date of the first row from a ticker's first valid price onward.

    Parameters
    ----------
    ticker : str
        Column of ``data`` to use.
    data : pandas.DataFrame
        Price table with one column per ticker and a datetime-like index.

    Returns
    -------
    datetime.date or None
        The date of that first row, or ``None`` when the column holds no valid
        value at all.
    """
    prices = data[ticker]
    first_valid = prices.first_valid_index()
    if first_valid is None:
        # Handle "no data" explicitly instead of comparing the index with None
        return None

    r = prices[data.index >= first_valid]
    if len(r) == 0:
        return None
    return r.index[0].date()

In [534]:
# Spot-check the helper on a single ticker
first_trading_date('SDHC', history_data)

datetime.date(2024, 1, 16)

In [None]:
# Growth after holding for 1..12 months, a month being counted as 21 rows of history
for months_held in range(1, 13):
    ipo_data_cleaned[f'future_growth_{months_held}m'] = ipo_data_cleaned['Symbol'].apply(
        calc_growth, args=(history_data, months_held * 21)
    )

# First date with a valid price for every ticker
ipo_data_cleaned['first_trading_date'] = ipo_data_cleaned['Symbol'].apply(
    first_trading_date, args=(history_data,)
)


In [564]:
# Mean future growth across all IPOs for each holding period (1..12 months)
keys = [f'future_growth_{months}m' for months in range(1, 13)]
values = [ipo_data_cleaned[column].mean() for column in keys]

future_growth_df = pd.DataFrame({'future_growth_period': keys, 'mean': values})

# Order the periods from highest to lowest mean growth
future_growth_df = future_growth_df.sort_values(by='mean', ascending=False)

# Report the holding period with the highest mean growth
best_period = future_growth_df['future_growth_period'].iloc[0]
best_mean = future_growth_df['mean'].iloc[0]
print(f"Optimal number of months for future growth: {best_period} with mean {best_mean}")


Optimal number of months for future growth: future_growth_2m with mean 0.9405443035211186
