In [1]:
import time
from datetime import date

import numpy as np
import pandas as pd

import pandas_datareader as pdr
import yfinance as yf

import matplotlib.pyplot as plt

import requests

from io import StringIO

In [2]:
# Lift every pandas display cap so rows, columns and cell contents render in full.
for display_option in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(display_option, None)

In [3]:
# HTTP headers passed to requests.get() when scraping: a desktop-Chrome
# User-Agent string.
# NOTE(review): presumably needed because the site rejects the default
# python-requests User-Agent — confirm.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}

# 1. IPO Filings Web Scraping and Data Processing

What's the total sum ($m) of 2023 filings that happened on Fridays?

Re-use the [Code Snippet 1] example to get the data from the web for this endpoint: <https://stockanalysis.com/ipos/filings/>. Convert 'Filing Date' to datetime and 'Shares Offered' to float64 (if '-' is encountered, populate it with NaN). Define a new field 'Avg_price' based on 'Price Range', which equals NaN if no price is specified, the price itself if only one number is provided, or the average of the two prices if a range is given. You may be inspired by the function extract_numbers() in [Code Snippet 4], or you can write your own function to parse the string. Define a column 'Shares_offered_value', which equals 'Shares Offered' * 'Avg_price' when both columns are defined; otherwise, it is NaN.

Find the total sum in $m (millions of USD, rounded to the closest INTEGER) for all filings during 2023 that happened on Fridays (`Date.dt.dayofweek == 4`). You should see 32 records in total, 25 of which are not null.

(additional: you can read about S-1 IPO filing to understand the context)

In [4]:
URL = "https://stockanalysis.com/ipos/filings/"

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of handing an error page to read_html.
response = requests.get(URL, headers=HEADERS, timeout=30)
response.raise_for_status()

# read_html returns every <table> on the page; the first one is the filings list.
df_ipo = pd.read_html(StringIO(response.text))[0]
df_ipo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 326 entries, 0 to 325
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Filing Date     326 non-null    object
 1   Symbol          326 non-null    object
 2   Company Name    326 non-null    object
 3   Price Range     326 non-null    object
 4   Shares Offered  326 non-null    object
dtypes: object(5)
memory usage: 12.9+ KB


In [5]:
# Preview the first rows of the scraped table (every column is still object dtype).
df_ipo.head()

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered
0,"May 3, 2024",TBN,Tamboran Resources Corporation,-,-
1,"Apr 29, 2024",HWEC,"HW Electro Co., Ltd.",$3.00,3750000
2,"Apr 29, 2024",DTSQ,DT Cloud Star Acquisition Corporation,$10.00,6000000
3,"Apr 26, 2024",EURK,Eureka Acquisition Corp,$10.00,5000000
4,"Apr 26, 2024",HDL,Super Hi International Holding Ltd.,-,-


In [6]:
# Parse strings such as "May 3, 2024" into datetime64 using an explicit format,
# so an unexpected date layout raises instead of being guessed.
df_ipo["Filing Date"] = pd.to_datetime(df_ipo["Filing Date"], format="%b %d, %Y")

In [7]:
# Re-check the head to confirm 'Filing Date' now holds parsed dates.
df_ipo.head()

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered
0,2024-05-03,TBN,Tamboran Resources Corporation,-,-
1,2024-04-29,HWEC,"HW Electro Co., Ltd.",$3.00,3750000
2,2024-04-29,DTSQ,DT Cloud Star Acquisition Corporation,$10.00,6000000
3,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000
4,2024-04-26,HDL,Super Hi International Holding Ltd.,-,-


In [8]:
# Treat the "-" placeholder as missing, but only in the two columns that use it,
# so identifier columns (Symbol, Company Name) are never rewritten. Assigning the
# result back also avoids the in-place mutation of the whole frame.
PLACEHOLDER_COLS = ["Price Range", "Shares Offered"]
df_ipo[PLACEHOLDER_COLS] = df_ipo[PLACEHOLDER_COLS].replace("-", np.nan)

In [9]:
# Cast to float64 so missing share counts are represented as NaN. This relies on
# the "-" placeholders having been replaced with NaN first; a leftover "-" would
# make the cast raise.
df_ipo["Shares Offered"] = df_ipo["Shares Offered"].astype("float64")

In [10]:
# Verify dtypes and non-null counts after the date parse, NaN replacement and float cast.
df_ipo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 326 entries, 0 to 325
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Filing Date     326 non-null    datetime64[ns]
 1   Symbol          326 non-null    object        
 2   Company Name    326 non-null    object        
 3   Price Range     258 non-null    object        
 4   Shares Offered  252 non-null    float64       
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 12.9+ KB


In [11]:
# Inspect the first 12 rows now that "-" placeholders have become NaN.
df_ipo.head(12)

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered
0,2024-05-03,TBN,Tamboran Resources Corporation,,
1,2024-04-29,HWEC,"HW Electro Co., Ltd.",$3.00,3750000.0
2,2024-04-29,DTSQ,DT Cloud Star Acquisition Corporation,$10.00,6000000.0
3,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000.0
4,2024-04-26,HDL,Super Hi International Holding Ltd.,,
5,2024-04-22,DRJT,Derun Group Inc,$5.00,
6,2024-04-19,GPAT,GP-Act III Acquisition Corp.,$10.00,25000000.0
7,2024-04-16,JLJT,Jialiang Holdings Ltd,$5.00,
8,2024-04-15,GAUZ,Gauzy Ltd.,,
9,2024-04-12,BOW,Bowhead Specialty Holdings Inc.,,


In [98]:
def avg_price(value) -> float:
    """Return the average price described by a 'Price Range' cell.

    Parameters
    ----------
    value : str or float
        Raw cell content, e.g. "$3.00" (single price), "$8.00 - $10.00"
        (range), "-" (no price given) or NaN.

    Returns
    -------
    float
        The single price, the mean of the prices found in a range, or NaN
        when the cell is not a string or holds no parsable number.
    """
    if not isinstance(value, str):
        return np.nan

    prices = []
    # Splitting on the bare dash accepts ranges with or without surrounding
    # spaces, and turns a lone "-" placeholder into empty parts only.
    for part in value.split("-"):
        cleaned = part.replace("$", "").replace(",", "").strip()
        if not cleaned:
            continue
        try:
            prices.append(float(cleaned))
        except ValueError:
            # Free text such as "TBD" carries no price information.
            return np.nan

    if not prices:
        return np.nan
    return sum(prices) / len(prices)


In [106]:
# Derive the average price per filing; the trailing cast guarantees a float64
# column even when the parser hands back numeric strings.
df_ipo["Avg Price"] = df_ipo["Price Range"].apply(avg_price).astype("float64")

In [107]:
# Spot-check the new 'Avg Price' column against 'Price Range' on the first 20 rows.
df_ipo.head(20)

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered,Avg Price
0,2024-05-03,TBN,Tamboran Resources Corporation,,,
1,2024-04-29,HWEC,"HW Electro Co., Ltd.",$3.00,3750000.0,3.0
2,2024-04-29,DTSQ,DT Cloud Star Acquisition Corporation,$10.00,6000000.0,10.0
3,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000.0,10.0
4,2024-04-26,HDL,Super Hi International Holding Ltd.,,,
5,2024-04-22,DRJT,Derun Group Inc,$5.00,,5.0
6,2024-04-19,GPAT,GP-Act III Acquisition Corp.,$10.00,25000000.0,10.0
7,2024-04-16,JLJT,Jialiang Holdings Ltd,$5.00,,5.0
8,2024-04-15,GAUZ,Gauzy Ltd.,,,
9,2024-04-12,BOW,Bowhead Specialty Holdings Inc.,,,


In [108]:
# Confirm 'Avg Price' is float64; it should have as many non-null values as 'Price Range'.
df_ipo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 326 entries, 0 to 325
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Filing Date     326 non-null    datetime64[ns]
 1   Symbol          326 non-null    object        
 2   Company Name    326 non-null    object        
 3   Price Range     258 non-null    object        
 4   Shares Offered  252 non-null    float64       
 5   Avg Price       258 non-null    float64       
dtypes: datetime64[ns](1), float64(2), object(3)
memory usage: 15.4+ KB


In [109]:
# Offering size = shares offered x average price. NaN in either input
# propagates, so filings missing a share count or a price stay NaN.
df_ipo["Shares Offered Value"] = df_ipo["Shares Offered"].mul(df_ipo["Avg Price"])

In [110]:
# Spot-check 'Shares Offered Value' on the first 20 rows.
df_ipo.head(20)

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered,Avg Price,Shares Offered Value
0,2024-05-03,TBN,Tamboran Resources Corporation,,,,
1,2024-04-29,HWEC,"HW Electro Co., Ltd.",$3.00,3750000.0,3.0,11250000.0
2,2024-04-29,DTSQ,DT Cloud Star Acquisition Corporation,$10.00,6000000.0,10.0,60000000.0
3,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000.0,10.0,50000000.0
4,2024-04-26,HDL,Super Hi International Holding Ltd.,,,,
5,2024-04-22,DRJT,Derun Group Inc,$5.00,,5.0,
6,2024-04-19,GPAT,GP-Act III Acquisition Corp.,$10.00,25000000.0,10.0,250000000.0
7,2024-04-16,JLJT,Jialiang Holdings Ltd,$5.00,,5.0,
8,2024-04-15,GAUZ,Gauzy Ltd.,,,,
9,2024-04-12,BOW,Bowhead Specialty Holdings Inc.,,,,


In [117]:
# How many 2023 filings were made on a Friday (dayofweek == 4), whether or not
# their offering value is known.
df_ipo.loc[
    (df_ipo["Filing Date"].dt.year == 2023)
    & (df_ipo["Filing Date"].dt.dayofweek == 4),
    "Shares Offered Value",
].shape

(32,)

In [116]:
# How many of the 2023 Friday filings have a known offering value.
df_ipo.loc[
    (df_ipo["Filing Date"].dt.year == 2023)
    & (df_ipo["Filing Date"].dt.dayofweek == 4),
    "Shares Offered Value",
].notna().sum()

25

In [124]:
# Total value of the 2023 Friday filings in $m, rounded to the closest integer
# as the question asks; sum() skips filings whose value is NaN.
friday_2023_value = df_ipo.loc[
    (df_ipo["Filing Date"].dt.year == 2023)
    & (df_ipo["Filing Date"].dt.dayofweek == 4),
    "Shares Offered Value",
].sum()
f"${friday_2023_value / 1_000_000:.0f}m"

'$286m'