Q1

In [7]:
import ssl
import pandas as pd
from urllib.request import urlopen
import time
import urllib.request
from urllib.error import HTTPError, URLError

In [None]:
def get_withdrawn_ipos(url):
    # Create SSL context for https
    context = ssl._create_unverified_context()
    
    # Set up headers to mimic a browser request
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    try:
        # Create a Request object with headers
        req = urllib.request.Request(url, headers=headers)
        
        # Open URL with SSL context
        with urllib.request.urlopen(req, context=context) as response:
            # Add a small delay to be respectful to the server
            time.sleep(1)
            
            # Read the HTML content
            html = response.read()
            
            # Parse tables from HTML
            tables = pd.read_html(html)
            
            if len(tables) > 0:
                return tables[0]
            else:
                print("No tables found in the webpage")
                return None
                
    except HTTPError as e:
        print(f"HTTP Error: {e.code} - {e.reason}")
        return None
    except URLError as e:
        print(f"URL Error: {e.reason}")
        return None
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None




In [98]:
# URL for withdrawn IPOs
url = "https://stockanalysis.com/ipos/withdrawn/"

# Fetch the data
df = get_withdrawn_ipos(url)

# Check if the DataFrame is not None and print its length
print(f'total number of withdrawn IPOs: {len(df)}' if df is not None else 'No data retrieved.')

# Display the first few rows of the DataFrame
df.head()

total number of withdrawn IPOs: 100


Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered
0,ODTX,"Odyssey Therapeutics, Inc.",-,-
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000
2,AURN,"Aurion Biotech, Inc.",-,-
3,ROTR,"PHI Group, Inc.",-,-
4,ONE,One Power Company,-,-


In [99]:
import numpy as np

# Classify companies based on their names
conditions = [
    df["Company Name"].str.lower().str.contains("acquisiton"),
    df["Company Name"].str.lower().str.contains("inc."),
    df["Company Name"].str.lower().str.contains("group"),
    df["Company Name"].str.lower().str.contains("holdings"),
    df["Company Name"].str.lower().str.contains("ltd"),
    df["Company Name"].str.lower().str.contains("limited"),
]
choices = ["Acq.Corp", "Inc", "Group", "Holdings", "Ltd", "Ltd"]

# Use np.select to assign company classes based on conditions
result = np.select(conditions, choices, default="Other")
result

# Assign the result to a new column in the DataFrame
df["Company Class"] = result

In [100]:
# Display unique values in the "Company Class" column
set(df["Company Class"].values)

{'Group', 'Holdings', 'Inc', 'Ltd', 'Other'}

In [106]:
# Parse the "Price Range" column to extract average price
def parse_price_range(price_range):
    # Check if the price range is a string and contains a dash
    if isinstance(price_range, str) and "-" in price_range:
        parts = price_range.replace("$", "").split(" - ")
        try:
            numbers = [float(p.strip()) for p in parts if p.strip().replace('.', '', 1).isdigit()]
            if len(numbers) == 2:
                return np.mean(numbers)
        except Exception:
            # If there's an error in conversion, return NaN
            return np.nan
    # If the price range is a single value, convert it to float
    elif isinstance(price_range, str) and price_range.startswith("$"):
        return float(price_range.replace("$", "").strip())
    # If the price range is not valid, return NaN
    return np.nan

df["Price Avg"] = df["Price Range"].apply(parse_price_range).replace(np.nan, 0)

df.head(8)

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class,Price Avg,Shares Offered Clean
0,ODTX,"Odyssey Therapeutics, Inc.",-,-,Inc,0.0,0.0
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000,Inc,3.5,2000000.0
2,AURN,"Aurion Biotech, Inc.",-,-,Inc,0.0,0.0
3,ROTR,"PHI Group, Inc.",-,-,Inc,0.0,0.0
4,ONE,One Power Company,-,-,Other,0.0,0.0
5,HPOT,The Great Restaurant Development Holdings Limited,$4.00 - $6.00,1400000,Holdings,5.0,1400000.0
6,CABR,"Caring Brands, Inc.",$4.00,750000,Inc,4.0,750000.0
7,SQVI,"Sequoia Vaccines, Inc.",$8.00 - $10.00,2775000,Inc,9.0,2775000.0


In [104]:
#
df["Shares Offered Clean"] = (
	df["Shares Offered"]
	.replace("-", np.nan)
	.str.replace(",", "")
	.astype(float)
.replace(np.nan, 0))


In [112]:
df["Withdraw Value"] = (df["Shares Offered Clean"] * df["Price Avg"])

print(f'Total Record Withdraw Value greater than 0: {len(df[df["Withdraw Value"] > 0])}')
df[df["Withdraw Value"] > 0]

Total Record Withdraw Value greater than 0: 71


Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class,Price Avg,Shares Offered Clean,Withdraw Value
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000,Inc,3.5,2000000.0,7000000.0
5,HPOT,The Great Restaurant Development Holdings Limited,$4.00 - $6.00,1400000,Holdings,5.0,1400000.0,7000000.0
6,CABR,"Caring Brands, Inc.",$4.00,750000,Inc,4.0,750000.0,3000000.0
7,SQVI,"Sequoia Vaccines, Inc.",$8.00 - $10.00,2775000,Inc,9.0,2775000.0,24975000.0
8,SNI,Shenni Holdings Limited,$4.00 - $6.00,3000000,Holdings,5.0,3000000.0,15000000.0
...,...,...,...,...,...,...,...,...
92,DPAC,Deep Space Acquisition Corp. I,$10.00,21000000,Other,10.0,21000000.0,210000000.0
93,GIF,"GigCapital6, Inc.",$10.00,20000000,Inc,10.0,20000000.0,200000000.0
94,HYIV,Haymaker Acquisition Corp. IV,$10.00,26100000,Other,10.0,26100000.0,261000000.0
97,IFIT,iFIT Health & Fitness Inc.,$18.00 - $21.00,30769231,Inc,19.5,30769231.0,600000004.5


In [114]:
# Group by 'Company Class' and sum the 'Withdraw Value'
withdrawn_value_by_class = df.groupby("Company Class")["Withdraw Value"].sum().sort_values(ascending=False)

print(withdrawn_value_by_class)

Company Class
Other       4.538920e+09
Inc         2.257164e+09
Ltd         4.967346e+08
Holdings    3.030000e+08
Group       1.087875e+08
Name: Withdraw Value, dtype: float64
