In [63]:
!pip install yfinance



In [64]:
# IMPORTS
import numpy as np
import pandas as pd
import requests
import re


#Fin Data Sources
import yfinance as yf
import pandas_datareader as pdr

#Data viz
import plotly.graph_objs as go
import plotly.express as px

import time
from datetime import date

# for graphs
import matplotlib.pyplot as plt

### Question 1: [IPO] Withdrawn IPOs by Company Type

What is the total withdrawn IPO value (in $ millions) for the company class with the highest total withdrawal value?



From the withdrawn IPO list (stockanalysis.com/ipos/withdrawn), collect and process the data to find out which company type saw the most withdrawn IPO value.

In [65]:
import pandas as pd
import requests
from io import StringIO

def get_withdrawn_ipos(url: str) -> pd.DataFrame:
    """
    Fetch and return HTML tables from the given URL using pandas.
    Returns the first table if multiple are found.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.3'
        )
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        html_io = StringIO(response.text)
        tables = pd.read_html(html_io)

        if not tables:
            raise ValueError(f"No tables found on the page {url}.")

        # Return all tables or only the first depending on your needs
        return tables[0]  # or return tables to get all

    except requests.exceptions.RequestException as e:
        print(f"Request failed for {url}: {e}")
    except ValueError as ve:
        print(f"Data error for {url}: {ve}")
    except Exception as ex:
        print(f"An unexpected error occurred for {url}: {ex}")

    return pd.DataFrame()

In [66]:
withdrawn_ipos_df = get_withdrawn_ipos("https://stockanalysis.com/ipos/withdrawn/")

In [67]:
len(withdrawn_ipos_df)

100

In [68]:
withdrawn_ipos_df.head()

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered
0,ODTX,"Odyssey Therapeutics, Inc.",-,-
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000
2,AURN,"Aurion Biotech, Inc.",-,-
3,ROTR,"PHI Group, Inc.",-,-
4,ONE,One Power Company,-,-


In [69]:
withdrawn_ipos_df.columns

Index(['Symbol', 'Company Name', 'Price Range', 'Shares Offered'], dtype='object')

Create a new column called Company Class, categorizing company names based on patterns

In [70]:
def categorize_company_class(df: pd.DataFrame) -> pd.DataFrame:
    """
    Categorizes company names into 'Company Class' based on ordered patterns:
        1. Acquisition Corp / Acquisition Corporation → 'Corp'
        2. Inc / Incorporated → 'Inc'
        3. Group → 'Group'
        4. Ltd / Limited → 'Limited'
        5. Holdings → 'Holdings'
        Others → 'Other'

    Args:
        df (pd.DataFrame): The input DataFrame containing a 'Company Name' column.

    Returns:
        pd.DataFrame: The DataFrame with a new 'Company Class' column.
    """
    if 'Company Name' not in df.columns:
        print("Error: 'Company Name' column not found in the DataFrame.")
        return pd.DataFrame()

    # Ordered list of (class, keywords)
    pattern_order = [
        ('Corp', ['acquisition corp', 'acquisition corporation']),
        ('Inc', ['inc', 'incorporated']),
        ('Group', ['group']),
        ('Limited', ['ltd', 'limited']),
        ('Holdings', ['holdings']),
    ]

    def get_company_class(company_name: str) -> str:
        """Helper function to determine the company class for a single name."""
        if not isinstance(company_name, str):
            return "Other"

        lower_name = company_name.lower()

        for category, keywords in pattern_order:
            for keyword in keywords:
                if keyword in lower_name:
                    return category
        return "Other"

    df['Company Class'] = df['Company Name'].apply(get_company_class)
    return df


In [71]:
categorize_company_class(withdrawn_ipos_df)

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class
0,ODTX,"Odyssey Therapeutics, Inc.",-,-,Inc
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000,Inc
2,AURN,"Aurion Biotech, Inc.",-,-,Inc
3,ROTR,"PHI Group, Inc.",-,-,Inc
4,ONE,One Power Company,-,-,Other
...,...,...,...,...,...
95,FHP,"Freehold Properties, Inc.",-,-,Inc
96,CHO,Chobani Inc.,-,-,Inc
97,IFIT,iFIT Health & Fitness Inc.,$18.00 - $21.00,30769231,Inc
98,GLGX,"Gerson Lehrman Group, Inc.",-,-,Inc


In [72]:
withdrawn_ipos_df.groupby('Company Class').size()

Unnamed: 0_level_0,0
Company Class,Unnamed: 1_level_1
Corp,21
Group,4
Holdings,1
Inc,51
Limited,17
Other,6


Define a new field Avg. price by parsing the Price Range field (create a function and apply it to the Price Range column).


In [73]:
def calculate_average_price(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculates the 'Avg. Price' from the 'Price Range' column in a DataFrame.

    Handles price ranges (e.g., '$8.00-$10.00'), single prices (e.g., '$5.00'),
    and non-available data (e.g., '-').

    Args:
        df (pd.DataFrame): The input DataFrame containing a 'Price Range' column.

    Returns:
        pd.DataFrame: The DataFrame with a new 'Avg. Price' column.
                      Returns an empty DataFrame if 'Price Range' column is missing.
    """
    if 'Price Range' not in df.columns:
        print("Error: 'Price Range' column not found in the DataFrame.")
        return pd.DataFrame()

    def parse_price_range(price_range_str: str):
        """Helper function to parse a single price range string."""
        if not isinstance(price_range_str, str):
            return None

        # Remove '$' and any leading/trailing whitespace
        cleaned_str = price_range_str.replace('$', '').strip()

        if cleaned_str == '-':
            return None
        elif '-' in cleaned_str:
            try:
                # Split the range, convert parts to float, and calculate average
                lower_price, upper_price = map(float, cleaned_str.split('-'))
                return (lower_price + upper_price) / 2.0
            except ValueError:
                # Handle cases where conversion to float fails
                print(f"Warning: Could not parse price range '{price_range_str}'. Returning None.")
                return None
        else:
            try:
                # Single price value
                return float(cleaned_str)
            except ValueError:
                # Handle cases where conversion to float fails
                print(f"Warning: Could not parse single price '{price_range_str}'. Returning None.")
                return None

    # Apply the parsing function to the 'Price Range' column
    df['Avg. Price'] = df['Price Range'].apply(parse_price_range)

    return df

In [74]:
calculate_average_price(withdrawn_ipos_df)

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class,Avg. Price
0,ODTX,"Odyssey Therapeutics, Inc.",-,-,Inc,
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000,Inc,3.5
2,AURN,"Aurion Biotech, Inc.",-,-,Inc,
3,ROTR,"PHI Group, Inc.",-,-,Inc,
4,ONE,One Power Company,-,-,Other,
...,...,...,...,...,...,...
95,FHP,"Freehold Properties, Inc.",-,-,Inc,
96,CHO,Chobani Inc.,-,-,Inc,
97,IFIT,iFIT Health & Fitness Inc.,$18.00 - $21.00,30769231,Inc,19.5
98,GLGX,"Gerson Lehrman Group, Inc.",-,-,Inc,


Convert Shares Offered to numeric, clean missing or invalid values.

In [75]:
withdrawn_ipos_df['Shares Offered'] = pd.to_numeric(withdrawn_ipos_df['Shares Offered'], errors='coerce')

Create a new column:
Withdrawn Value = Shares Offered * Avg Price

In [76]:
withdrawn_ipos_df['Withdrawn Value'] = withdrawn_ipos_df['Shares Offered'] * withdrawn_ipos_df['Avg. Price']

In [77]:
withdrawn_ipos_df['Withdrawn Value'].notna().sum()

np.int64(71)

Group by Company Class and calculate total withdrawn value.

In [78]:
withdrawn_ipos_df.groupby('Company Class')['Withdrawn Value'].sum().sort_values(ascending=False)

Unnamed: 0_level_0,Withdrawn Value
Company Class,Unnamed: 1_level_1
Corp,4021000000.0
Inc,2257164000.0
Other,767920000.0
Limited,549734600.0
Holdings,75000000.0
Group,33787500.0


Which class had the highest total value of withdrawals?

The Class Corp had the highest total value of withdrawals eventhough the class inc had the greatest number of IPOs.

### Question 2: [IPO] Median Sharpe Ratio for 2024 IPOs (First 5 Months)

What is the median Sharpe ratio (as of 6 June 2025) for companies that went public in the first 5 months of 2024?

In [79]:
withdrawn_2024_df = get_withdrawn_ipos("https://stockanalysis.com/ipos/2024/")

In [80]:
withdrawn_2024_df.columns

Index(['IPO Date', 'Symbol', 'Company Name', 'IPO Price', 'Current', 'Return'], dtype='object')

In [81]:
# Ensure 'IPO Date' is datetime
withdrawn_2024_df['IPO Date'] = pd.to_datetime(withdrawn_2024_df['IPO Date'], errors='coerce')

# Drop rows with any missing values
withdrawn_2024_df.replace('-', np.nan, inplace=True)
withdrawn_2024_df = withdrawn_2024_df.dropna()

# Filter for IPOs before 1 June 2024
filtered_df = withdrawn_2024_df[withdrawn_2024_df['IPO Date'] < pd.Timestamp("2024-06-01")]

# Optional: Reset index if needed
filtered_df = filtered_df.reset_index(drop=True)

In [82]:
filtered_df

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
0,2024-05-23,BOW,Bowhead Specialty Holdings Inc.,$17.00,$36.43,113.88%
1,2024-05-17,HDL,Super Hi International Holding Ltd.,$19.56,$20.13,2.91%
2,2024-05-17,RFAI,RF Acquisition Corp II,$10.00,$10.52,5.20%
3,2024-05-15,JDZG,JIADE Limited,$4.00,$0.29,-92.31%
4,2024-05-15,RAY,Raytech Holding Limited,$4.00,$1.11,-71.00%
...,...,...,...,...,...,...
70,2024-01-18,CCTG,CCSC Technology International Holdings Limited,$6.00,$1.10,-82.00%
71,2024-01-18,PSBD,Palmer Square Capital BDC Inc.,$16.45,$14.15,-15.32%
72,2024-01-12,SYNX,Silynxcom Ltd.,$4.00,$1.83,-57.00%
73,2024-01-11,SDHC,Smith Douglas Homes Corp.,$21.00,$17.74,-14.29%


Download daily stock data for those tickers (via yfinance).
Make sure you understand how growth_1d ... growth_365d, and volatility columns are defined.Define a new column growth_252d representing growth after 252 trading days (~1 year), in addition to any other growth periods you already track.

In [83]:
import time

def get_stock_features(ALL_TICKERS):
    """
    Fetches historical stock data for a list of tickers and generates specified features,
    including a 'Ticker' column for identification.

    Args:
        ALL_TICKERS (list): A list of stock ticker symbols.

    Returns:
        pd.DataFrame: A DataFrame containing historical stock data with engineered features
                      and a 'Ticker' column.
    """
    stocks_df = pd.DataFrame() # Initialize an empty DataFrame

    for i, ticker in enumerate(ALL_TICKERS):
        #print(i, ticker)

        ticker_obj = yf.Ticker(ticker)
        historyPrices = ticker_obj.history(period="max", interval="1d")

        # Add the Ticker column back
        historyPrices['Ticker'] = ticker

        # historical returns
        for j in [1, 3, 7, 30, 90, 252, 365]:
            historyPrices['growth_' + str(j) + 'd'] = historyPrices['Close'] / historyPrices['Close'].shift(j)

        # volataility
        historyPrices['volatility'] = historyPrices['Close'].rolling(30).std() * np.sqrt(252)

        # sleep 1 sec between downloads - not to overload the API server
        time.sleep(1)

        if stocks_df.empty:
            stocks_df = historyPrices
        else:
            stocks_df = pd.concat([stocks_df, historyPrices], ignore_index=False)

    return stocks_df

In [84]:
full_stock_data_df = get_stock_features(filtered_df['Symbol'].unique().tolist())

In [85]:
full_stock_data_df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Ticker,growth_1d,growth_3d,growth_7d,growth_30d,growth_90d,growth_252d,growth_365d,volatility
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2025-06-09 00:00:00-04:00,3.75,3.955,2.6,2.86,381700,0.0,0.0,ROMA,0.772973,0.760638,0.922581,2.454936,4.386503,4.627832,,12.238182
2025-06-10 00:00:00-04:00,2.85,3.31,2.77,2.77,52500,0.0,0.0,ROMA,0.968531,0.77591,0.899351,2.387931,3.743243,5.368217,,11.81036
2025-06-11 00:00:00-04:00,2.75,3.153,2.75,3.0,37400,0.0,0.0,ROMA,1.083032,0.810811,0.892857,2.542373,4.115226,6.0,,11.401742
2025-06-12 00:00:00-04:00,3.021,3.05,2.85,2.88,36300,0.0,0.0,ROMA,0.96,1.006993,0.844575,2.360656,4.241532,5.938144,,10.886753
2025-06-13 00:00:00-04:00,2.87,2.89,2.56,2.755,121938,0.0,0.0,ROMA,0.956597,0.994585,0.732713,2.003636,4.057437,4.833334,,10.398258


Calculate the Sharpe ratio assuming a risk-free rate of 4.5%

In [86]:
full_stock_data_df['Sharpe'] = (full_stock_data_df['growth_252d'] - 0.045) / full_stock_data_df['volatility']

In [87]:
if not isinstance(full_stock_data_df.index, pd.DatetimeIndex):
    full_stock_data_df.index = pd.to_datetime(full_stock_data_df.index)

filtered_data_20250606 = full_stock_data_df.loc['2025-06-06']

In [88]:
filtered_data_20250606[['growth_252d', 'Sharpe']].describe()

Unnamed: 0,growth_252d,Sharpe
count,71.0,71.0
mean,1.152898,0.301597
std,1.406017,0.529685
min,0.02497,-0.079677
25%,0.293422,0.041215
50%,0.758065,0.083768
75%,1.362736,0.335681
max,8.097413,2.835668


    What is the median Sharpe ratio for these 71 stocks?

Based on the describe() output:

    The median (50%) Sharpe ratio for these 71 stocks is 0.083768.


[Additional] Do you observe the same top 10 companies when sorting by growth_252d versus sorting by Sharpe?


In [89]:
# Get top 10 companies by growth_252d
top_10_by_growth = filtered_data_20250606.sort_values(by='growth_252d', ascending=False)['Ticker'].head(10).tolist()
print(f"Top 10 companies by growth_252d:\n{top_10_by_growth}\n")

Top 10 companies by growth_252d:
['JL', 'ROMA', 'UMAC', 'NNE', 'RBRK', 'AHR', 'AS', 'MRX', 'RDDT', 'MTEN']



In [90]:
# Get top 10 companies by Sharpe ratio
top_10_by_sharpe = filtered_data_20250606.sort_values(by='Sharpe', ascending=False)['Ticker'].head(10).tolist()
print(f"Top 10 companies by Sharpe ratio:\n{top_10_by_sharpe}\n")

Top 10 companies by Sharpe ratio:
['BKHA', 'JVSA', 'LEGT', 'IBAC', 'NCI', 'HLXB', 'MNDR', 'DYCQ', 'INTJ', 'JL']



In [91]:
# Compare the two lists
common_companies = set(top_10_by_growth).intersection(set(top_10_by_sharpe))
print(f"There are {len(common_companies)} common companies in the top 10 lists:")
print(common_companies)

There are 1 common companies in the top 10 lists:
{'JL'}


### Question 3: [IPO] ‘Fixed Months Holding Strategy’
What is the optimal number of months (1 to 12) to hold a newly IPO'd stock in order to maximize average growth?
(Assume you buy at the close of the first trading day and sell after a fixed number of trading days.)

In [92]:
def get_stock_data_with_future_growth(ALL_TICKERS):
    """
    Fetches historical stock data for a list of tickers and generates specified features,
    including past growth, volatility, ticker identification, and future growth over 1-12 months.

    Args:
        ALL_TICKERS (list): A list of stock ticker symbols.

    Returns:
        pd.DataFrame: A DataFrame containing historical stock data with engineered features,
                      including future growth columns, and a DatetimeIndex for date tracking.
    """
    stocks_df = pd.DataFrame() # Initialize an empty DataFrame

    # Define the number of trading days for future growth calculations (1 to 12 months)
    # Assuming 1 month = 21 trading days
    future_growth_days = [i * 21 for i in range(1, 13)] # [21, 42, 63, ..., 252]

    for i, ticker in enumerate(ALL_TICKERS):
        # print(f"{i+1}/{len(ALL_TICKERS)}: Downloading {ticker} data...")

        ticker_obj = yf.Ticker(ticker)
        historyPrices = ticker_obj.history(period="max", interval="1d")

        if historyPrices.empty:
            print(f"Warning: No historical data found for {ticker}. Skipping.")
            time.sleep(1) # Still sleep to avoid hammering API
            continue

        # Add the Ticker column
        historyPrices['Ticker'] = ticker

        # NEW: Add future growth columns (1 to 12 months)
        for m, days_shifted in enumerate(future_growth_days):
            col_name = f'future_growth_{m+1}m'
            # Calculate future growth: Close price 'days_shifted' days in the future divided by current Close
            historyPrices[col_name] = historyPrices['Close'].shift(-days_shifted) / historyPrices['Close']

        # Sleep 1 sec between downloads - not to overload the API server
        time.sleep(1)

        if stocks_df.empty:
            stocks_df = historyPrices
        else:
            # Using ignore_index=False to preserve the DatetimeIndex for filtering by date
            stocks_df = pd.concat([stocks_df, historyPrices], ignore_index=False)

    return stocks_df

In [93]:
ipo_full_stock_data_df = get_stock_data_with_future_growth(filtered_df['Symbol'].unique().tolist())

In [94]:
print(ipo_full_stock_data_df.head())
print(ipo_full_stock_data_df.columns)

                                Open       High        Low      Close  \
Date                                                                    
2024-05-23 00:00:00-04:00  23.000000  24.270000  22.139999  23.799999   
2024-05-24 00:00:00-04:00  24.260000  26.150000  23.980000  25.700001   
2024-05-28 00:00:00-04:00  25.850000  26.879999  25.075001  26.480000   
2024-05-29 00:00:00-04:00  26.440001  26.490000  25.500999  26.290001   
2024-05-30 00:00:00-04:00  27.209999  27.209999  25.500000  26.139999   

                            Volume  Dividends  Stock Splits Ticker  \
Date                                                                 
2024-05-23 00:00:00-04:00  3335800        0.0           0.0    BOW   
2024-05-24 00:00:00-04:00   990500        0.0           0.0    BOW   
2024-05-28 00:00:00-04:00   555100        0.0           0.0    BOW   
2024-05-29 00:00:00-04:00   302700        0.0           0.0    BOW   
2024-05-30 00:00:00-04:00   200900        0.0           0.0    BOW  

Determine the first trading day (min_date) for each ticker.
This is the earliest date in the data for each stock.

In [95]:
if not isinstance(ipo_full_stock_data_df.index, pd.DatetimeIndex):
    ipo_full_stock_data_df.index = pd.to_datetime(ipo_full_stock_data_df.index)

# Group by 'Ticker' and find the minimum date (from the index) for each
first_trading_days = ipo_full_stock_data_df.groupby('Ticker').apply(lambda x: x.index.min(), include_groups=False)
print("First Trading Day for each Ticker:")
print(first_trading_days.head()) # Print first few entries to verify
print(f"\nTotal tickers: {len(first_trading_days)}")

First Trading Day for each Ticker:
Ticker
AHR    2024-02-07 00:00:00-05:00
ALAB   2024-03-20 00:00:00-04:00
ANRO   2024-02-02 00:00:00-05:00
AS     2024-02-01 00:00:00-05:00
AUNA   2024-03-22 00:00:00-04:00
dtype: datetime64[ns, America/New_York]

Total tickers: 75


Join the data:
Perform an inner join between the min_date DataFrame and the future growth data on both ticker and date.

In [96]:
# 1. Prepare ipo_full_stock_data_df: Reset index to make 'Date' a column
ipo_full_stock_data_df_reset = ipo_full_stock_data_df.reset_index()
ipo_full_stock_data_df_reset.rename(columns={'index': 'Date'}, inplace=True)

In [97]:
# 2. Prepare first_trading_days: Convert Series to DataFrame and rename columns
first_trading_days_df = first_trading_days.reset_index()
first_trading_days_df.columns = ['Ticker', 'IPO_Date']

In [98]:
# Ensure 'Date' and 'IPO_Date' columns are of datetime type for accurate joining
ipo_full_stock_data_df_reset['Date'] = pd.to_datetime(ipo_full_stock_data_df_reset['Date'])
first_trading_days_df['IPO_Date'] = pd.to_datetime(first_trading_days_df['IPO_Date'])

In [99]:
merged_data = pd.merge(
    ipo_full_stock_data_df_reset,
    first_trading_days_df,
    how='inner',
    left_on=['Ticker', 'Date'],
    right_on=['Ticker', 'IPO_Date']
)

merged_data.drop(columns=['IPO_Date'], inplace=True)

In [100]:
print(f"\nShape of the resulting DataFrame: {merged_data.shape}")


Shape of the resulting DataFrame: (75, 21)




Compute descriptive statistics for the resulting DataFrame:
Use .describe() or similar to analyze each of the 12 columns:

    future_growth_1m
    future_growth_2m
    ...
    future_growth_12m



In [101]:
future_growth_columns = [f'future_growth_{i}m' for i in range(1, 13)]
merged_data[future_growth_columns].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
future_growth_1m,75.0,0.927259,0.346261,0.098947,0.778984,0.977,1.046509,2.646505
future_growth_2m,75.0,0.940749,0.574267,0.0738,0.685815,1.0,1.154013,4.874759
future_growth_3m,75.0,0.833988,0.40948,0.060947,0.511212,0.9275,1.069085,2.04
future_growth_4m,75.0,0.825192,0.401772,0.045368,0.517233,0.909091,1.1343,1.605
future_growth_5m,75.0,0.803849,0.488226,0.054109,0.448403,0.821092,1.016381,3.213873
future_growth_6m,75.0,0.864267,0.653078,0.061432,0.38456,0.802239,1.093948,3.67052
future_growth_7m,75.0,0.847205,0.71288,0.048274,0.29687,0.844875,1.114468,5.12235
future_growth_8m,75.0,0.833048,0.762356,0.043103,0.208677,0.812109,1.082433,5.171484
future_growth_9m,75.0,0.881849,0.936895,0.033144,0.22674,0.822715,1.049719,6.764933
future_growth_10m,74.0,0.917993,0.911384,0.041357,0.242424,0.772592,1.200791,5.352601


Determine the best holding period:

    Find the number of months (1 to 12) where the average (mean) future growth is maximal.

From the above table we see that holding for 2 months has the highest avg growth . This still howeever is almost a 6 percent loss on the initial investment

### Question 4: [Strategy] Simple RSI-Based Trading Strategy

What is the total profit (in thousands) you would have earned by investing $1000 every time a stock was oversold (RSI < 25)?

In [102]:
# https://companiesmarketcap.com/usa/largest-companies-in-the-usa-by-market-cap/
US_STOCKS = ['MSFT', 'AAPL', 'GOOG', 'NVDA', 'AMZN', 'META', 'BRK-B', 'LLY', 'AVGO','V', 'JPM']

# You're required to add EU_STOCKS and INDIA_STOCS
# https://companiesmarketcap.com/european-union/largest-companies-in-the-eu-by-market-cap/
EU_STOCKS = ['NVO','MC.PA', 'ASML', 'RMS.PA', 'OR.PA', 'SAP', 'ACN', 'TTE', 'SIE.DE','IDEXY','CDI.PA']

# https://companiesmarketcap.com/india/largest-companies-in-india-by-market-cap/
INDIA_STOCKS = ['RELIANCE.NS','TCS.NS','HDB','BHARTIARTL.NS','IBN','SBIN.NS','LICI.NS','INFY','ITC.NS','HINDUNILVR.NS','LT.NS']

In [103]:
ALL_TICKERS = US_STOCKS  + EU_STOCKS + INDIA_STOCKS

In [104]:
stocks_df = pd.DataFrame({'A' : []})

for i,ticker in enumerate(ALL_TICKERS):
  print(i,ticker)

  # Work with stock prices
  ticker_obj = yf.Ticker(ticker)

  # historyPrices = yf.download(tickers = ticker,
  #                    period = "max",
  #                    interval = "1d")
  historyPrices = ticker_obj.history(
                     period = "max",
                     interval = "1d")

  # generate features for historical prices, and what we want to predict
  historyPrices['Ticker'] = ticker
  historyPrices['Year']= historyPrices.index.year
  historyPrices['Month'] = historyPrices.index.month
  historyPrices['Weekday'] = historyPrices.index.weekday
  historyPrices['Date'] = historyPrices.index.date

  # historical returns
  for i in [1,3,7,30,90,365]:
    historyPrices['growth_'+str(i)+'d'] = historyPrices['Close'] / historyPrices['Close'].shift(i)
  historyPrices['growth_future_30d'] = historyPrices['Close'].shift(-30) / historyPrices['Close']

  # Technical indicators
  # SimpleMovingAverage 10 days and 20 days
  historyPrices['SMA10']= historyPrices['Close'].rolling(10).mean()
  historyPrices['SMA20']= historyPrices['Close'].rolling(20).mean()
  historyPrices['growing_moving_average'] = np.where(historyPrices['SMA10'] > historyPrices['SMA20'], 1, 0)
  historyPrices['high_minus_low_relative'] = (historyPrices.High - historyPrices.Low) / historyPrices['Close']

  # 30d rolling volatility : https://ycharts.com/glossary/terms/rolling_vol_30
  historyPrices['volatility'] =   historyPrices['Close'].rolling(30).std() * np.sqrt(252)

  # what we want to predict
  historyPrices['is_positive_growth_30d_future'] = np.where(historyPrices['growth_future_30d'] > 1, 1, 0)

  # sleep 1 sec between downloads - not to overload the API server
  time.sleep(1)


  if stocks_df.empty:
    stocks_df = historyPrices
  else:
    stocks_df = pd.concat([stocks_df, historyPrices], ignore_index=True)

0 MSFT
1 AAPL
2 GOOG
3 NVDA
4 AMZN
5 META
6 BRK-B
7 LLY
8 AVGO
9 V
10 JPM
11 NVO
12 MC.PA
13 ASML
14 RMS.PA
15 OR.PA
16 SAP
17 ACN
18 TTE
19 SIE.DE
20 IDEXY
21 CDI.PA
22 RELIANCE.NS
23 TCS.NS
24 HDB
25 BHARTIARTL.NS
26 IBN
27 SBIN.NS
28 LICI.NS
29 INFY
30 ITC.NS
31 HINDUNILVR.NS
32 LT.NS


In [105]:
def get_ticker_type(ticker:str, us_stocks_list, eu_stocks_list, india_stocks_list):
  if ticker in us_stocks_list:
    return 'US'
  elif ticker in eu_stocks_list:
    return 'EU'
  elif ticker in india_stocks_list:
    return 'INDIA'
  else:
    return 'ERROR'

In [106]:
stocks_df['ticker_type'] = stocks_df.Ticker.apply(lambda x:get_ticker_type(x, US_STOCKS, EU_STOCKS, INDIA_STOCKS))

In [107]:
stocks_df.ticker_type.value_counts()

Unnamed: 0_level_0,count
ticker_type,Unnamed: 1_level_1
US,83762
EU,80133
INDIA,66365


In [108]:
stocks_df.Ticker.nunique()

33

In [109]:
stocks_df['Date'] = pd.to_datetime(stocks_df['Date'],utc=True)
# Volume needs to be float, not int
stocks_df['Volume'] = stocks_df['Volume']*1.0
# to resolve an error "Exception: input array type is not double"
# https://stackoverflow.com/questions/51712269/how-to-run-ta-lib-on-multiple-columns-of-a-pandas-dataframe
for f in ['Open','High','Low','Close', 'Volume', 'Close']:
  stocks_df.loc[:,f] = stocks_df.loc[:,f].astype('float64')


Install TA-Lib

In [110]:
# [2025 way - seems to be working] https://medium.com/@Tobi_Lux/collaboration-of-colab-yfinance-and-ta-lib-recent-issues-6cd8eebb56b0
url = 'https://anaconda.org/conda-forge/libta-lib/0.4.0/download/linux-64/libta-lib-0.4.0-h166bdaf_1.tar.bz2'
!curl -L $url | tar xj -C /usr/lib/x86_64-linux-gnu/ lib --strip-components=1
!pip install conda-package-handling
!wget https://anaconda.org/conda-forge/ta-lib/0.5.1/download/linux-64/ta-lib-0.5.1-py311h9ecbd09_0.conda
!cph x ta-lib-0.5.1-py311h9ecbd09_0.conda

# 🛠 Option 1: Remove the existing directory first (clean, but destructive)
!rm -rf /usr/local/lib/python3.11/dist-packages/talib

!mv ./ta-lib-0.5.1-py311h9ecbd09_0/lib/python3.11/site-packages/talib /usr/local/lib/python3.11/dist-packages/
import talib



  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  3987    0  3987    0     0   8743      0 --:--:-- --:--:-- --:--:--  8743
100  517k  100  517k    0     0   380k      0  0:00:01  0:00:01 --:--:-- 1733k
--2025-06-13 19:51:18--  https://anaconda.org/conda-forge/ta-lib/0.5.1/download/linux-64/ta-lib-0.5.1-py311h9ecbd09_0.conda
Resolving anaconda.org (anaconda.org)... 104.19.145.37, 104.19.144.37, 2606:4700::6813:9125, ...
Connecting to anaconda.org (anaconda.org)|104.19.145.37|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://binstar-cio-packages-prod.s3.amazonaws.com/5f738b4b78d4370a69f82984/673b62920c609ff6945bb24d?response-content-disposition=attachment%3B%20filename%3D%22ta-lib-0.5.1-py311h9ecbd09_0.conda%22%3B%20filename%2A%3DUTF-8%27%27ta-lib-0.5.1-py311h9ecbd09_0.conda&response-content-type=application%2Foctet-stream&X-Amz-Algorithm=

In [111]:
# Sort the DataFrame by Ticker and then by Date. This is crucial for
# correct calculation of indicators when using `groupby` and `apply`.
stocks_df = stocks_df.sort_values(by=['Ticker', 'Date']).reset_index(drop=True)

# Calculate RSI per ticker using a groupby-apply approach
print("Calculating RSI for all tickers...")
stocks_df['rsi'] = stocks_df.groupby('Ticker')['Close'].transform(
    lambda x: talib.RSI(x.values, timeperiod=14)
)



Calculating RSI for all tickers...


In [112]:
print(stocks_df['rsi'].dtype)

float64


In [113]:
# Define the RSI threshold
rsi_threshold = 25

# Filter the DataFrame based on RSI and Date conditions
selected_df = stocks_df[
    (stocks_df['rsi'] < rsi_threshold) &
    (stocks_df['Date'] >= '2000-01-01') &
    (stocks_df['Date'] <= '2025-06-01')
]

In [114]:
len(selected_df)

1565

In [115]:
net_income = 1000 * (selected_df['growth_future_30d'] - 1).sum()

In [116]:
print(f"Total profit earned : {net_income:,.2f}")

Total profit earned : 124,670.53


In [117]:
rsi_threshold = 25

selected_df = stocks_df[
    (stocks_df['rsi'] < rsi_threshold) &
    (stocks_df['Date'] >= '2000-01-01') &
    (stocks_df['Date'] <= '2025-06-01') &
    (stocks_df['growth_future_30d'].notna())
].copy()

selected_df['growth_future_30d'] = selected_df['growth_future_30d'].astype(float)

# Profit = sum of individual profits from all trades
net_income = 1000 * (selected_df['growth_future_30d'] - 1).sum()

# Convert to thousands of dollars
net_income_k = net_income / 1000

print(f"Total profit earned: ${net_income_k:,.2f}K")


Total profit earned: $124.67K


In [118]:
rsi_threshold = 25

# Filter valid RSI signals with future return data
selected_df = stocks_df[
    (stocks_df['rsi'] < rsi_threshold) &
    (stocks_df['Date'] >= '2000-01-01') &
    (stocks_df['Date'] <= '2025-06-01') &
    (stocks_df['growth_future_30d'].notna())
].copy()

# Ensure proper data type
selected_df['growth_future_30d'] = selected_df['growth_future_30d'].astype(float)
selected_df = selected_df.sort_values(by='Date')

# Loop through and only take trades spaced 30 days apart
investment = 1000
capital_gain = 0
last_trade_date = pd.Timestamp.min.tz_localize('UTC')  # Ensure timezone-aware

for _, row in selected_df.iterrows():
    if row['Date'] >= last_trade_date + pd.Timedelta(days=30):
        profit = investment * (row['growth_future_30d'] - 1)
        capital_gain += profit
        last_trade_date = row['Date']

# Convert to thousands of dollars
net_income_k = capital_gain / 1000

print(f"Total profit earned (no overlapping): ${net_income_k:,.2f}K")


Total profit earned (no overlapping): $7.47K


Q5. [Exploratory, Optional] Predicting a Positive-Return IPO
Most of the strategies for investing in IPOs deliver negative average and median returns (and even 75% quantiles).


How would you change the strategy if you want to increase the profitability?

In [163]:
import pandas as pd
import requests
from io import StringIO
import yfinance as yf
from datetime import timedelta, datetime
# Fix: Import DateOffset directly from pandas
from pandas import DateOffset

In [119]:
def get_ipos_by_year(year: int) -> pd.DataFrame:
    """
    Fetch IPO data for the given year from stockanalysis.com.
    """
    url = f"https://stockanalysis.com/ipos/{year}/"
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.3'
        )
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Wrap HTML text in StringIO to avoid deprecation warning
        # "Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object."
        html_io = StringIO(response.text)
        tables = pd.read_html(html_io)

        if not tables:
            raise ValueError(f"No tables found for year {year}.")

        return tables[0]

    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except ValueError as ve:
        print(f"Data error: {ve}")
    except Exception as ex:
        print(f"Unexpected error: {ex}")

    return pd.DataFrame()

In [159]:
# Clean IPO data
def prepare_ipo_df(raw_df):
    df = raw_df.copy()
    df = df[df['Symbol'].notna()]  # remove empty symbols
    df['ticker'] = df['Symbol'].str.strip()
    df['ipo_date'] = pd.to_datetime(df['IPO Date'], errors='coerce')
    df = df[['ticker', 'ipo_date']]
    return df.dropna()

In [164]:
def check_post_ipo_conditional_returns(ipo_df: pd.DataFrame, max_hold_months_from_entry: int = 18) -> pd.DataFrame:
    result_list = []

    current_date = pd.Timestamp(datetime.now()).tz_localize(None) # Get today's date, remove timezone

    for _, row in ipo_df.iterrows():
        ticker = row['ticker']
        ipo_date = pd.to_datetime(row['ipo_date'])
        stock = yf.Ticker(ticker)

        try:
            # Calculate 3-month entry date
            entry_date_3m = ipo_date + pd.DateOffset(months=3)

            # Get history from IPO - 5d to entry + max_hold_months_from_entry + 1 month
            # We fetch up to 2 months beyond the latest possible check date to ensure we capture it
            # The latest check is 18 months from entry, which is 21 months from IPO.
            # So, fetch history up to 22-23 months from IPO date to be safe.
            hist = stock.history(start=ipo_date - timedelta(days=5),
                                 end=ipo_date + pd.DateOffset(months=max_hold_months_from_entry + 3))

            if hist.empty:
                # print(f"No historical data for {ticker}. Skipping.")
                continue

            hist = hist.reset_index()
            # Ensure 'Date' column is timezone-naive for consistent comparisons
            if 'Date' in hist.columns:
                hist['Date'] = pd.to_datetime(hist['Date']).dt.tz_localize(None)
            elif 'Datetime' in hist.columns: # Sometimes yfinance returns 'Datetime'
                hist['Date'] = pd.to_datetime(hist['Datetime']).dt.tz_localize(None)
                hist = hist.drop(columns=['Datetime'])
            else:
                print(f"Could not find 'Date' or 'Datetime' column in history for {ticker}. Skipping.")
                continue


            # IPO day close
            ipo_close_row = hist[hist['Date'] >= ipo_date].head(1)
            if ipo_close_row.empty:
                # print(f"Could not find IPO close for {ticker}. Skipping.")
                continue
            ipo_close = ipo_close_row['Close'].values[0]

            # 3-month checkpoint relative to IPO date
            row_3m = hist[hist['Date'] >= entry_date_3m].head(1)
            if row_3m.empty:
                # print(f"Could not find 3-month mark for {ticker}. Skipping.")
                continue

            close_3m = row_3m['Close'].values[0]
            return_3m = (close_3m - ipo_close) / ipo_close

            row_result = {
                'ticker': ticker,
                'ipo_date': ipo_date,
                'ipo_close': ipo_close,
                'return_3m': return_3m,
            }

            # Only simulate holding if 3M return was positive
            if return_3m > 0:
                row_result['entry_price'] = close_3m
                row_result['investment_amount'] = 1000.0
                initial_shares = row_result['investment_amount'] / close_3m

                # Checkpoints for holding period from entry date
                for m_hold in [6, 9, 12, 18]: # Months from entry date
                    check_date = entry_date_3m + pd.DateOffset(months=m_hold)

                    # If the check_date is in the future, use current_date
                    if check_date > current_date:
                        target_date_for_fetch = current_date
                        # print(f"For {ticker}, {m_hold}m hold: Target date {check_date.strftime('%Y-%m-%d')} is in future, using {current_date.strftime('%Y-%m-%d')}")
                    else:
                        target_date_for_fetch = check_date

                    future_row = hist[hist['Date'] >= target_date_for_fetch].head(1)

                    if not future_row.empty:
                        future_price = future_row['Close'].values[0]
                        return_from_entry = (future_price - close_3m) / close_3m
                        current_value = initial_shares * future_price

                        row_result[f'return_from_entry_{m_hold}m'] = return_from_entry
                        row_result[f'is_positive_from_entry_{m_hold}m'] = return_from_entry > 0
                        row_result[f'value_at_{m_hold}m'] = current_value
                    else:
                        row_result[f'return_from_entry_{m_hold}m'] = None
                        row_result[f'is_positive_from_entry_{m_hold}m'] = None
                        row_result[f'value_at_{m_hold}m'] = None
            else:
                row_result['entry_price'] = None
                row_result['investment_amount'] = None
                # Mark all conditional returns as None if 3m return is not positive
                for m_hold in [6, 9, 12, 18]:
                    row_result[f'return_from_entry_{m_hold}m'] = None
                    row_result[f'is_positive_from_entry_{m_hold}m'] = None
                    row_result[f'value_at_{m_hold}m'] = None


            result_list.append(row_result)

        except Exception as e:
            print(f"Error fetching/processing {ticker}: {e}")
            continue

    return pd.DataFrame(result_list)

In [165]:
def analyze_ipo_results(df: pd.DataFrame, year: int):
    print(f"\n--- Analysis for IPOs in {year} ---")

    # Filter for eligible stocks (positive 3-month return)
    eligible_df = df[df['return_3m'] > 0].copy() # Use .copy() to avoid SettingWithCopyWarning

    print(f"Total IPOs in {year}: {len(df)}")
    print(f"Eligible IPOs (positive 3-month return): {len(eligible_df)}")

    if eligible_df.empty:
        print("No eligible IPOs to analyze further for this year.")
        return

    for m_hold in [6, 9, 12, 18]:
        return_col = f'return_from_entry_{m_hold}m'
        is_positive_col = f'is_positive_from_entry_{m_hold}m'
        value_col = f'value_at_{m_hold}m'

        # Filter out NaNs for percentages and means, but count total eligible for denominator
        valid_returns = eligible_df[eligible_df[return_col].notna()]

        if not valid_returns.empty:
            pct_positive = valid_returns[is_positive_col].mean() * 100
            avg_return = valid_returns[return_col].mean()
            # Calculate average value for those with valid returns
            avg_value = valid_returns[value_col].mean()

            print(f"\n{m_hold} Months from Entry Date (equivalent to {m_hold+3} months from IPO):")
            print(f"  % Positive Returns: {pct_positive:.1f}% (Based on {len(valid_returns)} data points)")
            print(f"  Average Return: {avg_return:.2%}")
            print(f"  Average Value of $1000 Investment: ${avg_value:.2f}")
        else:
            print(f"\n{m_hold} Months from Entry Date (equivalent to {m_hold+3} months from IPO):")
            print("  No valid data points for this period.")


In [166]:
if __name__ == "__main__":
    start_year = 2021
    end_year = 2024

    all_years_results = []

    for year in range(start_year, end_year + 1):
        print(f"\n--- Processing IPOs for year {year} ---")
        raw_ipos_df = get_ipos_by_year(year)

        if not raw_ipos_df.empty:
            cleaned_ipos_df = prepare_ipo_df(raw_ipos_df)
            print(f"Found {len(cleaned_ipos_df)} cleaned IPOs for {year}.")

            # Check conditional returns
            year_results_df = check_post_ipo_conditional_returns(cleaned_ipos_df)

            if not year_results_df.empty:
                all_years_results.append(year_results_df)
                analyze_ipo_results(year_results_df, year)
            else:
                print(f"No results generated for {year}.")
        else:
            print(f"No raw IPO data retrieved for {year}.")


--- Processing IPOs for year 2021 ---
Found 1035 cleaned IPOs for 2021.


ERROR:yfinance:$GDNR: possibly delisted; no timezone found
ERROR:yfinance:$LSPR: possibly delisted; no timezone found
ERROR:yfinance:$SAGA: possibly delisted; no timezone found
ERROR:yfinance:$APCA: possibly delisted; no timezone found
ERROR:yfinance:$RCAC: possibly delisted; no price data found  (1d 2021-12-12 00:00:00 -> 2023-09-17 00:00:00)
ERROR:yfinance:$ADRT: possibly delisted; no timezone found
ERROR:yfinance:$BNOX: possibly delisted; no timezone found
ERROR:yfinance:$SHAP: possibly delisted; no price data found  (1d 2021-12-11 00:00:00 -> 2023-09-16 00:00:00)
ERROR:yfinance:$AHRN: possibly delisted; no timezone found
ERROR:yfinance:$BFAC: possibly delisted; no price data found  (1d 2021-12-10 00:00:00 -> 2023-09-15 00:00:00)
ERROR:yfinance:$EVE: possibly delisted; no price data found  (1d 2021-12-10 00:00:00 -> 2023-09-15 00:00:00)
ERROR:yfinance:$IVCP: possibly delisted; no timezone found
ERROR:yfinance:$FRBN: possibly delisted; no timezone found
ERROR:yfinance:$FXCO: possibly


--- Analysis for IPOs in 2021 ---
Total IPOs in 2021: 397
Eligible IPOs (positive 3-month return): 134

6 Months from Entry Date (equivalent to 9 months from IPO):
  % Positive Returns: 0.7% (Based on 134 data points)
  Average Return: -20.62%
  Average Value of $1000 Investment: $793.81

9 Months from Entry Date (equivalent to 12 months from IPO):
  % Positive Returns: 0.7% (Based on 134 data points)
  Average Return: -29.72%
  Average Value of $1000 Investment: $702.82

12 Months from Entry Date (equivalent to 15 months from IPO):
  % Positive Returns: 0.7% (Based on 134 data points)
  Average Return: -29.11%
  Average Value of $1000 Investment: $708.89

18 Months from Entry Date (equivalent to 21 months from IPO):
  % Positive Returns: 100.0% (Based on 1 data points)
  Average Return: 8.16%
  Average Value of $1000 Investment: $1081.57

--- Processing IPOs for year 2022 ---
Found 180 cleaned IPOs for 2022.


ERROR:yfinance:$PLTN: possibly delisted; no timezone found
ERROR:yfinance:$TENK: possibly delisted; no timezone found
ERROR:yfinance:$AQU: possibly delisted; no timezone found
ERROR:yfinance:$QOMO: possibly delisted; no timezone found
ERROR:yfinance:$NXU: possibly delisted; no timezone found
ERROR:yfinance:$EFHT: possibly delisted; no timezone found
ERROR:yfinance:$CHG: possibly delisted; no timezone found
ERROR:yfinance:$HMAC: possibly delisted; no timezone found
ERROR:yfinance:$PTWO: possibly delisted; no timezone found
ERROR:yfinance:$MOBV: possibly delisted; no timezone found
ERROR:yfinance:$BRSH: possibly delisted; no timezone found
ERROR:yfinance:$FLFV: possibly delisted; no timezone found
ERROR:yfinance:$ACAC: possibly delisted; no timezone found
ERROR:yfinance:$PFHC: possibly delisted; no timezone found
ERROR:yfinance:$PNAC: possibly delisted; no timezone found
ERROR:yfinance:$MCAC: possibly delisted; no timezone found
ERROR:yfinance:$CHEA: possibly delisted; no timezone found



--- Analysis for IPOs in 2022 ---
Total IPOs in 2022: 118
Eligible IPOs (positive 3-month return): 32

6 Months from Entry Date (equivalent to 9 months from IPO):
  % Positive Returns: 3.1% (Based on 32 data points)
  Average Return: -7.01%
  Average Value of $1000 Investment: $929.90

9 Months from Entry Date (equivalent to 12 months from IPO):
  % Positive Returns: 3.1% (Based on 32 data points)
  Average Return: -9.02%
  Average Value of $1000 Investment: $909.84

12 Months from Entry Date (equivalent to 15 months from IPO):
  % Positive Returns: 3.1% (Based on 32 data points)
  Average Return: -8.68%
  Average Value of $1000 Investment: $913.21

18 Months from Entry Date (equivalent to 21 months from IPO):
  No valid data points for this period.

--- Processing IPOs for year 2023 ---
Found 153 cleaned IPOs for 2023.


ERROR:yfinance:$RYZB: possibly delisted; no timezone found
ERROR:yfinance:$NNAG: possibly delisted; no timezone found
ERROR:yfinance:$SPGC: possibly delisted; no timezone found
ERROR:yfinance:$LQR: possibly delisted; no timezone found
ERROR:yfinance:$HRYU: possibly delisted; no timezone found
ERROR:yfinance:$PXDT: possibly delisted; no timezone found
ERROR:yfinance:$WRNT: possibly delisted; no timezone found
ERROR:yfinance:$IPXX: possibly delisted; no timezone found
ERROR:yfinance:$SGE: possibly delisted; no timezone found
ERROR:yfinance:$GODN: possibly delisted; no timezone found
ERROR:yfinance:$TMTC: possibly delisted; no timezone found
ERROR:yfinance:$SBXC: possibly delisted; no price data found  (1d 2023-02-23 00:00:00 -> 2024-11-28 00:00:00)
ERROR:yfinance:$MARX: possibly delisted; no timezone found
ERROR:yfinance:$BLAC: possibly delisted; no timezone found
ERROR:yfinance:$PTHR: possibly delisted; no timezone found
ERROR:yfinance:$CETU: possibly delisted; no timezone found
ERROR:y


--- Analysis for IPOs in 2023 ---
Total IPOs in 2023: 136
Eligible IPOs (positive 3-month return): 49

6 Months from Entry Date (equivalent to 9 months from IPO):
  % Positive Returns: 2.0% (Based on 49 data points)
  Average Return: 22.85%
  Average Value of $1000 Investment: $1228.54

9 Months from Entry Date (equivalent to 12 months from IPO):
  % Positive Returns: 2.0% (Based on 49 data points)
  Average Return: 3.21%
  Average Value of $1000 Investment: $1032.10

12 Months from Entry Date (equivalent to 15 months from IPO):
  % Positive Returns: 2.0% (Based on 49 data points)
  Average Return: -5.82%
  Average Value of $1000 Investment: $941.83

18 Months from Entry Date (equivalent to 21 months from IPO):
  % Positive Returns: 0.0% (Based on 1 data points)
  Average Return: -88.88%
  Average Value of $1000 Investment: $111.23

--- Processing IPOs for year 2024 ---
Found 225 cleaned IPOs for 2024.


ERROR:yfinance:$SPHA: possibly delisted; no timezone found
ERROR:yfinance:$IZTC: possibly delisted; no timezone found



--- Analysis for IPOs in 2024 ---
Total IPOs in 2024: 223
Eligible IPOs (positive 3-month return): 101

6 Months from Entry Date (equivalent to 9 months from IPO):
  % Positive Returns: 1.6% (Based on 64 data points)
  Average Return: 6.73%
  Average Value of $1000 Investment: $1067.33

9 Months from Entry Date (equivalent to 12 months from IPO):
  % Positive Returns: 3.0% (Based on 33 data points)
  Average Return: 11.98%
  Average Value of $1000 Investment: $1119.84

12 Months from Entry Date (equivalent to 15 months from IPO):
  % Positive Returns: 7.7% (Based on 13 data points)
  Average Return: -3.16%
  Average Value of $1000 Investment: $968.45

18 Months from Entry Date (equivalent to 21 months from IPO):
  No valid data points for this period.
