# Module 2 Homework

In [1]:
# IMPORTS
import numpy as np
import pandas as pd
import requests
import time

# Fin Data Sources
import yfinance as yf
import pandas_datareader as pdr

# Data viz
import plotly.graph_objs as go
import plotly.express as px

import time
import datetime as dt

# for graphs
import matplotlib.pyplot as plt

# for technical indicators
import talib

## Question 1: IPO Filings Web Scraping and Data Processing

**What's the total sum ($m) of 2023 filings that happenned of Fridays?**

Re-use the [Code Snippet 1] example to get the data from web for this endpoint: https://stockanalysis.com/ipos/filings/  
Convert the 'Filing Date' to datetime(), 'Shares Offered' to float64 (if '-' is encountered, populate with NaNs).  
Define a new field 'Avg_price' based on the "Price Range", which equals to NaN if no price is specified, to the price (if only one number is provided), or to the average of 2 prices (if a range is given).  
You may be inspired by the function `extract_numbers()` in [Code Snippet 4], or you can write your own function to "parse" a string.   
Define a column "Shares_offered_value", which equals to "Shares Offered" * "Avg_price" (when both columns are defined; otherwise, it's NaN)  

Find the total sum in $m (millions of USD, closest INTEGER number) for all fillings during 2023, which happened on Fridays (`Date.dt.dayofweek()==4`). You should see 32 records in total, 24 of it is not null.  

(additional: you can read about [S-1 IPO filing](https://www.dfinsolutions.com/knowledge-hub/thought-leadership/knowledge-resources/what-s-1-ipo-filing) to understand the context)


In [2]:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
}

url = "https://stockanalysis.com/ipos/filings/"
response = requests.get(url, headers=headers)

ipo_dfs = pd.read_html(response.text)


  ipo_dfs = pd.read_html(response.text)


In [3]:
ipos_2023_df = ipo_dfs[0].copy()

In [4]:
ipos_2023_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 325 entries, 0 to 324
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Filing Date     325 non-null    object
 1   Symbol          325 non-null    object
 2   Company Name    325 non-null    object
 3   Price Range     325 non-null    object
 4   Shares Offered  325 non-null    object
dtypes: object(5)
memory usage: 12.8+ KB


In [5]:
# convert to datetime
ipos_2023_df["Filing Date"] = pd.to_datetime(ipos_2023_df["Filing Date"])
ipos_2023_df["Shares Offered"] = pd.to_numeric(ipos_2023_df["Shares Offered"], errors="coerce")


In [6]:

# # Convert 'Return' to numeric format (percentage)
# ipos_2023_df["Return"] = (
#     pd.to_numeric(ipos_2023_df["Return"].str.replace("%", ""), errors="coerce") / 100
# )

In [7]:
ipos_2023_df

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered
0,2024-04-29,HWEC,"HW Electro Co., Ltd.",$3.00,3750000.0
1,2024-04-29,DTSQ,DT Cloud Star Acquisition Corporation,$10.00,6000000.0
2,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000.0
3,2024-04-26,HDL,Super Hi International Holding Ltd.,-,
4,2024-04-22,DRJT,Derun Group Inc,$5.00,
...,...,...,...,...,...
320,2020-01-21,GOXS,"Goxus, Inc.",$8.00 - $10.00,1500000.0
321,2020-01-21,UTXO,"UTXO Acquisition, Inc.",$10.00,5000000.0
322,2019-12-09,LOHA,Loha Co. Ltd,$8.00 - $10.00,2500000.0
323,2019-10-04,ZGHB,China Eco-Materials Group Co. Limited,$4.00,4300000.0


In [8]:
ipos_2023_df.isnull().sum()

Filing Date        0
Symbol             0
Company Name       0
Price Range        0
Shares Offered    72
dtype: int64

In [9]:
missing_prices_df = ipos_2023_df[
    ipos_2023_df["Price Range"].astype(str).str.find("-") >= 0
]
missing_prices_df

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered
3,2024-04-26,HDL,Super Hi International Holding Ltd.,-,
7,2024-04-15,GAUZ,Gauzy Ltd.,-,
8,2024-04-12,BOW,Bowhead Specialty Holdings Inc.,-,
9,2024-04-05,SPHL,Springview Holdings Ltd,$4.00 - $5.00,2000000.0
10,2024-04-02,FFFZ,Fuxing China Group Limited,$4.00 - $4.50,2000000.0
...,...,...,...,...,...
318,2020-06-17,ALEH,ALE Group Holding Limited,$4.00 - $6.00,4200000.0
319,2020-02-18,CAST,"FreeCast, Inc.",-,
320,2020-01-21,GOXS,"Goxus, Inc.",$8.00 - $10.00,1500000.0
322,2019-12-09,LOHA,Loha Co. Ltd,$8.00 - $10.00,2500000.0


In [10]:
def process_price(price):
    if not price:  # Check if the price is an empty string
        return np.nan  # Return NaN for empty strings

    # Replace 0 with NaN
    if price == "-":
        return np.nan
    
    # Remove "$" sign
    price = price.replace("$", "")

    # Split price ranges and calculate average
    if "-" in price:
        lower, upper = price.split("-")
        lower = float(lower.strip())
        upper = float(upper.strip())
        price = (lower + upper) / 2

    return float(price)

In [11]:
ipos_2023_df["Avg Price"] = ipos_2023_df["Price Range"].apply(process_price)

In [12]:
# Define a column "Shares_offered_value", which equals to "Shares Offered" * "Avg_price" (when both columns are defined; otherwise, it's NaN)
# Define a function to calculate "Shares_offered_value"
def calculate_shares_offered_value(shares_offered, avg_price):
    # shares_offered = df['Shares Offered']
    # avg_price = df['Avg_price']
    if pd.notna(shares_offered) and pd.notna(avg_price):
        return shares_offered * avg_price
    else:
        return np.nan


ipos_2023_df["Shares_offered_value"] = ipos_2023_df.apply(
    lambda x: calculate_shares_offered_value(x["Shares Offered"], x["Avg Price"]),
    axis=1,
)

In [13]:
#Find the total sum in $m (millions of USD, closest INTEGER number) for all fillings during 2023, which happened on Fridays
#(`Date.dt.dayofweek()==4`). You should see 32 records in total, 24 of it is not null.  

In [14]:
friday_filings_2023 = ipos_2023_df[
    (ipos_2023_df["Filing Date"].dt.year == 2023)
    & (ipos_2023_df["Filing Date"].dt.dayofweek == 4)
].dropna()
friday_filings_2023

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered,Avg Price,Shares_offered_value
49,2023-12-29,LEC,Lafayette Energy Corp,$3.50 - $4.50,1200000.0,4.0,4800000.0
61,2023-12-08,ENGS,Energys Group Limited,$4.00 - $6.00,2000000.0,5.0,10000000.0
62,2023-12-08,LNKS,Linkers Industries Limited,$4.00 - $6.00,2200000.0,5.0,11000000.0
81,2023-10-27,RAY,Raytech Holding Limited,$4.00 - $5.00,1500000.0,4.5,6750000.0
88,2023-10-13,ORIS,Oriental Rise Holdings Limited,$4.00,2000000.0,4.0,8000000.0
91,2023-10-06,QMMM,QMMM Holdings Limited,$4.00,2125000.0,4.0,8500000.0
95,2023-09-29,KAPA,"Kairos Pharma, Ltd.",$4.00,1550000.0,4.0,6200000.0
96,2023-09-29,VAPA,Valens Pay Global Limited,$5.00 - $6.00,1000000.0,5.5,5500000.0
103,2023-09-15,ACSB,Acesis Holdings Corporation,$4.00 - $6.00,1300000.0,5.0,6500000.0
121,2023-07-07,AZI,Autozi Internet Technology (Global) Ltd.,$4.00 - $5.00,1250000.0,4.5,5625000.0


In [15]:
len(friday_filings_2023)

25

In [16]:
friday_filings_2023["Shares_offered_value"].sum()/1_000_000

285.7

## Question 2:  IPOs "Fixed days hold" strategy


**Find the optimal number of days X (between 1 and 30), where 75% quantile growth is the highest?**


Reuse [Code Snippet 1] to retrieve the list of IPOs from 2023 and 2024 (from URLs: https://stockanalysis.com/ipos/2023/ and https://stockanalysis.com/ipos/2024/).  
Get all OHLCV daily prices for all stocks with an "IPO date" before March 1, 2024 ("< 2024-03-01") - 185 tickers. Sometimes you may need to adjust the symbol name (e.g., 'IBAC' on stockanalysis.com -> 'IBACU' on Yahoo Finance) to locate OHLCV prices for all stocks.

Let's assume you managed to buy a new stock (listed on IPO) on the first day at the [Adj Close] price]. Your strategy is to hold for exactly X full days (where X is between 1 and 30) and sell at the "Adj. Close" price in X days (e.g., if X=1, you sell on the next day).
Find X, when the 75% quantile growth (among 185 investments) is the highest. 

HINTs:
* You can generate 30 additional columns: growth_future_1d ... growth_future_30d, join that with the table of min_dates (first day when each stock has data on Yahoo Finance), and perform vector operations on the resulting dataset.
* You can use the `DataFrame.describe()` function to get mean, min, max, 25-50-75% quantiles.


Addtional: 
* You can also ensure that the mean and 50th percentile (median) investment returns are negative for most X values, implying a wager for a "lucky" investor who might be in the top 25%.
* What's your recommendation: Do you suggest pursuing this strategy for an optimal X?

In [174]:
def get_ipos_by_year(year="2024"):
    url = f"https://stockanalysis.com/ipos/{year}/"
    response = requests.get(url, headers=headers)
    df = pd.read_html(response.text)[0]
    # Data cleaning
    df["IPO Price"] = df["IPO Price"].str.replace("$", "")
    df["IPO Date"] = pd.to_datetime(
        df["IPO Date"], errors="coerce"
    )  # format="%Y-%m-%d",
    df["IPO Price"] = pd.to_numeric(df["IPO Price"].str.replace("$", ""), errors="coerce")
    df["Current"] = pd.to_numeric(df["Current"].str.replace("$", ""), errors="coerce")
    df["Return"] = (pd.to_numeric(df["Return"].str.replace("%", ""), errors="coerce") / 100)
    return df

In [175]:
ipo_2023_df = get_ipos_by_year(year="2023")

  df = pd.read_html(response.text)[0]


In [176]:
ipo_2024_df = get_ipos_by_year(year="2024")

  df = pd.read_html(response.text)[0]


In [177]:
stacked_ipo_df = pd.concat([ipo_2023_df, ipo_2024_df], axis=0)

In [178]:
stacked_ipo_df.head()

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
0,2023-12-27,IROH,Iron Horse Acquisitions Corp.,10.0,10.05,0.005
1,2023-12-19,LGCB,Linkage Global Inc,4.0,3.1,-0.225
2,2023-12-15,ZKH,ZKH Group Limited,15.5,12.34,-0.2039
3,2023-12-15,BAYA,Bayview Acquisition Corp,10.0,10.17,0.017
4,2023-12-14,INHD,Inno Holdings Inc.,4.0,0.66,-0.834


In [179]:
stacked_ipo_df.tail()

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
59,NaT,PSBD,Palmer Square Capital BDC Inc.,16.45,16.47,0.0012
60,NaT,CCTG,CCSC Technology International Holdings Limited,6.0,2.32,-0.6133
61,NaT,SYNX,Silynxcom Ltd.,4.0,3.2,-0.2
62,NaT,SDHC,Smith Douglas Homes Corp.,21.0,30.21,0.4386
63,NaT,ROMA,Roma Green Finance Limited,4.0,0.72,-0.8204


In [180]:
ipo_df = stacked_ipo_df[stacked_ipo_df["IPO Date"]<"2024-03-01"]
ipo_df

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
0,2023-12-27,IROH,Iron Horse Acquisitions Corp.,10.0,10.05,0.0050
1,2023-12-19,LGCB,Linkage Global Inc,4.0,3.10,-0.2250
2,2023-12-15,ZKH,ZKH Group Limited,15.5,12.34,-0.2039
3,2023-12-15,BAYA,Bayview Acquisition Corp,10.0,10.17,0.0170
4,2023-12-14,INHD,Inno Holdings Inc.,4.0,0.66,-0.8340
...,...,...,...,...,...,...
149,2023-01-25,QSG,QuantaSing Group Ltd,12.5,3.15,-0.7480
150,2023-01-20,CVKD,"Cadrenal Therapeutics, Inc.",5.0,0.47,-0.9019
151,2023-01-13,SKWD,"Skyward Specialty Insurance Group, Inc.",15.0,37.58,1.5053
152,2023-01-13,ISRL,Israel Acquisitions Corp,10.0,10.91,0.0910


# Rename ticker symbols
PHTR to PHTRU

In [181]:
ipo_df.loc[ipo_df["Symbol"] == "PHTR"] = "PHTRU"

In [182]:
ipo_tickers = ipo_df["Symbol"].to_list()
ipo_tickers

['IROH',
 'LGCB',
 'ZKH',
 'BAYA',
 'INHD',
 'AFJK',
 'GSIW',
 'FEBO',
 'CLBR',
 'ELAB',
 'RR',
 'DDC',
 'SHIM',
 'GLAC',
 'SGN',
 'HG',
 'CRGX',
 'ANSC',
 'AITR',
 'GVH',
 'LXEO',
 'PAPL',
 'ATGL',
 'MNR',
 'WBUY',
 'NCL',
 'BIRK',
 'GMM',
 'PMEC',
 'LRHC',
 'GPAK',
 'SPKL',
 'QETA',
 'MSS',
 'ANL',
 'SYRA',
 'VSME',
 'LRE',
 'TURB',
 'MDBH',
 'KVYO',
 'CART',
 'DTCK',
 'RYZB',
 'NMRA',
 'ARM',
 'SPPL',
 'NWGL',
 'SWIN',
 'IVP',
 'NNAG',
 'SRM',
 'SPGC',
 'LQR',
 'NRXS',
 'FTEL',
 'MIRA',
 'PXDT',
 'CTNT',
 'HRYU',
 'SRFM',
 'PRZO',
 'HYAC',
 'KVAC',
 'JNVR',
 'ELWS',
 'WRNT',
 'TSBX',
 'ODD',
 'APGE',
 'NETD',
 'SGMT',
 'BOWN',
 'SXTP',
 'PWM',
 'VTMX',
 'INTS',
 'SVV',
 'KGS',
 'FIHL',
 'GENK',
 'BUJA',
 'BOF',
 'AZTR',
 'CAVA',
 'ESHA',
 'ATMU',
 'ATS',
 'IPXX',
 'CWD',
 'SGE',
 'SLRN',
 'ALCY',
 'KVUE',
 'GODN',
 'TRNR',
 'AACT',
 'JYD',
 'USGO',
 'UCAR',
 'WLGS',
 'TPET',
 'TCJH',
 'GDTC',
 'VCIG',
 'GDHG',
 'ARBB',
 'ISPR',
 'MGIH',
 'MWG',
 'HSHP',
 'SFWL',
 'SYT',
 'HKIT',
 'C

In [183]:
ipo_dates  = ipo_df["IPO Date"].to_list()

In [184]:
historical_price_dfs =[]
failed_tickers = []
i = 0
for ticker, ipo_date in (zip(ipo_tickers, ipo_dates)):
    i = i+ 1
    start = ipo_date
    end = ipo_date + dt.timedelta(days=50)
    print(f"{i} - Getting historical data for {ticker}")
    try:
        historyPrices = yf.download(tickers=ticker,
                        start=start,
                        end=end,
                        interval="1d")
        historyPrices["Ticker"] = ticker
        if len(historyPrices) >0:
            historical_price_dfs.append(historyPrices)
        else:
            failed_tickers.append(ticker)
    except:
        failed_tickers.append(ticker)
    # sleep 1 sec between downloads - not to overload the API server
    time.sleep(1)

1 - Getting historical data for IROH


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['IROH']: Exception("%ticker%: Data doesn't exist for startDate = 1703653200, endDate = 1707973200")
[*********************100%%**********************]  1 of 1 completed

2 - Getting historical data for LGCB



[*********************100%%**********************]  1 of 1 completed

3 - Getting historical data for ZKH



[*********************100%%**********************]  1 of 1 completed

4 - Getting historical data for BAYA



[*********************100%%**********************]  1 of 1 completed

5 - Getting historical data for INHD





6 - Getting historical data for AFJK


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AFJK']: Exception("%ticker%: Data doesn't exist for startDate = 1701666000, endDate = 1705986000")
[*********************100%%**********************]  1 of 1 completed

7 - Getting historical data for GSIW



[*********************100%%**********************]  1 of 1 completed

8 - Getting historical data for FEBO





9 - Getting historical data for CLBR


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['CLBR']: Exception("%ticker%: Data doesn't exist for startDate = 1700542800, endDate = 1704862800")
[*********************100%%**********************]  1 of 1 completed

10 - Getting historical data for ELAB



[*********************100%%**********************]  1 of 1 completed

11 - Getting historical data for RR



[*********************100%%**********************]  1 of 1 completed

12 - Getting historical data for DDC



[*********************100%%**********************]  1 of 1 completed

13 - Getting historical data for SHIM



[*********************100%%**********************]  1 of 1 completed

14 - Getting historical data for GLAC



[*********************100%%**********************]  1 of 1 completed

15 - Getting historical data for SGN



[*********************100%%**********************]  1 of 1 completed

16 - Getting historical data for HG



[*********************100%%**********************]  1 of 1 completed

17 - Getting historical data for CRGX





18 - Getting historical data for ANSC


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['ANSC']: Exception("%ticker%: Data doesn't exist for startDate = 1699506000, endDate = 1703826000")


19 - Getting historical data for AITR


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AITR']: Exception("%ticker%: Data doesn't exist for startDate = 1699506000, endDate = 1703826000")
[*********************100%%**********************]  1 of 1 completed

20 - Getting historical data for GVH



[*********************100%%**********************]  1 of 1 completed

21 - Getting historical data for LXEO



[*********************100%%**********************]  1 of 1 completed

22 - Getting historical data for PAPL



[*********************100%%**********************]  1 of 1 completed

23 - Getting historical data for ATGL



[*********************100%%**********************]  1 of 1 completed

24 - Getting historical data for MNR



[*********************100%%**********************]  1 of 1 completed

25 - Getting historical data for WBUY



[*********************100%%**********************]  1 of 1 completed

26 - Getting historical data for NCL



[*********************100%%**********************]  1 of 1 completed

27 - Getting historical data for BIRK



[*********************100%%**********************]  1 of 1 completed

28 - Getting historical data for GMM



[*********************100%%**********************]  1 of 1 completed

29 - Getting historical data for PMEC



[*********************100%%**********************]  1 of 1 completed

30 - Getting historical data for LRHC



[*********************100%%**********************]  1 of 1 completed

31 - Getting historical data for GPAK





32 - Getting historical data for SPKL


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['SPKL']: Exception("%ticker%: Data doesn't exist for startDate = 1696564800, endDate = 1700888400")


33 - Getting historical data for QETA


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['QETA']: Exception("%ticker%: Data doesn't exist for startDate = 1696564800, endDate = 1700888400")
[*********************100%%**********************]  1 of 1 completed

34 - Getting historical data for MSS



[*********************100%%**********************]  1 of 1 completed

35 - Getting historical data for ANL



[*********************100%%**********************]  1 of 1 completed

36 - Getting historical data for SYRA



[*********************100%%**********************]  1 of 1 completed

37 - Getting historical data for VSME



[*********************100%%**********************]  1 of 1 completed

38 - Getting historical data for LRE



[*********************100%%**********************]  1 of 1 completed

39 - Getting historical data for TURB



[*********************100%%**********************]  1 of 1 completed

40 - Getting historical data for MDBH



[*********************100%%**********************]  1 of 1 completed

41 - Getting historical data for KVYO



[*********************100%%**********************]  1 of 1 completed

42 - Getting historical data for CART



[*********************100%%**********************]  1 of 1 completed

43 - Getting historical data for DTCK





44 - Getting historical data for RYZB


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['RYZB']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

45 - Getting historical data for NMRA



[*********************100%%**********************]  1 of 1 completed

46 - Getting historical data for ARM



[*********************100%%**********************]  1 of 1 completed

47 - Getting historical data for SPPL



[*********************100%%**********************]  1 of 1 completed

48 - Getting historical data for NWGL



[*********************100%%**********************]  1 of 1 completed

49 - Getting historical data for SWIN



[*********************100%%**********************]  1 of 1 completed

50 - Getting historical data for IVP





51 - Getting historical data for NNAG


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['NNAG']: Exception("%ticker%: Data doesn't exist for startDate = 1692331200, endDate = 1696651200")
[*********************100%%**********************]  1 of 1 completed

52 - Getting historical data for SRM



[*********************100%%**********************]  1 of 1 completed

53 - Getting historical data for SPGC



[*********************100%%**********************]  1 of 1 completed

54 - Getting historical data for LQR



[*********************100%%**********************]  1 of 1 completed

55 - Getting historical data for NRXS



[*********************100%%**********************]  1 of 1 completed

56 - Getting historical data for FTEL



[*********************100%%**********************]  1 of 1 completed

57 - Getting historical data for MIRA



[*********************100%%**********************]  1 of 1 completed

58 - Getting historical data for PXDT



[*********************100%%**********************]  1 of 1 completed

59 - Getting historical data for CTNT



[*********************100%%**********************]  1 of 1 completed

60 - Getting historical data for HRYU



[*********************100%%**********************]  1 of 1 completed

61 - Getting historical data for SRFM



[*********************100%%**********************]  1 of 1 completed

62 - Getting historical data for PRZO





63 - Getting historical data for HYAC


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['HYAC']: Exception("%ticker%: Data doesn't exist for startDate = 1690344000, endDate = 1694664000")


64 - Getting historical data for KVAC


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['KVAC']: Exception("%ticker%: Data doesn't exist for startDate = 1690257600, endDate = 1694577600")
[*********************100%%**********************]  1 of 1 completed

65 - Getting historical data for JNVR



[*********************100%%**********************]  1 of 1 completed

66 - Getting historical data for ELWS



[*********************100%%**********************]  1 of 1 completed

67 - Getting historical data for WRNT



[*********************100%%**********************]  1 of 1 completed

68 - Getting historical data for TSBX



[*********************100%%**********************]  1 of 1 completed

69 - Getting historical data for ODD



[*********************100%%**********************]  1 of 1 completed

70 - Getting historical data for APGE





71 - Getting historical data for NETD


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['NETD']: Exception("%ticker%: Data doesn't exist for startDate = 1689307200, endDate = 1693627200")
[*********************100%%**********************]  1 of 1 completed

72 - Getting historical data for SGMT





73 - Getting historical data for BOWN


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BOWN']: Exception("%ticker%: Data doesn't exist for startDate = 1689134400, endDate = 1693454400")
[*********************100%%**********************]  1 of 1 completed

74 - Getting historical data for SXTP



[*********************100%%**********************]  1 of 1 completed

75 - Getting historical data for PWM



[*********************100%%**********************]  1 of 1 completed

76 - Getting historical data for VTMX



[*********************100%%**********************]  1 of 1 completed

77 - Getting historical data for INTS



[*********************100%%**********************]  1 of 1 completed

78 - Getting historical data for SVV



[*********************100%%**********************]  1 of 1 completed

79 - Getting historical data for KGS



[*********************100%%**********************]  1 of 1 completed

80 - Getting historical data for FIHL



[*********************100%%**********************]  1 of 1 completed

81 - Getting historical data for GENK





82 - Getting historical data for BUJA


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BUJA']: Exception("%ticker%: Data doesn't exist for startDate = 1687924800, endDate = 1692244800")
[*********************100%%**********************]  1 of 1 completed

83 - Getting historical data for BOF



[*********************100%%**********************]  1 of 1 completed

84 - Getting historical data for AZTR



[*********************100%%**********************]  1 of 1 completed

85 - Getting historical data for CAVA



[*********************100%%**********************]  1 of 1 completed

86 - Getting historical data for ESHA



[*********************100%%**********************]  1 of 1 completed

87 - Getting historical data for ATMU



[*********************100%%**********************]  1 of 1 completed

88 - Getting historical data for ATS





89 - Getting historical data for IPXX


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['IPXX']: Exception("%ticker%: Data doesn't exist for startDate = 1684987200, endDate = 1689307200")
[*********************100%%**********************]  1 of 1 completed

90 - Getting historical data for CWD



[*********************100%%**********************]  1 of 1 completed

91 - Getting historical data for SGE



[*********************100%%**********************]  1 of 1 completed

92 - Getting historical data for SLRN





93 - Getting historical data for ALCY


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['ALCY']: Exception("%ticker%: Data doesn't exist for startDate = 1683259200, endDate = 1687579200")
[*********************100%%**********************]  1 of 1 completed

94 - Getting historical data for KVUE





95 - Getting historical data for GODN


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['GODN']: Exception("%ticker%: Data doesn't exist for startDate = 1683000000, endDate = 1687320000")
[*********************100%%**********************]  1 of 1 completed

96 - Getting historical data for TRNR





97 - Getting historical data for AACT


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AACT']: Exception("%ticker%: Data doesn't exist for startDate = 1682049600, endDate = 1686369600")
[*********************100%%**********************]  1 of 1 completed

98 - Getting historical data for JYD



[*********************100%%**********************]  1 of 1 completed

99 - Getting historical data for USGO



[*********************100%%**********************]  1 of 1 completed

100 - Getting historical data for UCAR



[*********************100%%**********************]  1 of 1 completed

101 - Getting historical data for WLGS



[*********************100%%**********************]  1 of 1 completed

102 - Getting historical data for TPET



[*********************100%%**********************]  1 of 1 completed

103 - Getting historical data for TCJH



[*********************100%%**********************]  1 of 1 completed

104 - Getting historical data for GDTC



[*********************100%%**********************]  1 of 1 completed

105 - Getting historical data for VCIG



[*********************100%%**********************]  1 of 1 completed

106 - Getting historical data for GDHG



[*********************100%%**********************]  1 of 1 completed

107 - Getting historical data for ARBB



[*********************100%%**********************]  1 of 1 completed

108 - Getting historical data for ISPR



[*********************100%%**********************]  1 of 1 completed

109 - Getting historical data for MGIH



[*********************100%%**********************]  1 of 1 completed

110 - Getting historical data for MWG



[*********************100%%**********************]  1 of 1 completed

111 - Getting historical data for HSHP



[*********************100%%**********************]  1 of 1 completed

112 - Getting historical data for SFWL



[*********************100%%**********************]  1 of 1 completed

113 - Getting historical data for SYT



[*********************100%%**********************]  1 of 1 completed

114 - Getting historical data for HKIT



[*********************100%%**********************]  1 of 1 completed

115 - Getting historical data for CHSN



[*********************100%%**********************]  1 of 1 completed

116 - Getting historical data for TBMC



[*********************100%%**********************]  1 of 1 completed

117 - Getting historical data for HLP



[*********************100%%**********************]  1 of 1 completed

118 - Getting historical data for ZJYL



[*********************100%%**********************]  1 of 1 completed

119 - Getting historical data for TMTC



[*********************100%%**********************]  1 of 1 completed

120 - Getting historical data for YGFGF





121 - Getting historical data for OAKU


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['OAKU']: Exception("%ticker%: Data doesn't exist for startDate = 1679630400, endDate = 1683950400")
[*********************100%%**********************]  1 of 1 completed

122 - Getting historical data for BANL



[*********************100%%**********************]  1 of 1 completed

123 - Getting historical data for OMH



[*********************100%%**********************]  1 of 1 completed

124 - Getting historical data for MGRX





125 - Getting historical data for FORL


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['FORL']: Exception("%ticker%: Data doesn't exist for startDate = 1679025600, endDate = 1683345600")
[*********************100%%**********************]  1 of 1 completed

126 - Getting historical data for ICG



[*********************100%%**********************]  1 of 1 completed

127 - Getting historical data for IZM



[*********************100%%**********************]  1 of 1 completed

128 - Getting historical data for AESI



[*********************100%%**********************]  1 of 1 completed

129 - Getting historical data for AIXI





130 - Getting historical data for SBXC


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['SBXC']: Exception("%ticker%: Data doesn't exist for startDate = 1677560400, endDate = 1681876800")
[*********************100%%**********************]  1 of 1 completed

131 - Getting historical data for BMR



[*********************100%%**********************]  1 of 1 completed

132 - Getting historical data for DIST



[*********************100%%**********************]  1 of 1 completed

133 - Getting historical data for GXAI



[*********************100%%**********************]  1 of 1 completed

134 - Getting historical data for MARX



[*********************100%%**********************]  1 of 1 completed

135 - Getting historical data for BFRG



[*********************100%%**********************]  1 of 1 completed

136 - Getting historical data for ENLT



[*********************100%%**********************]  1 of 1 completed

137 - Getting historical data for MLYS





138 - Getting historical data for PTHR


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['PTHR']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

139 - Getting historical data for BLAC



[*********************100%%**********************]  1 of 1 completed

140 - Getting historical data for NXT



[*********************100%%**********************]  1 of 1 completed

141 - Getting historical data for HSAI



[*********************100%%**********************]  1 of 1 completed

142 - Getting historical data for LSDI



[*********************100%%**********************]  1 of 1 completed

143 - Getting historical data for LICN



[*********************100%%**********************]  1 of 1 completed

144 - Getting historical data for GPCR



[*********************100%%**********************]  1 of 1 completed

145 - Getting historical data for ASST





146 - Getting historical data for CETU


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['CETU']: Exception("%ticker%: Data doesn't exist for startDate = 1675227600, endDate = 1679544000")
[*********************100%%**********************]  1 of 1 completed

147 - Getting historical data for TXO



[*********************100%%**********************]  1 of 1 completed

148 - Getting historical data for BREA



[*********************100%%**********************]  1 of 1 completed

149 - Getting historical data for GNLX



[*********************100%%**********************]  1 of 1 completed

150 - Getting historical data for QSG



[*********************100%%**********************]  1 of 1 completed

151 - Getting historical data for CVKD



[*********************100%%**********************]  1 of 1 completed

152 - Getting historical data for SKWD



[*********************100%%**********************]  1 of 1 completed

153 - Getting historical data for ISRL



[*********************100%%**********************]  1 of 1 completed

154 - Getting historical data for MGOL





In [28]:
len(historical_price_dfs)

131

In [29]:
len(historical_price_dfs[3])

33

In [30]:
len(failed_tickers)

23

In [31]:
failed_ipos = ipo_df[ipo_df["Symbol"].isin(failed_tickers)]
failed_ipos

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
0,2023-12-27,IROH,Iron Horse Acquisitions Corp.,10.0,10.05,0.005
5,2023-12-04,AFJK,"Aimei Health Technology Co., Ltd Ordinary Share",10.0,10.32,0.032
8,2023-11-21,CLBR,Colombier Acquisition Corp.,10.0,10.35,0.035
17,2023-11-09,ANSC,Agriculture & Natural Solutions Acquisition Co...,10.0,10.24,0.0235
18,2023-11-09,AITR,AI TRANSPORTATION ACQUISITION CORP,10.0,10.32,0.032
31,2023-10-06,SPKL,Spark I Acquisition Corp. Class A Ordinary Share,10.0,10.3,0.03
32,2023-10-06,QETA,Quetta Acquisition Corporation,10.0,10.25,0.025
43,2023-09-15,RYZB,"RayzeBio, Inc.",18.0,62.49,2.4717
50,2023-08-18,NNAG,99 Acquisition Group Inc.,10.0,10.42,0.042
62,2023-07-26,HYAC,Haymaker Acquisition Corp. III,10.0,10.5,0.05


In [32]:
historical_failed_price_dfs = []
totally_failed_tickers = []
i = 0
for ticker, ipo_date in zip(failed_ipos["Symbol"], failed_ipos["IPO Date"]):
    i = i + 1
    start = ipo_date
    print(f"{i} - Getting historical data for {ticker}")
    try:
        historyPrices = yf.download(tickers=ticker, start=start, interval="1d")
        historyPrices["Ticker"] = ticker
        if len(historyPrices) > 0:
            historical_failed_price_dfs.append(historyPrices)
        else:
            totally_failed_tickers.append(ticker)
    except:
        totally_failed_tickers.append(ticker)
    # sleep 1 sec between downloads - not to overload the API server
    time.sleep(1)

[*********************100%%**********************]  1 of 1 completed

1 - Getting historical data for IROH



[*********************100%%**********************]  1 of 1 completed

2 - Getting historical data for AFJK



[*********************100%%**********************]  1 of 1 completed

3 - Getting historical data for CLBR



[*********************100%%**********************]  1 of 1 completed

4 - Getting historical data for ANSC



[*********************100%%**********************]  1 of 1 completed

5 - Getting historical data for AITR



[*********************100%%**********************]  1 of 1 completed

6 - Getting historical data for SPKL



[*********************100%%**********************]  1 of 1 completed

7 - Getting historical data for QETA





8 - Getting historical data for RYZB


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['RYZB']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

9 - Getting historical data for NNAG



[*********************100%%**********************]  1 of 1 completed

10 - Getting historical data for HYAC



[*********************100%%**********************]  1 of 1 completed

11 - Getting historical data for KVAC



[*********************100%%**********************]  1 of 1 completed

12 - Getting historical data for NETD



[*********************100%%**********************]  1 of 1 completed

13 - Getting historical data for BOWN



[*********************100%%**********************]  1 of 1 completed

14 - Getting historical data for BUJA



[*********************100%%**********************]  1 of 1 completed

15 - Getting historical data for IPXX



[*********************100%%**********************]  1 of 1 completed

16 - Getting historical data for ALCY



[*********************100%%**********************]  1 of 1 completed

17 - Getting historical data for GODN



[*********************100%%**********************]  1 of 1 completed

18 - Getting historical data for AACT



[*********************100%%**********************]  1 of 1 completed

19 - Getting historical data for OAKU



[*********************100%%**********************]  1 of 1 completed

20 - Getting historical data for FORL



[*********************100%%**********************]  1 of 1 completed

21 - Getting historical data for SBXC



[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['PTHR']: Exception('%ticker%: No timezone found, symbol may be delisted')


22 - Getting historical data for PTHR


[*********************100%%**********************]  1 of 1 completed

23 - Getting historical data for CETU





In [33]:
totally_failed_tickers

['RYZB', 'PTHR']

RYZB has been acquired and is no longer listed.  
PTHR Pono Capital Three, Inc. uses PTHRU as symbol on Yahoo

In [34]:
len(historyPrices)

277

In [35]:

historyPrices = yf.download(tickers="PTHRU", period="max", interval="1d")

[*********************100%%**********************]  1 of 1 completed


# TODO: Rename PHTR to PHTRU and download the data from IPO to max available data.

In [36]:
historyPrices

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-02-10,10.150,10.180,10.1400,10.15,10.15,6306321
2023-02-13,10.150,10.165,10.1400,10.16,10.16,345450
2023-02-14,10.155,10.165,10.1546,10.16,10.16,453796
2023-02-15,10.160,10.180,10.1600,10.16,10.16,106114
2023-02-16,10.160,10.170,10.1600,10.17,10.17,2295
...,...,...,...,...,...,...
2024-01-08,9.700,9.700,9.7000,9.70,9.70,0
2024-01-09,8.820,8.820,7.2800,7.28,7.28,406
2024-01-10,7.280,7.280,7.2800,7.28,7.28,0
2024-01-11,5.500,5.600,5.5000,5.60,5.60,200


In [37]:
# for i in range (1, 31):
#     # DEBUG: ipo_df['Adj Close_sh_m_'+str(i)+'d'] = ipo_df['Adj Close'].shift(i)
#     ipo_df["growth_dax_" + str(i) + "d"] = ipo_df["Adj Close"] / ipo_df[
#         "Adj Close"
#     ].shift(i)

## Question 3: Is Growth Concentrated in the Largest Stocks?

**Get the share of days (percentage as int) when Large Stocks outperform (growth_7d - growth over 7 periods back) the Largest stocks?**


Reuse [Code Snippet 5] to obtain OHLCV stats for 33 stocks 
for 10 full years of data (2014-01-01 to 2023-12-31):

`US_STOCKS = ['MSFT', 'AAPL', 'GOOG', 'NVDA', 'AMZN', 'META', 'BRK-B', 'LLY', 'AVGO','V', 'JPM']`

`EU_STOCKS = ['NVO','MC.PA', 'ASML', 'RMS.PA', 'OR.PA', 'SAP', 'ACN', 'TTE', 'SIE.DE','IDEXY','CDI.PA']`

`INDIA_STOCKS = ['RELIANCE.NS','TCS.NS','HDB','BHARTIARTL.NS','IBN','SBIN.NS','LICI.NS','INFY','ITC.NS','HINDUNILVR.NS','LT.NS']`

`LARGEST_STOCKS = US_STOCKS + EU_STOCKS + INDIA_STOCKS`
<br/>

Now let's add the top 12-22 stocks (as of end-April 2024):
<br/>

`NEW_US = ['TSLA','WMT','XOM','UNH','MA','PG','JNJ','MRK','HD','COST','ORCL']`

`NEW_EU = ['PRX.AS','CDI.PA','AIR.PA','SU.PA','ETN','SNY','BUD','DTE.DE','ALV.DE','MDT','AI.PA','EL.PA']`

`NEW_INDIA = ['BAJFINANCE.NS','MARUTI.NS','HCLTECH.NS','TATAMOTORS.NS','SUNPHARMA.NS','ONGC.NS','ADANIENT.NS','ADANIENT.NS','NTPC.NS','KOTAKBANK.NS','TITAN.NS']`

`LARGE_STOCKS = NEW_EU + NEW_US + NEW_INDIA`

You should be able to obtain stats for 33 LARGEST STOCKS and 32 LARGE STOCKS.

Calculate  `growth_7d` for every stock and every day.
Get the average daily `growth_7d` for the LARGEST_STOCKS group vs. the LARGE_STOCKS group.

For example, for the first of data you should have:
| Date   |      ticker_category      |  growth_7d |
|----------|:-------------:|------:|
| 2014-01-01 |  LARGE | 1.011684 |
| 2014-01-01 |   LARGEST   |   1.011797 |

On that day, the LARGEST group was growing faster than LARGE one (new stocks).

Calculate the number of days when the LARGE GROUP (new smaller stocks) outperforms the LARGEST GROUP, divide it by the total number of trading days (which should be 2595 days), and convert it to a percentage (closest INTEGER value). For example, if you find that 1700 out of 2595 days meet this condition, it means that 1700/2595 = 0.655, or approximately 66% of days, the LARGE stocks were growing faster than the LARGEST ones. This suggests that you should consider extending your dataset with more stocks to seek higher growth.


In [38]:
ALL_TICKERS = ["MSFT", "AAPL"]

In [39]:
start = "2014-01-01"
end = "2023-12-31"
window = 7

def get_avg_x_day_growth(tickers, start=start, end=end, window=window):
    stocks_df = pd.DataFrame({"A": []})

    for i, ticker in enumerate(tickers):
        print(i, ticker)

        # Work with stock prices
        historyPrices = yf.download(tickers=ticker, start=start, end=end, interval="1d")

        # generate features for historical prices, and what we want to predict
        historyPrices["Ticker"] = ticker
        # historyPrices["Year"] = historyPrices.index.year
        # historyPrices["Month"] = historyPrices.index.month
        # historyPrices["Weekday"] = historyPrices.index.weekday
        historyPrices["Date"] = historyPrices.index.date

        # historical returns
        for i in [window]:
            historyPrices["growth_" + str(i) + "d"] = historyPrices[
                "Adj Close"
            ] / historyPrices["Adj Close"].shift(i)


        # sleep 1 sec between downloads - not to overload the API server
        time.sleep(1)

        if stocks_df.empty:
            stocks_df = historyPrices
        else:
            stocks_df = pd.concat([stocks_df, historyPrices], ignore_index=True)

    return stocks_df

In [40]:
US_STOCKS = ['MSFT', 'AAPL', 'GOOG', 'NVDA', 'AMZN', 'META', 'BRK-B', 'LLY', 'AVGO','V', 'JPM']
EU_STOCKS = ['NVO','MC.PA', 'ASML', 'RMS.PA', 'OR.PA', 'SAP', 'ACN', 'TTE', 'SIE.DE','IDEXY','CDI.PA']
INDIA_STOCKS = ['RELIANCE.NS','TCS.NS','HDB','BHARTIARTL.NS','IBN','SBIN.NS','LICI.NS','INFY','ITC.NS','HINDUNILVR.NS','LT.NS']

LARGEST_STOCKS = US_STOCKS + EU_STOCKS + INDIA_STOCKS

In [41]:
us_stocks_dfs = get_avg_x_day_growth(US_STOCKS, start=start, end=end, window=7)

0 MSFT


[*********************100%%**********************]  1 of 1 completed




1 AAPL


[*********************100%%**********************]  1 of 1 completed


2 GOOG


[*********************100%%**********************]  1 of 1 completed


3 NVDA


[*********************100%%**********************]  1 of 1 completed


4 AMZN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

5 META





6 BRK-B


[*********************100%%**********************]  1 of 1 completed


7 LLY


[*********************100%%**********************]  1 of 1 completed


8 AVGO


[*********************100%%**********************]  1 of 1 completed


9 V


[*********************100%%**********************]  1 of 1 completed


10 JPM


[*********************100%%**********************]  1 of 1 completed


In [42]:
us_stocks_dfs.growth_7d.mean()

1.007611663488794

In [43]:
eu_stocks_dfs = get_avg_x_day_growth(EU_STOCKS, start=start, end=end, window=7)

0 NVO


[*********************100%%**********************]  1 of 1 completed


1 MC.PA


[*********************100%%**********************]  1 of 1 completed


2 ASML


[*********************100%%**********************]  1 of 1 completed


3 RMS.PA


[*********************100%%**********************]  1 of 1 completed


4 OR.PA


[*********************100%%**********************]  1 of 1 completed


5 SAP


[*********************100%%**********************]  1 of 1 completed


6 ACN


[*********************100%%**********************]  1 of 1 completed


7 TTE


[*********************100%%**********************]  1 of 1 completed


8 SIE.DE


[*********************100%%**********************]  1 of 1 completed


9 IDEXY


[*********************100%%**********************]  1 of 1 completed


10 CDI.PA


[*********************100%%**********************]  1 of 1 completed


In [44]:
eu_stocks_dfs.growth_7d.mean()

1.0050238763359471

In [45]:
india_stocks_dfs = get_avg_x_day_growth(INDIA_STOCKS, start=start, end=end, window=7)

0 RELIANCE.NS


[*********************100%%**********************]  1 of 1 completed


1 TCS.NS


[*********************100%%**********************]  1 of 1 completed


2 HDB


[*********************100%%**********************]  1 of 1 completed


3 BHARTIARTL.NS


[*********************100%%**********************]  1 of 1 completed


4 IBN


[*********************100%%**********************]  1 of 1 completed


5 SBIN.NS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

6 LICI.NS





7 INFY


[*********************100%%**********************]  1 of 1 completed


8 ITC.NS


[*********************100%%**********************]  1 of 1 completed


9 HINDUNILVR.NS


[*********************100%%**********************]  1 of 1 completed


10 LT.NS


[*********************100%%**********************]  1 of 1 completed


In [46]:
india_stocks_dfs.growth_7d.mean()

1.005157121333949

In [47]:
largest_stocks_df = pd.concat([us_stocks_dfs, eu_stocks_dfs, india_stocks_dfs])

### Large Stocker 7 day growth

In [48]:
NEW_US = ['TSLA','WMT','XOM','UNH','MA','PG','JNJ','MRK','HD','COST','ORCL']

NEW_EU = ['PRX.AS','CDI.PA','AIR.PA','SU.PA','ETN','SNY','BUD','DTE.DE','ALV.DE','MDT','AI.PA','EL.PA']

NEW_INDIA = ['BAJFINANCE.NS','MARUTI.NS','HCLTECH.NS','TATAMOTORS.NS','SUNPHARMA.NS','ONGC.NS','ADANIENT.NS','ADANIENT.NS','NTPC.NS','KOTAKBANK.NS','TITAN.NS']

LARGE_STOCKS = NEW_EU + NEW_US + NEW_INDIA

In [49]:
new_us_stocks_dfs = get_avg_x_day_growth(NEW_US, start=start, end=end, window=7)
new_eu_stocks_dfs = get_avg_x_day_growth(NEW_EU, start=start, end=end, window=7)
new_india_stocks_dfs = get_avg_x_day_growth(NEW_INDIA, start=start, end=end, window=7)

0 TSLA


[*********************100%%**********************]  1 of 1 completed


1 WMT


[*********************100%%**********************]  1 of 1 completed


2 XOM


[*********************100%%**********************]  1 of 1 completed


3 UNH


[*********************100%%**********************]  1 of 1 completed


4 MA


[*********************100%%**********************]  1 of 1 completed


5 PG


[*********************100%%**********************]  1 of 1 completed


6 JNJ


[*********************100%%**********************]  1 of 1 completed


7 MRK


[*********************100%%**********************]  1 of 1 completed


8 HD


[*********************100%%**********************]  1 of 1 completed


9 COST


[*********************100%%**********************]  1 of 1 completed


10 ORCL


[*********************100%%**********************]  1 of 1 completed


0 PRX.AS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 CDI.PA





2 AIR.PA


[*********************100%%**********************]  1 of 1 completed


3 SU.PA


[*********************100%%**********************]  1 of 1 completed


4 ETN


[*********************100%%**********************]  1 of 1 completed


5 SNY


[*********************100%%**********************]  1 of 1 completed


6 BUD


[*********************100%%**********************]  1 of 1 completed


7 DTE.DE


[*********************100%%**********************]  1 of 1 completed


8 ALV.DE


[*********************100%%**********************]  1 of 1 completed


9 MDT


[*********************100%%**********************]  1 of 1 completed


10 AI.PA


[*********************100%%**********************]  1 of 1 completed


11 EL.PA


[*********************100%%**********************]  1 of 1 completed


0 BAJFINANCE.NS


[*********************100%%**********************]  1 of 1 completed


1 MARUTI.NS


[*********************100%%**********************]  1 of 1 completed


2 HCLTECH.NS


[*********************100%%**********************]  1 of 1 completed


3 TATAMOTORS.NS


[*********************100%%**********************]  1 of 1 completed


4 SUNPHARMA.NS


[*********************100%%**********************]  1 of 1 completed


5 ONGC.NS


[*********************100%%**********************]  1 of 1 completed


6 ADANIENT.NS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

7 ADANIENT.NS





8 NTPC.NS


[*********************100%%**********************]  1 of 1 completed


9 KOTAKBANK.NS


[*********************100%%**********************]  1 of 1 completed


10 TITAN.NS


[*********************100%%**********************]  1 of 1 completed


In [50]:
new_stocks_df = pd.concat([new_us_stocks_dfs, new_eu_stocks_dfs, new_india_stocks_dfs])


In [51]:
new_stocks_df

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Ticker,Date,growth_7d
0,9.986667,10.165333,9.770000,10.006667,10.006667,92826000,TSLA,2014-01-02,
1,10.000000,10.146000,9.906667,9.970667,9.970667,70425000,TSLA,2014-01-03,
2,10.000000,10.026667,9.682667,9.800000,9.800000,80416500,TSLA,2014-01-06,
3,9.841333,10.026667,9.683333,9.957333,9.957333,75511500,TSLA,2014-01-07,
4,9.923333,10.246667,9.917333,10.085333,10.085333,92448000,TSLA,2014-01-08,
...,...,...,...,...,...,...,...,...,...
27110,3580.000000,3638.449951,3560.550049,3627.350098,3627.350098,777099,TITAN.NS,2023-12-22,1.006940
27111,3635.000000,3665.000000,3623.449951,3656.699951,3656.699951,526101,TITAN.NS,2023-12-26,1.018182
27112,3668.000000,3695.000000,3645.000000,3689.250000,3689.250000,666625,TITAN.NS,2023-12-27,1.024635
27113,3699.899902,3737.000000,3680.699951,3715.100098,3715.100098,1033648,TITAN.NS,2023-12-28,1.026384


In [52]:
new_stocks_df.Date.unique()

array([datetime.date(2014, 1, 2), datetime.date(2014, 1, 3),
       datetime.date(2014, 1, 6), ..., datetime.date(2020, 11, 14),
       datetime.date(2021, 1, 1), datetime.date(2022, 12, 26)],
      dtype=object)

In [53]:
new_stocks_df["Ticker"].unique()

array(['TSLA', 'WMT', 'XOM', 'UNH', 'MA', 'PG', 'JNJ', 'MRK', 'HD',
       'COST', 'ORCL', 'PRX.AS', 'CDI.PA', 'AIR.PA', 'SU.PA', 'ETN',
       'SNY', 'BUD', 'DTE.DE', 'ALV.DE', 'MDT', 'AI.PA', 'EL.PA',
       'BAJFINANCE.NS', 'MARUTI.NS', 'HCLTECH.NS', 'TATAMOTORS.NS',
       'SUNPHARMA.NS', 'ONGC.NS', 'ADANIENT.NS', 'NTPC.NS',
       'KOTAKBANK.NS', 'TITAN.NS'], dtype=object)

In [54]:
new_grouped_df = new_stocks_df.groupby(by=["Date"])["growth_7d"].mean()
new_grouped_df

Date
2014-01-01         NaN
2014-01-02         NaN
2014-01-03         NaN
2014-01-06         NaN
2014-01-07         NaN
                ...   
2023-12-22    1.003014
2023-12-26    1.013505
2023-12-27    1.002167
2023-12-28    0.998583
2023-12-29    0.999313
Name: growth_7d, Length: 2595, dtype: float64

In [55]:
largest_grouped_df = largest_stocks_df.groupby(by=["Date"])["growth_7d"].mean()
largest_grouped_df

Date
2014-01-01         NaN
2014-01-02         NaN
2014-01-03         NaN
2014-01-06         NaN
2014-01-07         NaN
                ...   
2023-12-22    1.013788
2023-12-26    1.014900
2023-12-27    1.014155
2023-12-28    1.008533
2023-12-29    1.005337
Name: growth_7d, Length: 2595, dtype: float64

In [56]:
merged_df = pd.merge(largest_grouped_df, new_grouped_df, on="Date", how="inner").rename(
    columns={
        "growth_7d_x": "largest_growth_7d",
        "growth_7d_y": "newest_growth_7d",
    }
)
merged_df

Unnamed: 0_level_0,largest_growth_7d,newest_growth_7d
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-01-01,,
2014-01-02,,
2014-01-03,,
2014-01-06,,
2014-01-07,,
...,...,...
2023-12-22,1.013788,1.003014
2023-12-26,1.014900,1.013505
2023-12-27,1.014155,1.002167
2023-12-28,1.008533,0.998583


In [57]:
merged_df["largest > newest"] = merged_df["largest_growth_7d"] > merged_df["newest_growth_7d"]
merged_df

Unnamed: 0_level_0,largest_growth_7d,newest_growth_7d,largest > newest
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-01-01,,,False
2014-01-02,,,False
2014-01-03,,,False
2014-01-06,,,False
2014-01-07,,,False
...,...,...,...
2023-12-22,1.013788,1.003014,True
2023-12-26,1.014900,1.013505,True
2023-12-27,1.014155,1.002167,True
2023-12-28,1.008533,0.998583,True


In [58]:
counts = merged_df["largest > newest"].value_counts()
counts

largest > newest
True     1361
False    1234
Name: count, dtype: int64

In [59]:
counts[True]

1361

In [60]:
result = counts[False]/(counts[False] + counts[True])*100
result

47.552986512524086

## Question 4: Trying Another Technical Indicators strategy

**What's the total gross profit (in THOUSANDS of $) you'll get from trading on CCI (no fees assumption)?**


First, run the entire Colab to obtain the full DataFrame of data (after [Code Snippet 9]), and truncate it to the last full 10 years of data (2014-01-01 to 2023-12-31).
If you encounter any difficulties running the Colab - you can download it using this [link](https://drive.google.com/file/d/1m3Qisfs2XfWk6Sw_Uk5kHLWqwQ0q8SKb/view?usp=sharing).

Let's assume you've learned about the awesome **CCI indicator** ([Commodity Channel Index](https://www.investopedia.com/terms/c/commoditychannelindex.asp)), and decided to use only it for your operations.

You defined the "defensive" value of a high threshould of 200, and you trade only on Fridays (`Date.dt.dayofweek()==4`).

That is, every time you see that CCI is >200 for any stock (out of those 33), you'll invest $1000 (each record when CCI>200) at Adj.Close price and hold it for 1 week (5 trading days) in order to sell at the Adj. Close price.

What's the expected gross profit (no fees) that you get in THOUSANDS $ (closest integer value) over many operations in 10 years?
One operation calculations: if you invested $1000 and received $1010 in 5 days - you add $10 to gross profit, if you received $980 - add -$20 to gross profit.
You need to sum these results over all trades (460 times in 10 years).

In [65]:
# load the prepared dataframe
df = pd.read_parquet("stocks_df_combined_2024_05_03.parquet.brotli")


In [66]:
df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close_x,Volume,Ticker,Year,Month,Weekday,...,growth_brent_oil_7d,growth_brent_oil_30d,growth_brent_oil_90d,growth_brent_oil_365d,growth_btc_usd_1d,growth_btc_usd_3d,growth_btc_usd_7d,growth_btc_usd_30d,growth_btc_usd_90d,growth_btc_usd_365d
0,0.088542,0.101563,0.088542,0.097222,0.060163,1031789000.0,MSFT,1986,1986-03-01,3,...,,,,,,,,,,
1,0.097222,0.102431,0.097222,0.100694,0.062311,308160000.0,MSFT,1986,1986-03-01,4,...,,,,,,,,,,
2,0.100694,0.103299,0.100694,0.102431,0.063386,133171200.0,MSFT,1986,1986-03-01,0,...,,,,,,,,,,
3,0.102431,0.103299,0.098958,0.099826,0.061774,67766400.0,MSFT,1986,1986-03-01,1,...,,,,,,,,,,
4,0.099826,0.100694,0.097222,0.09809,0.0607,47894400.0,MSFT,1986,1986-03-01,2,...,,,,,,,,,,


In [67]:
df["cci"] = talib.CCI(df.High.values, df.Low.values, df.Close.values, timeperiod=14)


In [68]:
df.columns

Index(['Open', 'High', 'Low', 'Close', 'Adj Close_x', 'Volume', 'Ticker',
       'Year', 'Month', 'Weekday',
       ...
       'growth_brent_oil_7d', 'growth_brent_oil_30d', 'growth_brent_oil_90d',
       'growth_brent_oil_365d', 'growth_btc_usd_1d', 'growth_btc_usd_3d',
       'growth_btc_usd_7d', 'growth_btc_usd_30d', 'growth_btc_usd_90d',
       'growth_btc_usd_365d'],
      dtype='object', length=202)

In [70]:
df.Weekday

0       3
1       4
2       0
3       1
4       2
       ..
5420    4
5421    0
5422    1
5423    3
5424    4
Name: Weekday, Length: 221076, dtype: int32

In [73]:
# runcate the dataframe to the last full 10 years of data (2014-01-01 to 2023-12-31)
df[(df["Date"] > "2014-01-01") & (df["Date"] < "2023-12-31")]

Unnamed: 0,Open,High,Low,Close,Adj Close_x,Volume,Ticker,Year,Month,Weekday,...,growth_brent_oil_7d,growth_brent_oil_30d,growth_brent_oil_90d,growth_brent_oil_365d,growth_btc_usd_1d,growth_btc_usd_3d,growth_btc_usd_7d,growth_btc_usd_30d,growth_btc_usd_90d,growth_btc_usd_365d
7011,37.349998,37.400002,37.099998,37.160000,31.233055,30632200.0,MSFT,2014,2014-01-01,3,...,0.964302,0.992998,0.970030,1.158676,,,,,,
7012,37.200001,37.220001,36.599998,36.910000,31.022942,31134800.0,MSFT,2014,2014-01-01,4,...,0.958139,0.984707,0.961500,1.143209,,,,,,
7013,36.849998,36.889999,36.110001,36.130001,30.367346,43603700.0,MSFT,2014,2014-01-01,0,...,0.953798,0.998223,0.968951,1.168236,,,,,,
7014,36.330002,36.490002,36.209999,36.410000,30.602680,35802800.0,MSFT,2014,2014-01-01,1,...,0.958653,0.993430,0.977598,1.097648,,,,,,
7015,36.000000,36.139999,35.580002,35.759998,30.056351,59971700.0,MSFT,2014,2014-01-01,2,...,0.955161,0.973383,0.974977,1.100781,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5338,3424.000000,3496.000000,3408.600098,3477.949951,3477.949951,1681707.0,LT.NS,2023,2023-12-01,4,...,1.064772,0.971018,0.939967,0.797881,1.002935,1.040865,1.049324,1.175398,1.655339,2.614201
5339,3477.949951,3508.350098,3477.949951,3490.050049,3490.050049,1072263.0,LT.NS,2023,2023-12-01,1,...,1.058217,0.982429,0.956014,0.801404,0.974945,0.972127,1.005911,1.134509,1.613511,2.513055
5340,3510.000000,3549.000000,3504.149902,3544.000000,3544.000000,1389266.0,LT.NS,2023,2023-12-01,2,...,1.040496,0.965806,0.943050,0.749506,1.021694,1.009920,0.995203,1.166121,1.607712,2.598696
5341,3545.000000,3559.949951,3500.500000,3518.050049,3518.050049,3371121.0,LT.NS,2023,2023-12-01,3,...,1.005645,0.965632,0.932881,0.730228,0.981240,0.977409,0.971705,1.126794,1.583988,2.575301


In [79]:
df_reduced = df[(df["Date"] > "2014-01-01") & (df["Date"] < "2023-12-31")].copy()

In [80]:
df_reduced.info()

<class 'pandas.core.frame.DataFrame'>
Index: 80755 entries, 7011 to 5342
Columns: 202 entries, Open to growth_btc_usd_365d
dtypes: datetime64[ns](3), float64(128), int32(66), int64(3), object(2)
memory usage: 104.7+ MB


In [81]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 221076 entries, 0 to 5424
Columns: 202 entries, Open to growth_btc_usd_365d
dtypes: datetime64[ns](3), float64(128), int32(66), int64(3), object(2)
memory usage: 286.7+ MB


In [104]:
# create a signal when cci > 100 and weekday ==4 (Friday)
threshold = 200
# Create a signal column
df_reduced['Signal'] = (df_reduced['cci'] > threshold) & (df_reduced['Weekday'] == 4)

# Convert boolean values to integers (0 or 1)
df_reduced['Signal'] = df_reduced['Signal'].astype(int)

In [109]:
#df_reduced["Adj Close_5"]= df_reduced["Adj Close_x"].shift(5) # WARNING: This produces incorrect results 
df_reduced["Adj Close_5"] =""

In [116]:
#df_pivot = df_reduced.pivot(index=)

In [143]:
multi_df = df_reduced.set_index(["Ticker", "Date"])
# Sort index
multi_df.sort_index(inplace=True)
multi_df["Adj Close_5"] = multi_df["Adj Close_x"].shift(5)
multi_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Adj Close_x,Volume,Year,Month,Weekday,growth_1d,...,growth_brent_oil_90d,growth_brent_oil_365d,growth_btc_usd_1d,growth_btc_usd_3d,growth_btc_usd_7d,growth_btc_usd_30d,growth_btc_usd_90d,growth_btc_usd_365d,Signal,Adj Close_5
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAPL,2014-01-02,19.845715,19.893929,19.715000,19.754642,17.296658,234684800.0,2014,2014-01-01,3,0.985937,...,0.970030,1.158676,,,,,,,0,
AAPL,2014-01-03,19.745001,19.775000,19.301071,19.320715,16.916714,392467600.0,2014,2014-01-01,4,0.978034,...,0.961500,1.143209,,,,,,,0,
AAPL,2014-01-06,19.194643,19.528570,19.057142,19.426071,17.008961,412610800.0,2014,2014-01-01,0,1.005453,...,0.968951,1.168236,,,,,,,0,
AAPL,2014-01-07,19.440001,19.498571,19.211430,19.287144,16.887331,317209200.0,2014,2014-01-01,1,0.992849,...,0.977598,1.097648,,,,,,,0,
AAPL,2014-01-08,19.243214,19.484285,19.238930,19.409286,16.994268,258529600.0,2014,2014-01-01,2,1.006332,...,0.974977,1.100781,,,,,,,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
V,2023-12-22,260.000000,261.209991,258.220001,258.429993,257.949005,5111500.0,2023,2023-12-01,4,0.995723,...,0.939967,0.797881,1.002935,1.040865,1.049324,1.175398,1.655339,2.614201,0,257.549744
V,2023-12-26,258.529999,259.739990,258.529999,259.160004,258.677643,2085100.0,2023,2023-12-01,1,1.002825,...,0.956014,0.801404,0.974945,0.972127,1.005911,1.134509,1.613511,2.513055,0,257.889099
V,2023-12-27,259.250000,259.769989,258.309998,258.929993,258.448059,4034700.0,2023,2023-12-01,2,0.999112,...,0.943050,0.749506,1.021694,1.009920,0.995203,1.166121,1.607712,2.598696,0,259.506104
V,2023-12-28,258.540009,260.970001,258.540009,260.399994,259.915344,3020500.0,2023,2023-12-01,3,1.005677,...,0.932881,0.730228,0.981240,0.977409,0.971705,1.126794,1.583988,2.575301,0,256.631439


In [149]:
multi_df.loc[("ACN",)]["Adj Close_5"] = multi_df.loc[("ACN",)]["Adj Close_x"].shift(5)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  multi_df.loc[("ACN",)]["Adj Close_5"] = multi_df.loc[("ACN",)]["Adj Close_x"].shift(5)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.

In [117]:
df_pivot = df_reduced.pivot(index="Date", columns=["Ticker"], values=["Adj Close_x","Adj Close_5", "Signal"]).copy()
df_pivot

Unnamed: 0_level_0,Adj Close_x,Adj Close_x,Adj Close_x,Adj Close_x,Adj Close_x,Adj Close_x,Adj Close_x,Adj Close_x,Adj Close_x,Adj Close_x,...,Signal,Signal,Signal,Signal,Signal,Signal,Signal,Signal,Signal,Signal
Ticker,AAPL,ACN,AMZN,ASML,AVGO,BHARTIARTL.NS,BRK-B,CDI.PA,GOOG,HDB,...,NVO,OR.PA,RELIANCE.NS,RMS.PA,SAP,SBIN.NS,SIE.DE,TCS.NS,TTE,V
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-02,17.296658,67.617828,19.8985,83.166664,39.869324,280.350159,117.5,108.00708,27.724083,15.844661,...,0,0,0,0,0,0,0,0,0,0
2014-01-03,16.916714,67.84285,19.822001,82.87851,40.020786,280.904541,117.57,108.086708,27.521841,15.844661,...,0,0,0,0,0,0,0,0,0,0
2014-01-06,17.008961,67.126099,19.681499,81.176628,39.748173,280.904541,116.279999,105.937706,27.828691,16.172998,...,0,0,0,0,0,0,0,0,0,0
2014-01-07,16.887331,67.942871,19.901501,80.492279,40.126816,282.908875,116.190002,105.062187,28.365179,16.158926,...,0,0,0,0,0,0,0,0,0,0
2014-01-08,16.994268,68.467964,20.096001,80.708397,40.725029,283.50592,115.339996,104.505028,28.42421,16.0229,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-22,193.353287,351.798248,153.419998,749.656372,1117.222046,989.549988,356.470001,700.377502,142.720001,67.0,...,0,0,0,0,0,0,0,0,0,0
2023-12-26,192.803986,350.785828,153.410004,759.767517,1127.089966,999.849976,356.829987,,142.820007,66.720001,...,0,,0,,0,0,,0,0,0
2023-12-27,192.903839,351.172943,153.339996,761.112427,1121.394287,1021.200012,356.950012,695.922852,141.440002,67.040001,...,0,0,0,0,0,0,0,0,0,0
2023-12-28,193.333298,348.959625,153.380005,754.955994,1117.650269,1036.699951,357.570007,697.90271,141.279999,67.220001,...,0,0,0,0,0,0,0,0,0,0


In [126]:
Adj_Close = df_pivot["Adj Close_x"]["AAPL"]

In [125]:
Adj_Close_5 = df_pivot["Adj Close_x"]["AAPL"].shift(5)
Adj_Close_5

Date
2014-01-02          None
2014-01-03          None
2014-01-06          None
2014-01-07          None
2014-01-08          None
                 ...    
2023-12-22    197.318222
2023-12-26     195.64035
2023-12-27    196.689026
2023-12-28    194.581711
2023-12-29    194.431885
Name: AAPL, Length: 2594, dtype: object

In [124]:
signal = df_pivot["Signal"]["AAPL"]
signal

Date
2014-01-02    0
2014-01-03    0
2014-01-06    0
2014-01-07    0
2014-01-08    0
             ..
2023-12-22    0
2023-12-26    0
2023-12-27    0
2023-12-28    0
2023-12-29    0
Name: AAPL, Length: 2594, dtype: object

In [133]:
type(signal)

pandas.core.series.Series

In [162]:
tickers = list(df_reduced.Ticker.unique())
tickers

['MSFT',
 'AAPL',
 'GOOG',
 'NVDA',
 'AMZN',
 'META',
 'BRK-B',
 'LLY',
 'AVGO',
 'V',
 'JPM',
 'NVO',
 'MC.PA',
 'ASML',
 'RMS.PA',
 'OR.PA',
 'SAP',
 'ACN',
 'TTE',
 'SIE.DE',
 'IDEXY',
 'CDI.PA',
 'RELIANCE.NS',
 'TCS.NS',
 'HDB',
 'BHARTIARTL.NS',
 'IBN',
 'SBIN.NS',
 'LICI.NS',
 'INFY',
 'ITC.NS',
 'HINDUNILVR.NS',
 'LT.NS']

In [167]:
stock_dfs = []
gross_profit_total = 0
num_trades_total = 0

for ticker in tickers:
    stock_df = pd.DataFrame([df_pivot["Adj Close_x"][ticker], df_pivot["Adj Close_x"][ticker].shift(-5), df_pivot["Signal"][ticker]]).T
    stock_df.columns = ["Adj_Close", "Adj_Close_5", "signal"]
    stock_df["growth_5d"] = stock_df["Adj_Close_5"]/ stock_df["Adj_Close"] -1
    stock_df["gross_profit"] = stock_df["growth_5d"]* stock_df["signal"] * 1000
    stock_dfs.append(stock_df)
    gross_profit = stock_df["gross_profit"].sum()
    gross_profit_total = gross_profit_total + gross_profit
    num_trades = stock_df["signal"].sum()
    num_trades_total = num_trades_total + num_trades
    print(f"Ticker: {ticker} - # of trades = {num_trades} - gross profit = {gross_profit}")

Ticker: MSFT - # of trades = 19.0 - gross profit = 64.2493616583778
Ticker: AAPL - # of trades = 14.0 - gross profit = 41.67024522713312
Ticker: GOOG - # of trades = 19.0 - gross profit = -32.838931524184616
Ticker: NVDA - # of trades = 15.0 - gross profit = 240.87234881045958
Ticker: AMZN - # of trades = 24.0 - gross profit = 240.71163686476348
Ticker: META - # of trades = 12.0 - gross profit = -192.81351131070198
Ticker: BRK-B - # of trades = 8.0 - gross profit = 23.683325117288653
Ticker: LLY - # of trades = 14.0 - gross profit = -191.39254541822459
Ticker: AVGO - # of trades = 25.0 - gross profit = -69.27355238209086
Ticker: V - # of trades = 13.0 - gross profit = 33.05624507588156
Ticker: JPM - # of trades = 11.0 - gross profit = -35.899064213728124
Ticker: NVO - # of trades = 19.0 - gross profit = 188.75557272213572
Ticker: MC.PA - # of trades = 22.0 - gross profit = -115.67854791122235
Ticker: ASML - # of trades = 11.0 - gross profit = 76.15402326919516
Ticker: RMS.PA - # of tra

In [171]:
print(f"Gross profit: {gross_profit_total}")
print(f"Number of trades: {num_trades_total}")

Gross profit: 805.1770998486627
Number of trades: 460.0


Costs for 2 x 460 (Buy + Sell) transactions with Degiro would be 2,210.50 €. This strategy is not profitable when commissions are included.

In [173]:
# Print summary of all the trades
# for ticker, df in zip (tickers, stock_dfs):
#     print(ticker)
#     print(df[df["signal"] == 1])