In [1]:
# Import dependencies
# Note: you must pip install yfinance prior to importing
import pandas as pd
import yfinance as yf
import datetime
import numpy as np
from sqlalchemy import create_engine
import psycopg2

In [2]:
# Example of .info method and company information available
bli = yf.Ticker("BLI")
bli.info

{'zip': '94608',
 'sector': 'Healthcare',
 'fullTimeEmployees': 293,
 'longBusinessSummary': 'Berkeley Lights, Inc., a digital cell biology company, focuses on enabling and accelerating the rapid development and commercialization of biotherapeutics and other cell-based products. The company offers an integrated platform, which comprise of proprietary consumables, including OptoSelect chips and reagent kits, automation systems, and application and workflow software. It serves in North America, the Asia Pacific, and Europe. Berkeley Lights, Inc. was incorporated in 2011 and is headquartered in Emeryville, California.',
 'city': 'EmeryVille',
 'phone': '510 858 2855',
 'state': 'CA',
 'country': 'United States',
 'companyOfficers': [],
 'website': 'https://www.berkeleylights.com',
 'maxAge': 1,
 'address1': '5858 Horton Street',
 'industry': 'Biotechnology',
 'address2': 'Suite 320',
 'ebitdaMargins': -0.88914,
 'profitMargins': -0.98206,
 'grossMargins': 0.66913,
 'operatingCashflow': -4

In [3]:
# Read in list of companies that have gone public (IPO) in the last 5 years
yr1 = pd.read_csv("../resources/ipos-2019.csv")
yr2 = pd.read_csv("../resources/ipos-2020.csv")
yr3 = pd.read_csv("../resources/ipos-2021.csv")
yr4 = pd.read_csv("../resources/ipos-2022.csv")

In [4]:
# Build list of IPO ticker symbols
y1_tickrs = [tckr for tckr in yr1.Symbol]
y2_tickrs = [tckr for tckr in yr2.Symbol]
y3_tickrs = [tckr for tckr in yr3.Symbol]
y4_tickrs = [tckr for tckr in yr4.Symbol]

# Create full list of ticker symbols for the last 4 years
symbols = y1_tickrs + y2_tickrs + y3_tickrs + y4_tickrs
# Remove null values from symbols list
symbols = [x for x in symbols if str(x) != 'nan']
# Replace '=' sign from substrings in symbols list to complete clean list of ticker symbols
for sym in symbols:
    if "=" in sym:
        symbols[symbols.index(sym)] = sym.replace("=","")

In [5]:
# Create empty dataframe for stock data
stock_data = pd.DataFrame()

# Iterate through each symbol in the IPO list
for i in symbols:
    # print the symbol which is being downloaded
    print( str(symbols.index(i)) + str(' : ') + i, sep=',', end=',', flush=True)  
    
    try:
        # download the stock price 
        stock = []
        stock = yf.download(i,period='max', progress=False)
        
        # append the individual stock prices 
        if len(stock) == 0:
            None
        else:
            stock['TCKR']=i
            stock_data = stock_data.append(stock,sort=False)
            
    except Exception:
        None
        
# Reset index on stock_data DataFrame to preserve date
stock_data = stock_data.reset_index(level=0)

0 : MKDTY,1 : INDO,2 : MNPR,3 : CIIC,
1 Failed download:
- CIIC: No data found, symbol may be delisted
4 : HCCO,
1 Failed download:
- HCCO: No data found, symbol may be delisted
5 : OCFT,6 : SPT,7 : BILL,8 : EH,9 : LIVK,
1 Failed download:
- LIVK: No data found, symbol may be delisted
10 : XP,11 : LMPX,12 : ALUS,
1 Failed download:
- ALUS: No data found, symbol may be delisted
13 : CHPM,
1 Failed download:
- CHPM: No data found, symbol may be delisted
14 : PINE,15 : PTAC,
1 Failed download:
- PTAC: No data found, symbol may be delisted
16 : CAN,17 : GRNV,
1 Failed download:
- GRNV: No data found, symbol may be delisted
18 : SITM,19 : AMHC,
1 Failed download:
- AMHC: No data found, symbol may be delisted
20 : SAQN,
1 Failed download:
- SAQN: No data found, symbol may be delisted
21 : YAYO,22 : ETNB,23 : CNSP,24 : JIH,
1 Failed download:
- JIH: No data found, symbol may be delisted
25 : KRKR,26 : MOHOY,27 : SRAC,
1 Failed download:
- SRAC: No data found, symbol may be delisted
28 : TELA,

303 : SPFR,
1 Failed download:
- SPFR: No data found, symbol may be delisted
304 : CAP,
1 Failed download:
- CAP: No data found, symbol may be delisted
305 : RSVA,
1 Failed download:
- RSVA: No data found, symbol may be delisted
306 : TACA,
1 Failed download:
- TACA: No data found, symbol may be delisted
307 : SGAM,
1 Failed download:
- SGAM: No data found, symbol may be delisted
308 : FRX,
1 Failed download:
- FRX: No data found, symbol may be delisted
309 : SPRQ,
1 Failed download:
- SPRQ: No data found, symbol may be delisted
310 : GNPK,
1 Failed download:
- GNPK: No data found, symbol may be delisted
311 : HFEN,
1 Failed download:
- HFEN: No data found, symbol may be delisted
312 : OZON,313 : SV,
1 Failed download:
- SV: No data found, symbol may be delisted
314 : TINV,315 : VCVC,
1 Failed download:
- VCVC: No data found, symbol may be delisted
316 : VMAR,317 : BREZ,318 : ARBG,319 : CAS,
1 Failed download:
- CAS: No data found, symbol may be delisted
320 : KWAC,321 : LSAQ,
1 Failed

524 : HZAC,
1 Failed download:
- HZAC: No data found, symbol may be delisted
525 : KYMR,526 : NNOX,527 : BFT,
1 Failed download:
- BFT: No data found, symbol may be delisted
528 : HRMY,529 : INBX,530 : AONE,
1 Failed download:
- AONE: No data found, symbol may be delisted
531 : FIII,
1 Failed download:
- FIII: No data found, symbol may be delisted
532 : NGA,
1 Failed download:
- NGA: No data found, symbol may be delisted
533 : STPK,
1 Failed download:
- STPK: No data found, symbol may be delisted
534 : CVAC,535 : DCT,536 : DGNR,
1 Failed download:
- DGNR: No data found, symbol may be delisted
537 : DMYD,
1 Failed download:
- DMYD: No data found, symbol may be delisted
538 : LCAP,
1 Failed download:
- LCAP: No data found, symbol may be delisted
539 : BEKE,540 : NTST,541 : RBAC,
1 Failed download:
- RBAC: No data found, symbol may be delisted
542 : FAII,
1 Failed download:
- FAII: No data found, symbol may be delisted
543 : FSDC,
1 Failed download:
- FSDC: No data found, symbol may be de

709 : IMAB,710 : LIZI,711 : VEL,712 : GMFI,713 : WTMA,714 : AOGO,715 : GDNR,716 : NXGL,717 : LSPR,718 : NVAC,719 : SAGA,720 : APCA,721 : RCAC,722 : ADRT,723 : BNOX,724 : CFFS,725 : EMLD,726 : IMMX,727 : SANG,728 : SHAP,729 : AHRN,730 : BFAC,731 : EVE,732 : GNTA,733 : IOT,734 : IVCB,735 : IVCP,736 : PRLH,737 : KACL,738 : SIDU,739 : VINE,740 : ALSA,741 : BRKH,742 : ATEK,743 : FRBN,744 : FXCO,745 : GFGD,746 : HAIA,747 : PORT,748 : TCOA,749 : XPDB,750 : GGAA,751 : HCP,752 : HORI,753 : IGTA,754 : NRSN,755 : NU,756 : RJAC,757 : SCUA,758 : TGAA,759 : WEL,760 : CING,761 : REFI,762 : APXI,763 : DAOO,764 : GLLI,765 : JWAC,766 : MTVC,767 : BIOS,768 : BOCN,769 : BPAC,770 : STET,771 : GEEX,772 : ROC,773 : UTAA,774 : CMCA,775 : PACI,776 : ROCL,777 : TLGY,778 : CNGL,779 : AERC,780 : BRD,781 : MNTN,782 : MAAQ,783 : VHNA,784 : LAX,785 : LGTO,786 : AHI,787 : ALOR,788 : CREC,789 : FINW,790 : IFIN,791 : LITM,792 : NFNT,793 : ZING,794 : ARIZ,795 : BLEU,796 : SG,797 : BRZE,798 : IREN,799 : LFAC,800 : NETC,8

611 : PANA,1340 : ACBA,1341 : UTME,1342 : COMP,1343 : EM,1344 : KARO,1345 : ULCC,1346 : ACHL,1347 : COUR,1348 : MBTC,1349 : TWOA,1350 : ALHC,1351 : DSGN,1352 : EWTX,1353 : HYW,1354 : IKNA,1355 : TDUP,1356 : ZH,1357 : CRCT,1358 : DSEY,1359 : EEIQ,161 : LCA,1361 : LVTX,1362 : MSDA,1363 : OLK,1364 : SEMR,1365 : VZIO,1366 : ACVA,1367 : CRZN,1368 : DCRC,
1 Failed download:
- DCRC: No data found, symbol may be delisted
1369 : DISA,1370 : DOCN,1371 : KVSB,
1 Failed download:
- KVSB: No data found, symbol may be delisted
1372 : KVSC,1373 : NGC,1374 : ACTD,
1 Failed download:
- ACTD: No data found, symbol may be delisted
1375 : DGNU,1376 : GGPI,
1 Failed download:
- GGPI: No data found, symbol may be delisted
1377 : GLHA,1378 : LEGA,1379 : MOVE,1380 : NBST,1381 : NGCA,
1 Failed download:
- NGCA: No data found, symbol may be delisted
1382 : OPA,1383 : RKTA,1384 : STRE,1385 : TBSA,1386 : UPC,1387 : VGII,
1 Failed download:
- VGII: No data found, symbol may be delisted
1388 : AFAQ,1389 : AFCG,1390

1691 : HCIC,1692 : OCAX,1693 : PLTK,1694 : ROT,
1 Failed download:
- ROT: No data found, symbol may be delisted
1695 : TBA,
1 Failed download:
- TBA: No data found, symbol may be delisted
1696 : ENVI,
1 Failed download:
- ENVI: No data found, symbol may be delisted
1697 : MON,1698 : POSH,1699 : SLCR,1700 : WOOF,1701 : AFRM,1702 : DLCA,1703 : FCAX,1704 : FINM,1705 : HLAH,1706 : MSGM,1707 : NGAB,
1 Failed download:
- NGAB: No data found, symbol may be delisted
1708 : PNTM,1709 : ADEX,1710 : BCYP,
1 Failed download:
- BCYP: No data found, symbol may be delisted
1711 : ENFA,
1 Failed download:
- ENFA: No data found, symbol may be delisted
1712 : GPAC,1713 : KUKE,1714 : QLI,1715 : SWET,1716 : LJAQ,1717 : CGEM,1718 : EPHY,1719 : EPWR,1720 : GRCL,1721 : GSAQ,
1 Failed download:
- GSAQ: No data found, symbol may be delisted
1722 : KLAQ,1723 : LHC,1724 : LWAC,
1 Failed download:
- LWAC: No data found, symbol may be delisted
1725 : MCAD,
1 Failed download:
- MCAD: No data found, symbol may be de

In [6]:
# Generate list of unique IPO's with financial data
listed_ipos = stock_data['TCKR'].unique()

# Build a dataframe for all company infomration
company_info = pd.DataFrame(listed_ipos, columns=['TCKR'])
company_growth = pd.DataFrame(listed_ipos, columns=['TCKR'])

# Iterate through rows to gather company info and add to DF
for index, row in company_info.iterrows():
    ticker = row.TCKR
    ticker_obj = yf.Ticker(ticker)
    ticker_info = ticker_obj.info
    print(index)
    try:
        company_info.loc[index, 'sector'] = ticker_info['sector']
    except KeyError:
        company_info.loc[index, 'sector'] = np.nan
    try:
        company_info.loc[index, 'industry'] = ticker_info['industry']
    except KeyError:
        company_info.loc[index, 'industry'] = np.nan
    try:
        company_info.loc[index, 'country'] = ticker_info['country']
    except KeyError:
        company_info.loc[index, 'country'] = np.nan
    try:
        company_info.loc[index, 'growth_rate'] = ticker_info['revenueGrowth']
    except KeyError:
        company_info.loc[index, 'growth_rate'] = np.nan
    try:
        company_info.loc[index, 'debtToEquity'] = ticker_info['debtToEquity']
    except KeyError:
        company_info.loc[index, 'debtToEquity'] = np.nan
    try:
        company_info.loc[index, 'currentRatio'] = ticker_info['currentRatio']
    except KeyError:
        company_info.loc[index, 'currentRatio'] = np.nan
    try:
        company_info.loc[index, 'forwardPE'] = ticker_info['forwardPE']
    except KeyError:
        company_info.loc[index, 'forwardPE'] = np.nan
    try:
        company_growth.loc[index, 'revenue_growth'] = ticker_info['revenueGrowth']
    except KeyError:
        company_growth.loc[index, 'revenue_growth'] = np.nan

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [7]:
# Merge company info onto stock_data DataFrame
# stock_data_ml_df = pd.merge(stock_data, company_info, how='left', left_on='Name', right_on='TCKR')

In [8]:
# Raw historical stock dataset 
stock_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 671674 entries, 0 to 671673
Data columns (total 8 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   Date       671674 non-null  datetime64[ns]
 1   Open       671674 non-null  float64       
 2   High       671674 non-null  float64       
 3   Low        671674 non-null  float64       
 4   Close      671674 non-null  float64       
 5   Adj Close  671674 non-null  float64       
 6   Volume     671674 non-null  float64       
 7   TCKR       671674 non-null  object        
dtypes: datetime64[ns](1), float64(6), object(1)
memory usage: 41.0+ MB


In [9]:
# Raw qualitative current data pertaining to each IPO stock
company_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1575 entries, 0 to 1574
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   TCKR          1575 non-null   object 
 1   sector        1554 non-null   object 
 2   industry      1554 non-null   object 
 3   country       1554 non-null   object 
 4   growth_rate   662 non-null    float64
 5   debtToEquity  761 non-null    float64
 6   currentRatio  1498 non-null   float64
 7   forwardPE     705 non-null    float64
dtypes: float64(4), object(4)
memory usage: 98.6+ KB


In [10]:
# Drop null values from final dataset
# stock_data_ml_df.dropna(inplace=True)

# Drop duplicate/unnecessary columns
# stock_data_ml_df.drop(columns='Name', inplace=True)

In [11]:
# View sample of DataFrame
stock_data.sample(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,TCKR
426906,2022-04-27,6.4,6.56,6.01,6.23,6.23,1495500.0,COOK
9452,2022-03-10,0.43,0.43,0.38,0.38,0.38,207100.0,YAYO
181839,2021-07-19,24.0,25.290001,23.52,24.66,24.66,264800.0,OLMA
242280,2022-09-02,41.970001,42.75,40.326,40.790001,40.790001,5382900.0,U
194775,2022-01-24,9.8,9.81,9.79,9.8,9.8,82100.0,BOAC
237535,2022-10-17,1.59,1.62,1.55,1.6,1.6,70000.0,TSHA
328087,2022-05-02,10.55,11.03,10.55,10.9,10.9,13900.0,VEL
456342,2021-07-29,17.98,18.42,17.459999,17.879999,17.879999,299200.0,IAS
519868,2021-08-13,25.950001,29.200001,24.75,28.23,28.23,7697800.0,CRCT
227856,2022-09-14,9.33,9.78,9.16,9.69,9.69,130232.0,IMPX


In [12]:
# View sample of DataFrame
company_info.sample(10)

Unnamed: 0,TCKR,sector,industry,country,growth_rate,debtToEquity,currentRatio,forwardPE
825,EWCZ,Consumer Defensive,Household & Personal Products,United States,0.114,283.991,2.57,28.788464
365,NTST,Real Estate,REIT—Retail,United States,0.603,40.724,1.859,79.347824
546,MNTN,Financial Services,Shell Companies,United States,,,1.087,
828,AMCI,Financial Services,Shell Companies,United States,,,0.106,
711,GIAC,Financial Services,Shell Companies,Israel,,,,
0,MKDTY,Basic Materials,Chemicals,China,-0.671,,0.928,-0.002444
1326,BLUA,Financial Services,Shell Companies,United States,,,0.262,
127,WAFU,Consumer Defensive,Education & Training Services,China,0.346,3.814,2.706,
409,PANA,Financial Services,Shell Companies,United States,,,1.571,
543,CNGL,Financial Services,Shell Companies,United States,,,2.557,


In [13]:
# Create engine to connect to Postgres
from config import db_password
db_string = f"postgresql://postgres:{db_password}@launch-it-1.cyo6pvehqvyz.us-east-1.rds.amazonaws.com:5432/launch-it-1"
engine = create_engine(db_string)

In [14]:
# Send cleaned data to the database
stock_data.to_sql(name="launchit", con=engine, if_exists='replace', index=False)
company_info.to_sql(name="launchit_info", con=engine, if_exists='replace', index=False)
company_growth.to_sql(name="launchit_growth", con=engine, if_exists='replace', index=False)

In [15]:
# stock_data_ml_df.info()