In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import tensorflow as tf
import psycopg2
import numpy as np

pd.set_option('display.max_columns',85)

# Import data from Database

In [2]:
conn = psycopg2.connect("dbname=ipo-database user=postgres password=0nlineSQL")

In [3]:
master_df = pd.read_sql_query("SELECT * from master_data;", conn)
master_df.head()

Unnamed: 0,symbol,trade_date,issuer,lead_jointlead_managers,offer_price,opening_price,firstday_close,firstday_percent_pxchng,dollar_change_opening,dollar_change_close,star_ratings,performed,asset_type,company_name,exchange,currency,country,sector,industry,address,three_mth_date,three_mth_ipo,price_change,three_mth_return,price_gain_loss,fiscaldate_end,reported_currency,gross_profit,total_revenue,costof_revenue,costof_goodservices_sold,operating_income,selling_gen_admin,research_dev,operating_expenses,investment_income_net,net_interest_income,interest_income,interest_expense,noninterest_income,othernon_operatingincome,depreciation,...,deferred_revenue,current_debt,shortterm_debt,total_noncurrent_liabilities,capital_lease_obligations,longterm_debt,current_longterm_debt,longterm_debt_noncurrent,short_longterm_debt_total,other_current_liabilities,other_noncurrent_liabilities,total_shareholder_equity,treasury_stock,retained_earnings,common_stock,common_stockshares_outstanding,operating_cashflow,payments_foroperating_activities,proceeds_fromoperating_activities,changein_operating_liabilities,changein_operating_assets,dpr_depletion_amort,capital_expenditures,changein_receivables,changein_inventory,profit_loss,cashflow_frominvestment,cashflow_fromfinancing,proceeds_fromrepayment_shortterm_debt,payments_forrepurchase_commonstock,payments_forrepurchase_equity,payments_forrepurchase_preferredstock,div_payout,div_payout_commonstock,div_payout_preferredstock,proceeds_fromissuance_commonstock,proceeds_fromissuance_longterm_debtcapital_secnet,proceeds_fromissuance_preferredstock,proceeds_fromrepurchase_equity,proceeds_fromsale_treasurystock,changein_cash_cashequivalents,changein_exchangerate
0,BSN.U,2020-09-11,Broadstone Acquisition,Citigroup,$10.00,$9.84,$9.60,-0.04,-$0.16,-$0.40,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,LEAP.U,2020-09-11,Ribbit LEAP,JPMorgan,$10.00,$11.20,$11.55,0.16,$1.20,$1.55,1.0,Missed,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,SNPR.U,2020-09-11,Tortoise Acquisition Corp. II,Barclays/ Goldman Sachs,$10.00,$10.35,$10.51,0.05,$0.35,$0.51,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,TWCTU,2020-09-11,TWC Tech Holdings II,Citigroup/ Deutsche Bank Securities,$10.00,$10.08,$10.07,0.01,$0.08,$0.07,1.0,,Common Stock,TWC Tech Holdings II Corp,NASDAQ,USD,USA,,,"Four Embarcadero Center, San Francisco, CA, Un...",2020-12-11,10.52,0.44,0.043651,Gain,,,,,,,,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,242.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,
4,NSH.U,2020-09-10,NavSight Holdings,Credit Suisse,$10.00,$10.00,$10.00,0.0,$0.00,$0.00,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [4]:
list(master_df)

['symbol',
 'trade_date',
 'issuer',
 'lead_jointlead_managers',
 'offer_price',
 'opening_price',
 'firstday_close',
 'firstday_percent_pxchng',
 'dollar_change_opening',
 'dollar_change_close',
 'star_ratings',
 'performed',
 'asset_type',
 'company_name',
 'exchange',
 'currency',
 'country',
 'sector',
 'industry',
 'address',
 'three_mth_date',
 'three_mth_ipo',
 'price_change',
 'three_mth_return',
 'price_gain_loss',
 'fiscaldate_end',
 'reported_currency',
 'gross_profit',
 'total_revenue',
 'costof_revenue',
 'costof_goodservices_sold',
 'operating_income',
 'selling_gen_admin',
 'research_dev',
 'operating_expenses',
 'investment_income_net',
 'net_interest_income',
 'interest_income',
 'interest_expense',
 'noninterest_income',
 'othernon_operatingincome',
 'depreciation',
 'dpr_and_amort',
 'income_beforetax',
 'incometax_expense',
 'interest_debt_expense',
 'netincome_cont_operations',
 'comprehensive_income_netoftax',
 'ebit',
 'ebitda',
 'netincome',
 'total_assets',
 't

## Calculated Columns

In [5]:
# Net profit margin
master_df["net_profit_margin"] = master_df["netincome"]/master_df["total_revenue"]
master_df["net_profit_margin"]

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
        ..
3465   NaN
3466   NaN
3467   NaN
3468   NaN
3469   NaN
Name: net_profit_margin, Length: 3470, dtype: float64

In [6]:
# Calculate Gross Profit Margin = gross_profit/total_revenue
master_df["gross_profit_margin"] = master_df["gross_profit"]/master_df["total_revenue"]
master_df["gross_profit_margin"]

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
        ..
3465   NaN
3466   NaN
3467   NaN
3468   NaN
3469   NaN
Name: gross_profit_margin, Length: 3470, dtype: float64

### Calculate Debt Asset Ratio:

The formula for the debt to asset ratio is as follows: Debt/Asset = (Short-term Debt + Long-term Debt) / Total Assets. Where: Total Assets may include all current and non-current assets on the company's balance sheet, or may only include certain assets such as Property, Plant & Equipment (PP&E)

In [7]:
# Debt to Asset Ratio
master_df["debt_asset_ratio"] = master_df["short_longterm_debt_total"]/master_df["total_assets"]
master_df

Unnamed: 0,symbol,trade_date,issuer,lead_jointlead_managers,offer_price,opening_price,firstday_close,firstday_percent_pxchng,dollar_change_opening,dollar_change_close,star_ratings,performed,asset_type,company_name,exchange,currency,country,sector,industry,address,three_mth_date,three_mth_ipo,price_change,three_mth_return,price_gain_loss,fiscaldate_end,reported_currency,gross_profit,total_revenue,costof_revenue,costof_goodservices_sold,operating_income,selling_gen_admin,research_dev,operating_expenses,investment_income_net,net_interest_income,interest_income,interest_expense,noninterest_income,othernon_operatingincome,depreciation,...,total_noncurrent_liabilities,capital_lease_obligations,longterm_debt,current_longterm_debt,longterm_debt_noncurrent,short_longterm_debt_total,other_current_liabilities,other_noncurrent_liabilities,total_shareholder_equity,treasury_stock,retained_earnings,common_stock,common_stockshares_outstanding,operating_cashflow,payments_foroperating_activities,proceeds_fromoperating_activities,changein_operating_liabilities,changein_operating_assets,dpr_depletion_amort,capital_expenditures,changein_receivables,changein_inventory,profit_loss,cashflow_frominvestment,cashflow_fromfinancing,proceeds_fromrepayment_shortterm_debt,payments_forrepurchase_commonstock,payments_forrepurchase_equity,payments_forrepurchase_preferredstock,div_payout,div_payout_commonstock,div_payout_preferredstock,proceeds_fromissuance_commonstock,proceeds_fromissuance_longterm_debtcapital_secnet,proceeds_fromissuance_preferredstock,proceeds_fromrepurchase_equity,proceeds_fromsale_treasurystock,changein_cash_cashequivalents,changein_exchangerate,net_profit_margin,gross_profit_margin,debt_asset_ratio
0,BSN.U,2020-09-11,Broadstone Acquisition,Citigroup,$10.00,$9.84,$9.60,-0.04,-$0.16,-$0.40,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,LEAP.U,2020-09-11,Ribbit LEAP,JPMorgan,$10.00,$11.20,$11.55,0.16,$1.20,$1.55,1.0,Missed,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,SNPR.U,2020-09-11,Tortoise Acquisition Corp. II,Barclays/ Goldman Sachs,$10.00,$10.35,$10.51,0.05,$0.35,$0.51,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,TWCTU,2020-09-11,TWC Tech Holdings II,Citigroup/ Deutsche Bank Securities,$10.00,$10.08,$10.07,0.01,$0.08,$0.07,1.0,,Common Stock,TWC Tech Holdings II Corp,NASDAQ,USD,USA,,,"Four Embarcadero Center, San Francisco, CA, Un...",2020-12-11,10.52,0.44,0.043651,Gain,,,,,,,,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,242.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0
4,NSH.U,2020-09-10,NavSight Holdings,Credit Suisse,$10.00,$10.00,$10.00,0.00,$0.00,$0.00,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3465,UNWR,2000-05-18,US Unwired,Donaldson Lufkin & Jenrette,$11.00,$11.03,$11.06,0.01,$0.03,$0.06,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3466,NGTC,2000-05-18,Nogatech,W.R. Hambrecht+Co,$12.00,$11.88,$9.41,-0.22,-$0.13,-$2.59,1.0,,Common Stock,Nogatech Inc,NASDAQ,USD,USA,Other,Other,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3467,IBEM,2000-05-18,iBEAM Broadcasting,Morgan Stanley Dean Witter,$10.00,$11.50,$14.00,0.40,$1.50,$4.00,2.0,,Common Stock,Ibeam Broadcasting Corp,NASDAQ,USD,USA,Other,Other,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3468,NUFO,2000-05-18,New Focus,Credit Suisse First Boston,$20.00,$40.00,$51.00,1.55,$20.00,$31.00,3.0,,Common Stock,New Focus Inc,NASDAQ,USD,USA,Other,Other,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### Calculate Current Ratio:

This is a financial KPI that measure the company's ability to pay off its short term financial obligations in one year.

The formula for Current Ratio is Total Current Assets / Total Current Liabilities.

In [8]:
# Current Ratio
master_df["current_ratio"] = master_df["total_current_assets"]/master_df["total_current_liabilities"]
master_df

Unnamed: 0,symbol,trade_date,issuer,lead_jointlead_managers,offer_price,opening_price,firstday_close,firstday_percent_pxchng,dollar_change_opening,dollar_change_close,star_ratings,performed,asset_type,company_name,exchange,currency,country,sector,industry,address,three_mth_date,three_mth_ipo,price_change,three_mth_return,price_gain_loss,fiscaldate_end,reported_currency,gross_profit,total_revenue,costof_revenue,costof_goodservices_sold,operating_income,selling_gen_admin,research_dev,operating_expenses,investment_income_net,net_interest_income,interest_income,interest_expense,noninterest_income,othernon_operatingincome,depreciation,...,capital_lease_obligations,longterm_debt,current_longterm_debt,longterm_debt_noncurrent,short_longterm_debt_total,other_current_liabilities,other_noncurrent_liabilities,total_shareholder_equity,treasury_stock,retained_earnings,common_stock,common_stockshares_outstanding,operating_cashflow,payments_foroperating_activities,proceeds_fromoperating_activities,changein_operating_liabilities,changein_operating_assets,dpr_depletion_amort,capital_expenditures,changein_receivables,changein_inventory,profit_loss,cashflow_frominvestment,cashflow_fromfinancing,proceeds_fromrepayment_shortterm_debt,payments_forrepurchase_commonstock,payments_forrepurchase_equity,payments_forrepurchase_preferredstock,div_payout,div_payout_commonstock,div_payout_preferredstock,proceeds_fromissuance_commonstock,proceeds_fromissuance_longterm_debtcapital_secnet,proceeds_fromissuance_preferredstock,proceeds_fromrepurchase_equity,proceeds_fromsale_treasurystock,changein_cash_cashequivalents,changein_exchangerate,net_profit_margin,gross_profit_margin,debt_asset_ratio,current_ratio
0,BSN.U,2020-09-11,Broadstone Acquisition,Citigroup,$10.00,$9.84,$9.60,-0.04,-$0.16,-$0.40,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,LEAP.U,2020-09-11,Ribbit LEAP,JPMorgan,$10.00,$11.20,$11.55,0.16,$1.20,$1.55,1.0,Missed,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,SNPR.U,2020-09-11,Tortoise Acquisition Corp. II,Barclays/ Goldman Sachs,$10.00,$10.35,$10.51,0.05,$0.35,$0.51,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,TWCTU,2020-09-11,TWC Tech Holdings II,Citigroup/ Deutsche Bank Securities,$10.00,$10.08,$10.07,0.01,$0.08,$0.07,1.0,,Common Stock,TWC Tech Holdings II Corp,NASDAQ,USD,USA,,,"Four Embarcadero Center, San Francisco, CA, Un...",2020-12-11,10.52,0.44,0.043651,Gain,,,,,,,,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,242.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,4.138819
4,NSH.U,2020-09-10,NavSight Holdings,Credit Suisse,$10.00,$10.00,$10.00,0.00,$0.00,$0.00,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3465,UNWR,2000-05-18,US Unwired,Donaldson Lufkin & Jenrette,$11.00,$11.03,$11.06,0.01,$0.03,$0.06,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3466,NGTC,2000-05-18,Nogatech,W.R. Hambrecht+Co,$12.00,$11.88,$9.41,-0.22,-$0.13,-$2.59,1.0,,Common Stock,Nogatech Inc,NASDAQ,USD,USA,Other,Other,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3467,IBEM,2000-05-18,iBEAM Broadcasting,Morgan Stanley Dean Witter,$10.00,$11.50,$14.00,0.40,$1.50,$4.00,2.0,,Common Stock,Ibeam Broadcasting Corp,NASDAQ,USD,USA,Other,Other,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3468,NUFO,2000-05-18,New Focus,Credit Suisse First Boston,$20.00,$40.00,$51.00,1.55,$20.00,$31.00,3.0,,Common Stock,New Focus Inc,NASDAQ,USD,USA,Other,Other,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### Drop rows with N/As and infinite values

In [9]:
master_df = master_df.dropna(subset=["gross_profit_margin","debt_asset_ratio","current_ratio"])
master_df

Unnamed: 0,symbol,trade_date,issuer,lead_jointlead_managers,offer_price,opening_price,firstday_close,firstday_percent_pxchng,dollar_change_opening,dollar_change_close,star_ratings,performed,asset_type,company_name,exchange,currency,country,sector,industry,address,three_mth_date,three_mth_ipo,price_change,three_mth_return,price_gain_loss,fiscaldate_end,reported_currency,gross_profit,total_revenue,costof_revenue,costof_goodservices_sold,operating_income,selling_gen_admin,research_dev,operating_expenses,investment_income_net,net_interest_income,interest_income,interest_expense,noninterest_income,othernon_operatingincome,depreciation,...,capital_lease_obligations,longterm_debt,current_longterm_debt,longterm_debt_noncurrent,short_longterm_debt_total,other_current_liabilities,other_noncurrent_liabilities,total_shareholder_equity,treasury_stock,retained_earnings,common_stock,common_stockshares_outstanding,operating_cashflow,payments_foroperating_activities,proceeds_fromoperating_activities,changein_operating_liabilities,changein_operating_assets,dpr_depletion_amort,capital_expenditures,changein_receivables,changein_inventory,profit_loss,cashflow_frominvestment,cashflow_fromfinancing,proceeds_fromrepayment_shortterm_debt,payments_forrepurchase_commonstock,payments_forrepurchase_equity,payments_forrepurchase_preferredstock,div_payout,div_payout_commonstock,div_payout_preferredstock,proceeds_fromissuance_commonstock,proceeds_fromissuance_longterm_debtcapital_secnet,proceeds_fromissuance_preferredstock,proceeds_fromrepurchase_equity,proceeds_fromsale_treasurystock,changein_cash_cashequivalents,changein_exchangerate,net_profit_margin,gross_profit_margin,debt_asset_ratio,current_ratio
15,AUVI,2020-08-31,Applied UV,"Network 1 Financial Securities,",$5.00,$5.75,$11.60,1.32,$0.75,$6.60,0.0,,Common Stock,"Applied UV, Inc",NASDAQ,USD,USA,Consumer Cyclical,"Furnishings, Fixtures & Appliances","150 N. Macquesten Parkway, Mount Vernon, NY, U...",2020-11-30,5.18,-0.57,-0.099130,Loss,2020-09-30,USD,7.817800e+04,1.560633e+06,1.482455e+06,1.482455e+06,-8.949670e+05,9.251080e+05,48037.0,2.455600e+06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.325850e+05,3.175300e+05,9.929700e+04,0.0,416827.0,8.537970e+05,0.0,5.445513e+06,0.0,1.745910e+05,633.0,6.334308e+06,-4.298970e+05,0.0,0.0,0.0,0.0,9182.0,55814.0,0.0,-45156.0,0.0,-5.581400e+04,4927529.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.573314,0.050094,0.053702,4.352835
16,HCDI,2020-08-28,Harbor Custom Development,ThinkEquity (a division of Fordham Financial M...,$6.00,$5.50,$7.50,0.25,-$0.50,$1.50,1.0,,Common Stock,"Harbor Custom Development, Inc",NASDAQ,USD,USA,Real Estate,Real Estate-Development,"11505 Burnham Drive, Gig Harbor, WA, United St...",,,,,,2020-09-30,USD,6.226000e+05,7.806500e+06,7.183900e+06,7.183900e+06,-8.356000e+05,0.000000e+00,0.0,8.642100e+06,0.0,-163900.0,0.0,163900.0,0.0,0.0,0.0,...,2.858600e+06,3.774400e+06,2.625700e+07,0.0,30031400.0,0.000000e+00,0.0,9.512600e+06,0.0,-2.574500e+06,11957000.0,5.628048e+06,-6.957600e+06,0.0,0.0,0.0,0.0,199400.0,79300.0,0.0,0.0,0.0,1.057000e+05,8871000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.055992,0.079754,0.658664,0.091950
19,XPEV,2020-08-27,XPeng,Credit Suisse/ J.P. Morgan/ BofA Securities,$15.00,$23.10,$21.22,0.41,$8.10,$6.22,3.0,,Common Stock,XPeng Inc,NYSE,USD,USA,Consumer Cyclical,Auto Manufacturers,"No. 8 Songgang Road, Guangzhou, China, 510640",2020-11-27,64.28,41.18,1.782684,Gain,2020-09-30,CNY,9.154100e+07,1.990119e+09,1.898578e+09,1.898578e+09,-1.744184e+09,1.203792e+09,635373000.0,3.734303e+09,0.0,19290000.0,23216000.0,3926000.0,0.0,0.0,0.0,...,4.092390e+08,1.667490e+09,2.804000e+08,0.0,0.0,1.387595e+09,56844000.0,1.972801e+10,0.0,-1.053500e+10,94000.0,1.426361e+09,7.472520e+08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.924452e+09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.577232,0.045998,0.000000,3.921096
39,BEKE,2020-08-13,KE Holdings,Goldman Sachs/ Morgan Stanley/ China Renaissance,$20.00,$35.06,$37.44,0.87,$15.06,$17.44,3.0,,Common Stock,KE Holdings Inc,NYSE,USD,USA,Real Estate,Real Estate Services,"Building Fudao, Beijing, China, 100085",2020-11-13,72.53,37.47,1.068739,Gain,2020-06-30,CNY,4.383230e+09,2.054892e+10,1.616568e+10,1.616568e+10,3.435050e+09,3.675157e+09,523670000.0,1.670661e+10,0.0,58486000.0,105005000.0,39926000.0,0.0,0.0,0.0,...,5.367205e+09,4.885370e+09,2.769275e+09,0.0,0.0,1.103751e+10,93757000.0,3.376086e+10,0.0,-1.000450e+10,205000.0,3.382438e+09,9.133566e+09,0.0,0.0,0.0,0.0,272184000.0,148186000.0,0.0,0.0,0.0,2.751829e+09,329069000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.138040,0.213307,0.000000,1.886462
46,IBEX,2020-08-07,IBEX Ltd.,Citigroup/ RBC Capital Markets/ Baird,$19.00,$18.00,$15.40,-0.19,-$1.00,-$3.60,1.0,,Common Stock,IBEX Limited,NASDAQ,USD,USA,Technology,Software-Application,"1700 Pennsylvania Avenue NW, Washington, DC, U...",,,,,,2020-06-30,USD,3.187100e+07,1.008800e+08,6.900900e+07,6.900900e+07,2.303000e+06,1.531300e+07,0.0,8.019000e+06,0.0,-2238000.0,0.0,-4737000.0,0.0,0.0,0.0,...,7.471200e+07,3.782000e+06,2.747600e+07,0.0,31258000.0,1.220000e+07,2081000.0,1.614800e+07,0.0,-1.095270e+08,12000.0,1.752083e+07,1.806600e+07,0.0,0.0,0.0,0.0,6012000.0,761000.0,0.0,0.0,0.0,-6.400000e+05,-11045000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.037718,0.315930,0.160104,0.814326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2629,ACLI,2005-10-06,American Commercial Lines,Merrill Lynch/UBS Investment Bank,$21.00,$26.50,$28.30,0.35,$5.50,$7.30,1.0,Missed,Common Stock,AMERICAN COMMERCIAL LINES INC.,NASDAQ,USD,USA,Other,Other,"1701 East Market Street\r\n47130,Jeffersonvill...",,,,,,2005-09-30,USD,0.000000e+00,1.672970e+08,1.334700e+08,1.334700e+08,1.114900e+07,1.139300e+07,0.0,0.000000e+00,0.0,0.0,0.0,7869000.0,0.0,0.0,0.0,...,0.000000e+00,3.906450e+08,0.000000e+00,0.0,0.0,8.482400e+07,0.0,1.053050e+08,0.0,3.211000e+06,232000.0,0.000000e+00,1.035700e+07,0.0,0.0,0.0,0.0,0.0,26129000.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020957,0.000000,0.000000,0.000000
2722,EVVV,2005-06-15,ev3,Piper Jaffray/Banc of America,$14.00,$13.50,$14.15,0.01,-$0.50,$0.15,1.0,,Common Stock,ev3 Inc,NASDAQ,USD,USA,Other,Other,"3033 Campus Drive, Suite 100\r\n55441,Plymouth...",2005-09-15,19.91,6.41,0.474815,Gain,2005-06-30,USD,0.000000e+00,3.154000e+07,1.161600e+07,1.161600e+07,0.000000e+00,4.418800e+07,12759000.0,0.000000e+00,0.0,0.0,0.0,6346000.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,2.547600e+07,0.0,2.812520e+08,0.0,0.000000e+00,489000.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,6119000.0,0.0,0.0,0.0,0.000000e+00,202700000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000
2907,SRVY,2004-07-15,Greenfield Online,Lehman Brothers,$13.00,$16.00,$18.70,0.44,$3.00,$5.70,2.0,,Common Stock,GREENFIELD ONLINE INC,NASDAQ,USD,USA,Other,Other,"21 River Road\r\n06897,Wilton,USA\r\n",2004-10-15,23.01,7.01,0.438125,Gain,2004-06-30,USD,0.000000e+00,1.022900e+07,2.180000e+06,2.180000e+06,1.946000e+06,5.858000e+06,265000.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,1.369000e+06,0.000000e+00,0.0,0.0,5.266000e+06,0.0,6.647000e+06,-131000.0,0.000000e+00,0.0,0.000000e+00,1.390000e+05,0.0,0.0,0.0,0.0,0.0,860000.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.168834,0.000000,0.000000,0.000000
2932,LDIS,2004-06-15,Leadis Technology,Goldman Sachs/Merrill Lynch,$14.00,$14.27,$13.10,-0.06,$0.27,-$0.90,3.0,Missed,Common Stock,Leadis Technology Inc,NASDAQ,USD,USA,Other,Other,"PO Box 61657\r\n94088,Sunnyvale,USA\r\n",2004-09-15,11.87,-2.40,-0.168185,Loss,2004-06-30,USD,0.000000e+00,4.132500e+07,2.596900e+07,2.596900e+07,1.013000e+07,5.134000e+06,3083000.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,6.346000e+06,0.0,1.178650e+08,0.0,2.157700e+07,101944000.0,0.000000e+00,1.613000e+06,0.0,0.0,0.0,0.0,0.0,188000.0,0.0,0.0,0.0,1.312000e+06,76776000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.150321,0.000000,0.000000,0.000000


In [10]:
# Remove infinity values from calculated columns dividing by 0
master_df = master_df.replace([-np.inf, np.inf], np.nan)
master_df

Unnamed: 0,symbol,trade_date,issuer,lead_jointlead_managers,offer_price,opening_price,firstday_close,firstday_percent_pxchng,dollar_change_opening,dollar_change_close,star_ratings,performed,asset_type,company_name,exchange,currency,country,sector,industry,address,three_mth_date,three_mth_ipo,price_change,three_mth_return,price_gain_loss,fiscaldate_end,reported_currency,gross_profit,total_revenue,costof_revenue,costof_goodservices_sold,operating_income,selling_gen_admin,research_dev,operating_expenses,investment_income_net,net_interest_income,interest_income,interest_expense,noninterest_income,othernon_operatingincome,depreciation,...,capital_lease_obligations,longterm_debt,current_longterm_debt,longterm_debt_noncurrent,short_longterm_debt_total,other_current_liabilities,other_noncurrent_liabilities,total_shareholder_equity,treasury_stock,retained_earnings,common_stock,common_stockshares_outstanding,operating_cashflow,payments_foroperating_activities,proceeds_fromoperating_activities,changein_operating_liabilities,changein_operating_assets,dpr_depletion_amort,capital_expenditures,changein_receivables,changein_inventory,profit_loss,cashflow_frominvestment,cashflow_fromfinancing,proceeds_fromrepayment_shortterm_debt,payments_forrepurchase_commonstock,payments_forrepurchase_equity,payments_forrepurchase_preferredstock,div_payout,div_payout_commonstock,div_payout_preferredstock,proceeds_fromissuance_commonstock,proceeds_fromissuance_longterm_debtcapital_secnet,proceeds_fromissuance_preferredstock,proceeds_fromrepurchase_equity,proceeds_fromsale_treasurystock,changein_cash_cashequivalents,changein_exchangerate,net_profit_margin,gross_profit_margin,debt_asset_ratio,current_ratio
15,AUVI,2020-08-31,Applied UV,"Network 1 Financial Securities,",$5.00,$5.75,$11.60,1.32,$0.75,$6.60,0.0,,Common Stock,"Applied UV, Inc",NASDAQ,USD,USA,Consumer Cyclical,"Furnishings, Fixtures & Appliances","150 N. Macquesten Parkway, Mount Vernon, NY, U...",2020-11-30,5.18,-0.57,-0.099130,Loss,2020-09-30,USD,7.817800e+04,1.560633e+06,1.482455e+06,1.482455e+06,-8.949670e+05,9.251080e+05,48037.0,2.455600e+06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.325850e+05,3.175300e+05,9.929700e+04,0.0,416827.0,8.537970e+05,0.0,5.445513e+06,0.0,1.745910e+05,633.0,6.334308e+06,-4.298970e+05,0.0,0.0,0.0,0.0,9182.0,55814.0,0.0,-45156.0,0.0,-5.581400e+04,4927529.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.573314,0.050094,0.053702,4.352835
16,HCDI,2020-08-28,Harbor Custom Development,ThinkEquity (a division of Fordham Financial M...,$6.00,$5.50,$7.50,0.25,-$0.50,$1.50,1.0,,Common Stock,"Harbor Custom Development, Inc",NASDAQ,USD,USA,Real Estate,Real Estate-Development,"11505 Burnham Drive, Gig Harbor, WA, United St...",,,,,,2020-09-30,USD,6.226000e+05,7.806500e+06,7.183900e+06,7.183900e+06,-8.356000e+05,0.000000e+00,0.0,8.642100e+06,0.0,-163900.0,0.0,163900.0,0.0,0.0,0.0,...,2.858600e+06,3.774400e+06,2.625700e+07,0.0,30031400.0,0.000000e+00,0.0,9.512600e+06,0.0,-2.574500e+06,11957000.0,5.628048e+06,-6.957600e+06,0.0,0.0,0.0,0.0,199400.0,79300.0,0.0,0.0,0.0,1.057000e+05,8871000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.055992,0.079754,0.658664,0.091950
19,XPEV,2020-08-27,XPeng,Credit Suisse/ J.P. Morgan/ BofA Securities,$15.00,$23.10,$21.22,0.41,$8.10,$6.22,3.0,,Common Stock,XPeng Inc,NYSE,USD,USA,Consumer Cyclical,Auto Manufacturers,"No. 8 Songgang Road, Guangzhou, China, 510640",2020-11-27,64.28,41.18,1.782684,Gain,2020-09-30,CNY,9.154100e+07,1.990119e+09,1.898578e+09,1.898578e+09,-1.744184e+09,1.203792e+09,635373000.0,3.734303e+09,0.0,19290000.0,23216000.0,3926000.0,0.0,0.0,0.0,...,4.092390e+08,1.667490e+09,2.804000e+08,0.0,0.0,1.387595e+09,56844000.0,1.972801e+10,0.0,-1.053500e+10,94000.0,1.426361e+09,7.472520e+08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.924452e+09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.577232,0.045998,0.000000,3.921096
39,BEKE,2020-08-13,KE Holdings,Goldman Sachs/ Morgan Stanley/ China Renaissance,$20.00,$35.06,$37.44,0.87,$15.06,$17.44,3.0,,Common Stock,KE Holdings Inc,NYSE,USD,USA,Real Estate,Real Estate Services,"Building Fudao, Beijing, China, 100085",2020-11-13,72.53,37.47,1.068739,Gain,2020-06-30,CNY,4.383230e+09,2.054892e+10,1.616568e+10,1.616568e+10,3.435050e+09,3.675157e+09,523670000.0,1.670661e+10,0.0,58486000.0,105005000.0,39926000.0,0.0,0.0,0.0,...,5.367205e+09,4.885370e+09,2.769275e+09,0.0,0.0,1.103751e+10,93757000.0,3.376086e+10,0.0,-1.000450e+10,205000.0,3.382438e+09,9.133566e+09,0.0,0.0,0.0,0.0,272184000.0,148186000.0,0.0,0.0,0.0,2.751829e+09,329069000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.138040,0.213307,0.000000,1.886462
46,IBEX,2020-08-07,IBEX Ltd.,Citigroup/ RBC Capital Markets/ Baird,$19.00,$18.00,$15.40,-0.19,-$1.00,-$3.60,1.0,,Common Stock,IBEX Limited,NASDAQ,USD,USA,Technology,Software-Application,"1700 Pennsylvania Avenue NW, Washington, DC, U...",,,,,,2020-06-30,USD,3.187100e+07,1.008800e+08,6.900900e+07,6.900900e+07,2.303000e+06,1.531300e+07,0.0,8.019000e+06,0.0,-2238000.0,0.0,-4737000.0,0.0,0.0,0.0,...,7.471200e+07,3.782000e+06,2.747600e+07,0.0,31258000.0,1.220000e+07,2081000.0,1.614800e+07,0.0,-1.095270e+08,12000.0,1.752083e+07,1.806600e+07,0.0,0.0,0.0,0.0,6012000.0,761000.0,0.0,0.0,0.0,-6.400000e+05,-11045000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.037718,0.315930,0.160104,0.814326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2629,ACLI,2005-10-06,American Commercial Lines,Merrill Lynch/UBS Investment Bank,$21.00,$26.50,$28.30,0.35,$5.50,$7.30,1.0,Missed,Common Stock,AMERICAN COMMERCIAL LINES INC.,NASDAQ,USD,USA,Other,Other,"1701 East Market Street\r\n47130,Jeffersonvill...",,,,,,2005-09-30,USD,0.000000e+00,1.672970e+08,1.334700e+08,1.334700e+08,1.114900e+07,1.139300e+07,0.0,0.000000e+00,0.0,0.0,0.0,7869000.0,0.0,0.0,0.0,...,0.000000e+00,3.906450e+08,0.000000e+00,0.0,0.0,8.482400e+07,0.0,1.053050e+08,0.0,3.211000e+06,232000.0,0.000000e+00,1.035700e+07,0.0,0.0,0.0,0.0,0.0,26129000.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020957,0.000000,0.000000,0.000000
2722,EVVV,2005-06-15,ev3,Piper Jaffray/Banc of America,$14.00,$13.50,$14.15,0.01,-$0.50,$0.15,1.0,,Common Stock,ev3 Inc,NASDAQ,USD,USA,Other,Other,"3033 Campus Drive, Suite 100\r\n55441,Plymouth...",2005-09-15,19.91,6.41,0.474815,Gain,2005-06-30,USD,0.000000e+00,3.154000e+07,1.161600e+07,1.161600e+07,0.000000e+00,4.418800e+07,12759000.0,0.000000e+00,0.0,0.0,0.0,6346000.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,2.547600e+07,0.0,2.812520e+08,0.0,0.000000e+00,489000.0,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,6119000.0,0.0,0.0,0.0,0.000000e+00,202700000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000
2907,SRVY,2004-07-15,Greenfield Online,Lehman Brothers,$13.00,$16.00,$18.70,0.44,$3.00,$5.70,2.0,,Common Stock,GREENFIELD ONLINE INC,NASDAQ,USD,USA,Other,Other,"21 River Road\r\n06897,Wilton,USA\r\n",2004-10-15,23.01,7.01,0.438125,Gain,2004-06-30,USD,0.000000e+00,1.022900e+07,2.180000e+06,2.180000e+06,1.946000e+06,5.858000e+06,265000.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,1.369000e+06,0.000000e+00,0.0,0.0,5.266000e+06,0.0,6.647000e+06,-131000.0,0.000000e+00,0.0,0.000000e+00,1.390000e+05,0.0,0.0,0.0,0.0,0.0,860000.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.168834,0.000000,0.000000,0.000000
2932,LDIS,2004-06-15,Leadis Technology,Goldman Sachs/Merrill Lynch,$14.00,$14.27,$13.10,-0.06,$0.27,-$0.90,3.0,Missed,Common Stock,Leadis Technology Inc,NASDAQ,USD,USA,Other,Other,"PO Box 61657\r\n94088,Sunnyvale,USA\r\n",2004-09-15,11.87,-2.40,-0.168185,Loss,2004-06-30,USD,0.000000e+00,4.132500e+07,2.596900e+07,2.596900e+07,1.013000e+07,5.134000e+06,3083000.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,6.346000e+06,0.0,1.178650e+08,0.0,2.157700e+07,101944000.0,0.000000e+00,1.613000e+06,0.0,0.0,0.0,0.0,0.0,188000.0,0.0,0.0,0.0,1.312000e+06,76776000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.150321,0.000000,0.000000,0.000000


# Preprocess

In [11]:
IPO_df = master_df[["sector","industry","price_gain_loss","debt_asset_ratio", "gross_profit_margin","net_profit_margin","operating_cashflow", "current_ratio"]]
IPO_df

Unnamed: 0,sector,industry,price_gain_loss,debt_asset_ratio,gross_profit_margin,net_profit_margin,operating_cashflow,current_ratio
15,Consumer Cyclical,"Furnishings, Fixtures & Appliances",Loss,0.053702,0.050094,-0.573314,-4.298970e+05,4.352835
16,Real Estate,Real Estate-Development,,0.658664,0.079754,-0.055992,-6.957600e+06,0.091950
19,Consumer Cyclical,Auto Manufacturers,Gain,0.000000,0.045998,-0.577232,7.472520e+08,3.921096
39,Real Estate,Real Estate Services,Gain,0.000000,0.213307,0.138040,9.133566e+09,1.886462
46,Technology,Software-Application,,0.160104,0.315930,-0.037718,1.806600e+07,0.814326
...,...,...,...,...,...,...,...,...
2629,Other,Other,,0.000000,0.000000,0.020957,1.035700e+07,0.000000
2722,Other,Other,Gain,0.000000,0.000000,0.000000,0.000000e+00,0.000000
2907,Other,Other,Gain,0.000000,0.000000,0.168834,1.390000e+05,0.000000
2932,Other,Other,Loss,0.000000,0.000000,0.150321,1.613000e+06,0.000000


In [12]:
IPO_df.shape

(503, 8)

In [13]:
IPO_df = IPO_df.dropna()
IPO_df.shape

(296, 8)

In [14]:
IPO_df.nunique()

sector                  12
industry                67
price_gain_loss          2
debt_asset_ratio        90
gross_profit_margin    190
net_profit_margin      259
operating_cashflow     246
current_ratio          216
dtype: int64

# Encoding

In [15]:
# Binary encode
IPO_df_encoded = pd.get_dummies(IPO_df, columns=["sector","industry","price_gain_loss"])
IPO_df_encoded.head()

Unnamed: 0,debt_asset_ratio,gross_profit_margin,net_profit_margin,operating_cashflow,current_ratio,sector_Basic Materials,sector_Communication Services,sector_Consumer Cyclical,sector_Consumer Defensive,sector_Energy,sector_Financial Services,sector_Healthcare,sector_Industrials,sector_Other,sector_Real Estate,sector_Technology,sector_Utilities,industry_Advertising Agencies,industry_Aerospace & Defense,industry_Airlines,industry_Airports & Air Services,industry_Apparel Manufacturing,industry_Asset Management,industry_Auto & Truck Dealerships,industry_Auto Manufacturers,industry_Biotechnology,industry_Building Materials,industry_Building Products & Equipment,industry_Capital Markets,industry_Chemicals,industry_Communication Equipment,industry_Computer Hardware,industry_Credit Services,industry_Diagnostics & Research,industry_Discount Stores,industry_Drug Manufacturers-Specialty & Generic,industry_Education & Training Services,industry_Electrical Equipment & Parts,industry_Electronic Gaming & Multimedia,industry_Entertainment,"industry_Furnishings, Fixtures & Appliances",industry_Gambling,...,industry_Insurance Brokers,industry_Insurance-Diversified,industry_Insurance-Property & Casualty,industry_Integrated Freight & Logistics,industry_Internet Content & Information,industry_Internet Retail,industry_Lodging,industry_Medical Care Facilities,industry_Medical Devices,industry_Metal Fabrication,industry_Mortgage Finance,industry_Oil & Gas Drilling,industry_Oil & Gas Equipment & Services,industry_Oil & Gas Midstream,industry_Other,industry_Packaged Foods,industry_Packaging & Containers,industry_Pharmaceutical Retailers,industry_Pollution & Treatment Controls,industry_REIT-Diversified,industry_Real Estate Services,industry_Real Estate-Development,industry_Rental & Leasing Services,industry_Residential Construction,industry_Resorts & Casinos,industry_Restaurants,industry_Semiconductors,industry_Shell Companies,industry_Software-Application,industry_Software-Infrastructure,industry_Specialty Chemicals,industry_Specialty Industrial Machinery,industry_Specialty Retail,industry_Staffing & Employment Services,industry_Telecom Services,industry_Travel Services,industry_Utilities-Regulated Gas,industry_Utilities-Regulated Water,industry_Utilities-Renewable,industry_Waste Management,price_gain_loss_Gain,price_gain_loss_Loss
15,0.053702,0.050094,-0.573314,-429897.0,4.352835,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
19,0.0,0.045998,-0.577232,747252000.0,3.921096,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
39,0.0,0.213307,0.13804,9133566000.0,1.886462,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
51,0.165354,0.090474,-0.124847,-6337000.0,1.601221,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
96,0.0,0.536819,-0.300338,23477000.0,4.286222,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0


# Target Feature

In [16]:
y = IPO_df_encoded.price_gain_loss_Gain
X = IPO_df_encoded.drop(columns=["price_gain_loss_Gain","price_gain_loss_Loss"])

In [17]:
y

15      0
19      1
39      1
51      1
96      1
       ..
2528    0
2722    1
2907    1
2932    0
3020    1
Name: price_gain_loss_Gain, Length: 296, dtype: uint8

In [18]:
X

Unnamed: 0,debt_asset_ratio,gross_profit_margin,net_profit_margin,operating_cashflow,current_ratio,sector_Basic Materials,sector_Communication Services,sector_Consumer Cyclical,sector_Consumer Defensive,sector_Energy,sector_Financial Services,sector_Healthcare,sector_Industrials,sector_Other,sector_Real Estate,sector_Technology,sector_Utilities,industry_Advertising Agencies,industry_Aerospace & Defense,industry_Airlines,industry_Airports & Air Services,industry_Apparel Manufacturing,industry_Asset Management,industry_Auto & Truck Dealerships,industry_Auto Manufacturers,industry_Biotechnology,industry_Building Materials,industry_Building Products & Equipment,industry_Capital Markets,industry_Chemicals,industry_Communication Equipment,industry_Computer Hardware,industry_Credit Services,industry_Diagnostics & Research,industry_Discount Stores,industry_Drug Manufacturers-Specialty & Generic,industry_Education & Training Services,industry_Electrical Equipment & Parts,industry_Electronic Gaming & Multimedia,industry_Entertainment,"industry_Furnishings, Fixtures & Appliances",industry_Gambling,industry_Health Information Services,industry_Information Technology Services,industry_Insurance Brokers,industry_Insurance-Diversified,industry_Insurance-Property & Casualty,industry_Integrated Freight & Logistics,industry_Internet Content & Information,industry_Internet Retail,industry_Lodging,industry_Medical Care Facilities,industry_Medical Devices,industry_Metal Fabrication,industry_Mortgage Finance,industry_Oil & Gas Drilling,industry_Oil & Gas Equipment & Services,industry_Oil & Gas Midstream,industry_Other,industry_Packaged Foods,industry_Packaging & Containers,industry_Pharmaceutical Retailers,industry_Pollution & Treatment Controls,industry_REIT-Diversified,industry_Real Estate Services,industry_Real Estate-Development,industry_Rental & Leasing Services,industry_Residential Construction,industry_Resorts & Casinos,industry_Restaurants,industry_Semiconductors,industry_Shell Companies,industry_Software-Application,industry_Software-Infrastructure,industry_Specialty Chemicals,industry_Specialty Industrial Machinery,industry_Specialty Retail,industry_Staffing & Employment Services,industry_Telecom Services,industry_Travel Services,industry_Utilities-Regulated Gas,industry_Utilities-Regulated Water,industry_Utilities-Renewable,industry_Waste Management
15,0.053702,0.050094,-0.573314,-4.298970e+05,4.352835,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
19,0.000000,0.045998,-0.577232,7.472520e+08,3.921096,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
39,0.000000,0.213307,0.138040,9.133566e+09,1.886462,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
51,0.165354,0.090474,-0.124847,-6.337000e+06,1.601221,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
96,0.000000,0.536819,-0.300338,2.347700e+07,4.286222,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2528,0.000000,0.000000,0.000000,1.954000e+06,0.000000,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2722,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2907,0.000000,0.000000,0.168834,1.390000e+05,0.000000,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2932,0.000000,0.000000,0.150321,1.613000e+06,0.000000,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [19]:
X.describe()

Unnamed: 0,debt_asset_ratio,gross_profit_margin,net_profit_margin,operating_cashflow,current_ratio,sector_Basic Materials,sector_Communication Services,sector_Consumer Cyclical,sector_Consumer Defensive,sector_Energy,sector_Financial Services,sector_Healthcare,sector_Industrials,sector_Other,sector_Real Estate,sector_Technology,sector_Utilities,industry_Advertising Agencies,industry_Aerospace & Defense,industry_Airlines,industry_Airports & Air Services,industry_Apparel Manufacturing,industry_Asset Management,industry_Auto & Truck Dealerships,industry_Auto Manufacturers,industry_Biotechnology,industry_Building Materials,industry_Building Products & Equipment,industry_Capital Markets,industry_Chemicals,industry_Communication Equipment,industry_Computer Hardware,industry_Credit Services,industry_Diagnostics & Research,industry_Discount Stores,industry_Drug Manufacturers-Specialty & Generic,industry_Education & Training Services,industry_Electrical Equipment & Parts,industry_Electronic Gaming & Multimedia,industry_Entertainment,"industry_Furnishings, Fixtures & Appliances",industry_Gambling,industry_Health Information Services,industry_Information Technology Services,industry_Insurance Brokers,industry_Insurance-Diversified,industry_Insurance-Property & Casualty,industry_Integrated Freight & Logistics,industry_Internet Content & Information,industry_Internet Retail,industry_Lodging,industry_Medical Care Facilities,industry_Medical Devices,industry_Metal Fabrication,industry_Mortgage Finance,industry_Oil & Gas Drilling,industry_Oil & Gas Equipment & Services,industry_Oil & Gas Midstream,industry_Other,industry_Packaged Foods,industry_Packaging & Containers,industry_Pharmaceutical Retailers,industry_Pollution & Treatment Controls,industry_REIT-Diversified,industry_Real Estate Services,industry_Real Estate-Development,industry_Rental & Leasing Services,industry_Residential Construction,industry_Resorts & Casinos,industry_Restaurants,industry_Semiconductors,industry_Shell Companies,industry_Software-Application,industry_Software-Infrastructure,industry_Specialty Chemicals,industry_Specialty Industrial Machinery,industry_Specialty Retail,industry_Staffing & Employment Services,industry_Telecom Services,industry_Travel Services,industry_Utilities-Regulated Gas,industry_Utilities-Regulated Water,industry_Utilities-Renewable,industry_Waste Management
count,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0
mean,0.200184,-0.088677,-21.333989,43138410.0,2.922452,0.013514,0.064189,0.081081,0.037162,0.016892,0.067568,0.168919,0.050676,0.385135,0.023649,0.081081,0.010135,0.003378,0.003378,0.003378,0.003378,0.006757,0.010135,0.003378,0.006757,0.10473,0.006757,0.003378,0.010135,0.003378,0.003378,0.003378,0.027027,0.013514,0.003378,0.013514,0.023649,0.006757,0.003378,0.003378,0.006757,0.003378,0.010135,0.006757,0.003378,0.006757,0.003378,0.003378,0.047297,0.023649,0.003378,0.003378,0.016892,0.003378,0.003378,0.003378,0.010135,0.003378,0.385135,0.010135,0.006757,0.006757,0.003378,0.006757,0.013514,0.003378,0.003378,0.003378,0.003378,0.006757,0.006757,0.003378,0.037162,0.023649,0.003378,0.003378,0.003378,0.003378,0.006757,0.003378,0.003378,0.003378,0.003378,0.010135
std,1.527585,11.48555,276.167377,589429500.0,6.764782,0.115655,0.245505,0.273422,0.189479,0.129085,0.251427,0.375315,0.219706,0.487451,0.152209,0.273422,0.100332,0.058124,0.058124,0.058124,0.058124,0.08206,0.100332,0.058124,0.08206,0.306723,0.08206,0.058124,0.100332,0.058124,0.058124,0.058124,0.162437,0.115655,0.058124,0.115655,0.152209,0.08206,0.058124,0.058124,0.08206,0.058124,0.100332,0.08206,0.058124,0.08206,0.058124,0.058124,0.212634,0.152209,0.058124,0.058124,0.129085,0.058124,0.058124,0.058124,0.100332,0.058124,0.487451,0.100332,0.08206,0.08206,0.058124,0.08206,0.115655,0.058124,0.058124,0.058124,0.058124,0.08206,0.08206,0.058124,0.189479,0.152209,0.058124,0.058124,0.058124,0.058124,0.08206,0.058124,0.058124,0.058124,0.058124,0.100332
min,-0.002688,-175.590909,-4641.666667,-3832413000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,-0.298724,-5500500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.310851,0.0,114250.7,1.293946,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.041241,0.777554,0.07359,26844500.0,2.933432,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,25.93292,56.985936,68.089193,9133566000.0,77.056876,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# Split Training and Testing sets

In [20]:
# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Logistic Regression
our basic logistic regression parameters, which include:

- The solver parameter is set to 'lbfgs', which is an algorithm for learning and optimization. The particular solver isn't very important in this example, but note that a number of optimizers exist.
- The max_iter parameter will be set to 200 iterations, which will give the model sufficient opportunity to converge on effective weights

In [21]:
from sklearn.linear_model import LogisticRegression

# Define the logistic regression model
log_classifier = LogisticRegression(solver="lbfgs",max_iter=200)

# Train the model
log_classifier.fit(X_train,y_train)

# Evaluate the model
y_pred = log_classifier.predict(X_test)
print(f" Logistic regression model accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Logistic regression model accuracy: 0.622


# Random Forest

In [22]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.500


# SVM
- issues running when operating cash flow was added

In [23]:
# from sklearn.svm import SVC

# # Create the SVM model
# svm = SVC(kernel='linear')

# # Train the model
# svm.fit(X_train, y_train)

# # Evaluate the model
# y_pred = svm.predict(X_test_scaled)
# print(f" SVM model accuracy: {accuracy_score(y_test,y_pred):.3f}")

# Deep Learning

In [24]:
len(X_train_scaled[0])

84

In [25]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  number_input_features*1
# hidden_nodes_layer2 = number_input_features

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
# nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [26]:
# Train the model 
fit_model = nn.fit(X_train_scaled, y_train, epochs=100) 
# Evaluate the model using the test data 
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
3/3 - 0s - loss: 1.0060 - accuracy: 0.5541
Loss: 1.0059772729873657, Accuracy: 0.5540540814399719
