In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import tensorflow as tf

In [2]:
# Load the company overview table (one row per ticker) and preview it
overview_csv = "raw_dataset/company_overview.csv"
IPO_df = pd.read_csv(overview_csv)
IPO_df

Unnamed: 0.1,Unnamed: 0,SYMBOL,ASSET_TYPE,COMPANY_NAME,EXCHANGE,CURRENCY,COUNTRY,SECTOR,INDUSTRY,ADDRESS
0,0,MRM,Common Stock,MEDIROM Healthcare Technologies Inc,NASDAQ,USD,USA,Consumer Cyclical,Personal Services,"Tradepia Odaiba, Tokyo, Japan, 135-0091"
1,1,VTAQ,Common Stock,Ventoux CCM Acquisition Corp,NASDAQ,USD,USA,Financial Services,Shell Companies,"1 East Putnam Avenue, Greenwich, CT, United St..."
2,2,IKT,Common Stock,"Inhibikase Therapeutics, Inc",NASDAQ,USD,USA,Healthcare,Biotechnology,"3350 Riverwood Parkway, Atlanta, GA, United St..."
3,3,GBS,Common Stock,GBS Inc,NASDAQ,USD,USA,Healthcare,Medical Devices,"708 3rd Avenue, New York, NY, United States, 1..."
4,4,HCAR,Common Stock,Healthcare Services Acquisition Corporation,NASDAQ,USD,USA,Financial Services,Shell Companies,"7809 Woodmont Avenue, Bethesda, MD, United Sta..."
...,...,...,...,...,...,...,...,...,...,...
656,656,RBKB,Common Stock,"Rhinebeck Bancorp, Inc",NASDAQ,USD,USA,Financial Services,Banks-Regional,"2 Jefferson Plaza, Poughkeepsie, NY, United St..."
657,657,BCOW,Common Stock,"1895 Bancorp of Wisconsin, Inc",NASDAQ,USD,USA,Financial Services,Banks-Regional,"7001 West Edgerton Avenue, Greenfield, WI, Uni..."
658,658,MDJH,Common Stock,MDJM Ltd,NASDAQ,USD,USA,Real Estate,Real Estate Services,"Saidun Center, Tianjin, China"
659,659,MTC,Common Stock,"Mmtec, Inc",NASDAQ,USD,USA,Technology,Software-Application,"Air China Century Building, Beijing, China, 10..."


In [3]:
# Load the three-month post-IPO return table and preview it
returns_csv = "raw_dataset/three_month_return.csv"
return_df = pd.read_csv(returns_csv)
return_df

Unnamed: 0,SYMBOL,IPO_DATE,COMPANY_NAME,IPO_PRICE,CURRENT_PRICE,PERCENTAGE_RETURN,THREE_MTH_DATE,THREE_MTH_IPO,PRICE_CHANGE,THREE_MTH_RETURN,PRICE_GAIN_LOSS
0,SVOK,2020-12-18,Seven Oaks Acquisition,10.0,9.91,-0.0090,2021-03-18,10.0450,0.0450,0.004500,Gain
1,GFX,2020-12-18,Golden Falcon Acquisition,10.0,9.83,-0.0170,2021-03-18,9.7516,-0.2484,-0.024840,Loss
2,MASS,2020-12-18,908 Devices,20.0,53.80,1.6900,2021-03-18,49.3800,29.3800,1.469000,Gain
3,DUNE,2020-12-18,Dune Acquisition,10.0,9.74,-0.0260,2021-03-18,9.7600,-0.2400,-0.024000,Loss
4,MTAC,2020-12-18,MedTech Acquisition,10.0,9.94,-0.0060,2021-03-18,9.8800,-0.1200,-0.012000,Loss
...,...,...,...,...,...,...,...,...,...,...,...
478,RBKB,2019-01-17,Rhinebeck Bancorp,10.0,10.49,0.0490,2019-04-17,11.6600,1.6600,0.166000,Gain
479,BCOW,2019-01-09,1895 Bancorp of Wisconsin,10.0,14.94,0.4940,2019-04-09,9.6700,-0.3300,-0.033000,Loss
480,MDJH,2019-01-08,MDJM Ltd.,5.0,4.25,-0.1500,2019-04-08,3.4730,-1.5270,-0.305400,Loss
481,MTC,2019-01-08,"MMTec, Inc.",4.0,2.83,-0.2925,2019-04-08,4.3301,0.3301,0.082525,Gain


# Preprocess

In [4]:
# Collect the names of every object-typed (string) column
IPO_cat = [
    column_name
    for column_name, column_dtype in IPO_df.dtypes.items()
    if column_dtype == "object"
]

# Cardinality of each categorical column: high-cardinality columns are poor
# candidates for one-hot encoding, single-valued ones carry no information
IPO_df[IPO_cat].nunique()

SYMBOL          661
ASSET_TYPE        1
COMPANY_NAME    661
EXCHANGE          7
CURRENCY          1
COUNTRY           1
SECTOR           12
INDUSTRY         73
ADDRESS         598
dtype: int64

In [7]:
# Left-join the company overview onto the returns table by ticker symbol, so
# every row of return_df is kept even when no overview row matches.
# NOTE: this rebinds IPO_df, so the cell is not safe to run twice in a row.
IPO_df = return_df.merge(IPO_df, on="SYMBOL", how="left")

In [9]:
# Inspect the merged frame; the name columns that exist in both inputs come
# back suffixed as COMPANY_NAME_x (returns) and COMPANY_NAME_y (overview)
IPO_df

Unnamed: 0.1,SYMBOL,IPO_DATE,COMPANY_NAME_x,IPO_PRICE,CURRENT_PRICE,PERCENTAGE_RETURN,THREE_MTH_DATE,THREE_MTH_IPO,PRICE_CHANGE,THREE_MTH_RETURN,PRICE_GAIN_LOSS,Unnamed: 0,ASSET_TYPE,COMPANY_NAME_y,EXCHANGE,CURRENCY,COUNTRY,SECTOR,INDUSTRY,ADDRESS
0,SVOK,2020-12-18,Seven Oaks Acquisition,10.0,9.91,-0.0090,2021-03-18,10.0450,0.0450,0.004500,Gain,9.0,Common Stock,Seven Oaks Acquisition Corp,NASDAQ,USD,USA,Financial Services,Shell Companies,"445 Park Avenue, New York, NY, United States, ..."
1,GFX,2020-12-18,Golden Falcon Acquisition,10.0,9.83,-0.0170,2021-03-18,9.7516,-0.2484,-0.024840,Loss,10.0,Common Stock,Golden Falcon Acquisition Corp,NYSE,USD,USA,Financial Services,Shell Companies,"850 Library Avenue, Newark, DE, United States,..."
2,MASS,2020-12-18,908 Devices,20.0,53.80,1.6900,2021-03-18,49.3800,29.3800,1.469000,Gain,11.0,Common Stock,908 Devices Inc,NASDAQ,USD,USA,Healthcare,Medical Devices,"645 Summer Street, Boston, MA, United States, ..."
3,DUNE,2020-12-18,Dune Acquisition,10.0,9.74,-0.0260,2021-03-18,9.7600,-0.2400,-0.024000,Loss,12.0,Common Stock,Dune Acquisition Corporation,NASDAQ,USD,USA,Financial Services,Shell Companies,"700 South Rosemary Avenue, West Palm Beach, FL..."
4,MTAC,2020-12-18,MedTech Acquisition,10.0,9.94,-0.0060,2021-03-18,9.8800,-0.1200,-0.012000,Loss,13.0,Common Stock,MedTech Acquisition Corporation,NASDAQ,USD,USA,Financial Services,Shell Companies,"600 Fifth Avenue, New York, NY, United States,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
478,RBKB,2019-01-17,Rhinebeck Bancorp,10.0,10.49,0.0490,2019-04-17,11.6600,1.6600,0.166000,Gain,656.0,Common Stock,"Rhinebeck Bancorp, Inc",NASDAQ,USD,USA,Financial Services,Banks-Regional,"2 Jefferson Plaza, Poughkeepsie, NY, United St..."
479,BCOW,2019-01-09,1895 Bancorp of Wisconsin,10.0,14.94,0.4940,2019-04-09,9.6700,-0.3300,-0.033000,Loss,657.0,Common Stock,"1895 Bancorp of Wisconsin, Inc",NASDAQ,USD,USA,Financial Services,Banks-Regional,"7001 West Edgerton Avenue, Greenfield, WI, Uni..."
480,MDJH,2019-01-08,MDJM Ltd.,5.0,4.25,-0.1500,2019-04-08,3.4730,-1.5270,-0.305400,Loss,658.0,Common Stock,MDJM Ltd,NASDAQ,USD,USA,Real Estate,Real Estate Services,"Saidun Center, Tianjin, China"
481,MTC,2019-01-08,"MMTec, Inc.",4.0,2.83,-0.2925,2019-04-08,4.3301,0.3301,0.082525,Gain,659.0,Common Stock,"Mmtec, Inc",NASDAQ,USD,USA,Technology,Software-Application,"Air China Century Building, Beijing, China, 10..."


In [10]:
# Drop the ticker, both company-name columns and the street address before
# encoding.  `columns=` replaces the positional axis argument
# (`drop(labels, 1)`), which is deprecated and rejected by newer pandas
# releases.
IPO_df = IPO_df.drop(
    columns=["SYMBOL", "COMPANY_NAME_x", "COMPANY_NAME_y", "ADDRESS"]
)
IPO_df

Unnamed: 0.1,IPO_DATE,IPO_PRICE,CURRENT_PRICE,PERCENTAGE_RETURN,THREE_MTH_DATE,THREE_MTH_IPO,PRICE_CHANGE,THREE_MTH_RETURN,PRICE_GAIN_LOSS,Unnamed: 0,ASSET_TYPE,EXCHANGE,CURRENCY,COUNTRY,SECTOR,INDUSTRY
0,2020-12-18,10.0,9.91,-0.0090,2021-03-18,10.0450,0.0450,0.004500,Gain,9.0,Common Stock,NASDAQ,USD,USA,Financial Services,Shell Companies
1,2020-12-18,10.0,9.83,-0.0170,2021-03-18,9.7516,-0.2484,-0.024840,Loss,10.0,Common Stock,NYSE,USD,USA,Financial Services,Shell Companies
2,2020-12-18,20.0,53.80,1.6900,2021-03-18,49.3800,29.3800,1.469000,Gain,11.0,Common Stock,NASDAQ,USD,USA,Healthcare,Medical Devices
3,2020-12-18,10.0,9.74,-0.0260,2021-03-18,9.7600,-0.2400,-0.024000,Loss,12.0,Common Stock,NASDAQ,USD,USA,Financial Services,Shell Companies
4,2020-12-18,10.0,9.94,-0.0060,2021-03-18,9.8800,-0.1200,-0.012000,Loss,13.0,Common Stock,NASDAQ,USD,USA,Financial Services,Shell Companies
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
478,2019-01-17,10.0,10.49,0.0490,2019-04-17,11.6600,1.6600,0.166000,Gain,656.0,Common Stock,NASDAQ,USD,USA,Financial Services,Banks-Regional
479,2019-01-09,10.0,14.94,0.4940,2019-04-09,9.6700,-0.3300,-0.033000,Loss,657.0,Common Stock,NASDAQ,USD,USA,Financial Services,Banks-Regional
480,2019-01-08,5.0,4.25,-0.1500,2019-04-08,3.4730,-1.5270,-0.305400,Loss,658.0,Common Stock,NASDAQ,USD,USA,Real Estate,Real Estate Services
481,2019-01-08,4.0,2.83,-0.2925,2019-04-08,4.3301,0.3301,0.082525,Gain,659.0,Common Stock,NASDAQ,USD,USA,Technology,Software-Application


# Encoding

In [12]:
# One-hot encode the categorical columns (each category becomes its own
# indicator column named "<COLUMN>_<value>")
categorical_columns = [
    "EXCHANGE",
    "SECTOR",
    "INDUSTRY",
    "PRICE_GAIN_LOSS",
    "ASSET_TYPE",
    "CURRENCY",
    "COUNTRY",
]
IPO_df_encoded = pd.get_dummies(data=IPO_df, columns=categorical_columns)
IPO_df_encoded.head()

Unnamed: 0.1,IPO_DATE,IPO_PRICE,CURRENT_PRICE,PERCENTAGE_RETURN,THREE_MTH_DATE,THREE_MTH_IPO,PRICE_CHANGE,THREE_MTH_RETURN,Unnamed: 0,EXCHANGE_CSE,...,INDUSTRY_Telecom Services,INDUSTRY_Thermal Coal,INDUSTRY_Trucking,INDUSTRY_Utilities-Regulated Gas,INDUSTRY_Waste Management,PRICE_GAIN_LOSS_Gain,PRICE_GAIN_LOSS_Loss,ASSET_TYPE_Common Stock,CURRENCY_USD,COUNTRY_USA
0,2020-12-18,10.0,9.91,-0.009,2021-03-18,10.045,0.045,0.0045,9.0,0,...,0,0,0,0,0,1,0,1,1,1
1,2020-12-18,10.0,9.83,-0.017,2021-03-18,9.7516,-0.2484,-0.02484,10.0,0,...,0,0,0,0,0,0,1,1,1,1
2,2020-12-18,20.0,53.8,1.69,2021-03-18,49.38,29.38,1.469,11.0,0,...,0,0,0,0,0,1,0,1,1,1
3,2020-12-18,10.0,9.74,-0.026,2021-03-18,9.76,-0.24,-0.024,12.0,0,...,0,0,0,0,0,0,1,1,1,1
4,2020-12-18,10.0,9.94,-0.006,2021-03-18,9.88,-0.12,-0.012,13.0,0,...,0,0,0,0,0,0,1,1,1,1


# Target Feature

In [None]:
# Target: 1 when the three-month outcome is labelled "Gain", 0 otherwise.
# The network in this notebook ends in a single sigmoid unit trained with
# binary cross-entropy, and the train/test split stratifies on y, so y must be
# a binary class label rather than the continuous THREE_MTH_RETURN value.
y = IPO_df_encoded["PRICE_GAIN_LOSS_Gain"].astype(int).values

# Columns that are derived from (or measured after) the three-month outcome.
# Keeping any of them in X would leak the label into the features.
outcome_columns = [
    "THREE_MTH_RETURN",
    "THREE_MTH_IPO",
    "PRICE_CHANGE",
    "CURRENT_PRICE",
    "PERCENTAGE_RETURN",
    "PRICE_GAIN_LOSS_Gain",
    "PRICE_GAIN_LOSS_Loss",
]

# Columns that are not usable as numeric features: the two date columns are
# strings (StandardScaler cannot convert them), and the "Unnamed" columns are
# saved row indices from the CSV files (presumably NaN for rows the left merge
# could not match -- verify against the data).
non_feature_columns = ["IPO_DATE", "THREE_MTH_DATE", "Unnamed: 0", "Unnamed: 0.1"]

# errors="ignore" because the index-like columns may or may not be present,
# depending on how the CSVs were written.  astype(float) fails loudly if any
# non-numeric column is still left in the feature matrix.
X = (
    IPO_df_encoded
    .drop(columns=outcome_columns + non_feature_columns, errors="ignore")
    .astype(float)
    .values
)

# Split Training and Testing sets

In [None]:
# Hold out a test set (default split proportions); the fixed random_state keeps
# the partition repeatable.  stratify=y preserves the class balance of y in
# both partitions, which requires y to hold discrete class labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42
)

# Learn the scaling statistics from the training partition only, so nothing
# about the test partition influences the transform
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the same fitted transform to both partitions
X_train_scaled, X_test_scaled = (
    X_scaler.transform(partition) for partition in (X_train, X_test)
)

# Deep Learning

In [None]:
# Seed TensorFlow's global random generator so the initial layer weights are
# the same on every run, in line with the fixed random_state used for the
# train/test split.
tf.random.set_seed(42)

# Network dimensions: one input per scaled feature column, then two small
# hidden layers
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 10
hidden_nodes_layer2 = 5

# Feed-forward binary classifier: two ReLU hidden layers followed by a single
# sigmoid unit that outputs the predicted probability of the positive class
nn = tf.keras.models.Sequential(
    [
        # First hidden layer (also declares the input width)
        tf.keras.layers.Dense(
            units=hidden_nodes_layer1,
            input_dim=number_input_features,
            activation="relu",
        ),
        # Second hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
        # Output layer
        tf.keras.layers.Dense(units=1, activation="sigmoid"),
    ]
)

# Binary cross-entropy pairs with the sigmoid output; report accuracy during
# training and evaluation
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Fit the network on the scaled training partition for 50 epochs; the returned
# History object is kept in fit_model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

# Score the trained network on the held-out partition; evaluate() returns the
# loss followed by each compiled metric (here: accuracy)
evaluation = nn.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = evaluation
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")