In [None]:
import pandas as pd
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
import yfinance as yf
import seaborn as sns

In [50]:
#Importing dataset and defining path variables
path = './data/'
data_2018 = 'ipo_stock_2010_2018.csv'
data_2019 = 'ipo_stock_2019.csv'
volume_data = 'IPO_trading_volumes.csv'


#Reading in file 1 data
df_ipo = pd.read_csv(path + data_2018)

#Reading in file 2 data
cols = ['Symbol', 'ipoDate', 'dayOfWeek', 'employees',
  'Name', 'CEOAge', 'Sector', 'Year', 'Month', 'Day']
df_ipo_2 = pd.read_csv(path + data_2019, encoding= 'ISO-8859-1', usecols= cols)

df_ipo_2

Unnamed: 0,Symbol,Year,Month,Day,dayOfWeek,Name,Sector,ipoDate,CEOAge,employees
0,A,1999,11,18,3,"Agilent Technologies, Inc.",Capital Goods,11/18/99 0:00,56.0,13500
1,AAC,2014,10,2,3,"AAC Holdings, Inc.",Health Care,10/2/14 0:00,46.0,2100
2,AAOI,2013,9,26,3,"Applied Optoelectronics, Inc.",Technology,9/26/13 0:00,54.0,3054
3,AAP,2001,11,29,3,Advance Auto Parts Inc,Consumer Services,11/29/01 0:00,59.0,71000
4,AAT,2011,1,13,3,"American Assets Trust, Inc.",Consumer Services,1/13/11 0:00,79.0,194
...,...,...,...,...,...,...,...,...,...,...
3757,ZUMZ,2005,5,6,4,Zumiez Inc.,Consumer Services,5/6/05 0:00,57.0,8900
3758,ZUO,2018,4,12,3,"Zuora, Inc.",,4/12/18 0:00,,933
3759,ZX,2011,5,12,3,China Zenix Auto International Limited,Capital Goods,5/12/11 0:00,47.0,-
3760,ZYME,2017,4,28,4,Zymeworks Inc.,Health Care,4/28/17 0:00,,147


In [51]:
#Cleaning up IPO Date formatting
df_ipo_2['Month'] = df_ipo_2.apply(lambda x: '0' + str(x['Month']) if len(str(x['Month'])) < 2 else x['Month'], axis = 1)
df_ipo_2['Day'] = df_ipo_2.apply(lambda x: '0' + str(x['Day']) if len(str(x['Day'])) < 2 else x['Day'], axis = 1)
df_ipo_2['ipoDate'] = df_ipo_2.apply(lambda x: str(x['Year']) + '-' + str(x['Month']) + '-' + str(x['Day']), axis = 1)
df_ipo_2.drop(columns=['Month', 'Day', 'Year'], inplace= True)
df_ipo_2

Unnamed: 0,Symbol,dayOfWeek,Name,Sector,ipoDate,CEOAge,employees
0,A,3,"Agilent Technologies, Inc.",Capital Goods,1999-11-18,56.0,13500
1,AAC,3,"AAC Holdings, Inc.",Health Care,2014-10-02,46.0,2100
2,AAOI,3,"Applied Optoelectronics, Inc.",Technology,2013-09-26,54.0,3054
3,AAP,3,Advance Auto Parts Inc,Consumer Services,2001-11-29,59.0,71000
4,AAT,3,"American Assets Trust, Inc.",Consumer Services,2011-01-13,79.0,194
...,...,...,...,...,...,...,...
3757,ZUMZ,4,Zumiez Inc.,Consumer Services,2005-05-06,57.0,8900
3758,ZUO,3,"Zuora, Inc.",,2018-04-12,,933
3759,ZX,3,China Zenix Auto International Limited,Capital Goods,2011-05-12,47.0,-
3760,ZYME,4,Zymeworks Inc.,Health Care,2017-04-28,,147


In [52]:
#Removing duplicate columns
df_ipo_2.drop_duplicates(inplace= True)

#Imputing missing values with mean for numerical columns
numeric_cols = df_ipo.select_dtypes(include=['int64','float64'])
categorical_cols = df_ipo.select_dtypes(include=['object'])

#Imputing numeric cols
numeric_cols.fillna(numeric_cols.mean(), inplace=True)
categorical_cols = categorical_cols.apply(lambda x: x.fillna(x.mode()[0]) if x.isna().sum() > 0 else x, axis = 0)


In [53]:
#Removing CEO birth year and creating CEO age feature
numeric_cols.CEO_born = 2022 - numeric_cols.CEO_born
numeric_cols.rename(columns = {'CEO_born': 'CEO_age'}, inplace= True)

#Adding day of the week feature
categorical_cols['IPO_weekday'] = pd.to_datetime(categorical_cols['Date Priced']).dt.day_of_week


In [54]:
#Concatenating imputed cols
df_ipo = pd.concat([numeric_cols, categorical_cols], axis = 1)
df_ipo.isna().sum()

Price                0
Shares               0
Offer Amount         0
employees            0
firstday_adjclose    0
firstday_open        0
firstday_spread      0
firstday_volume      0
inmonth_adjclose     0
inmonth_open         0
inmonth_spread       0
inmonth_volume       0
inweek_adjclose      0
inweek_open          0
inweek_spread        0
inweek_volume        0
employees2019        0
CEO_pay              0
CEO_age              0
Company Name         0
Symbol               0
Market               0
Date Priced          0
address              0
US_state             0
descriptions         0
link_nasdaq          0
year                 0
sector               0
industry             0
IPO_weekday          0
dtype: int64

In [55]:
#Dropping unwanted / inaccurate / incomplete cols
drop_columns = ['Offer Amount', 'firstday_adjclose', 'firstday_volume','inmonth_volume', 'inweek_volume',
       'firstday_open', 'firstday_spread', 'inmonth_adjclose', 'inmonth_open', 'inmonth_spread',
        'inweek_adjclose', 'inweek_open', 'inweek_spread','employees2019', 'firstday_volume',
       'address', 'US_state', 'descriptions', 'link_nasdaq','year', 'industry', 'year', 'industry', 'Price', 'CEO_pay', 'Shares', 'Market']
df_ipo.drop(drop_columns, axis = 1, inplace = True)
df_ipo

Unnamed: 0,employees,CEO_age,Company Name,Symbol,Date Priced,sector,IPO_weekday
0,128.0,57.460211,ANDATEE CHINA MARINE FUEL SERVICES CORP,AMCF,2010-01-26,Energy,1
1,3.0,60.000000,CHESAPEAKE LODGING TRUST,CHSP,2010-01-22,Real Estate,4
2,1486.0,50.000000,GENERAC HOLDINGS INC.,GNRC,2010-02-11,Industrials,3
3,568.0,62.000000,"QUINSTREET, INC",QNST,2010-02-11,Technology,3
4,6.0,61.000000,TERRENO REALTY CORP,TRNO,2010-02-10,Real Estate,2
...,...,...,...,...,...,...,...
1131,2559.0,48.000000,TENCENT MUSIC ENTERTAINMENT GROUP,TME,2018-12-12,Technology,2
1132,25.0,64.000000,"SYNTHORX, INC.",THOR,2018-12-07,Healthcare,4
1133,11.0,51.000000,DIAMEDICA THERAPEUTICS INC.,DMAC,2018-12-07,Healthcare,4
1134,700.0,49.000000,"MODERNA, INC.",MRNA,2018-12-07,Healthcare,4


In [56]:
#Creating trading volume features from trading volume dataset and yfinance data

#Reading in trading volume data (contains about 900 of the total 1100 companies)
df_ipo_volumes = pd.read_csv(path + volume_data, sep = ',', encoding= 'ISO-8859-1')

#Removing duplicate rows
df_ipo_volumes.drop_duplicates(inplace=True)

#Computing inweek and inmonth volumes
df_ipo_volumes['inweek_volume'] = df_ipo_volumes.loc[:, 'volumeDay1':'volumeDay6'].apply(lambda x: sum(x), axis = 1)
df_ipo_volumes['inmonth_volume'] = df_ipo_volumes.loc[:, 'volumeDay1': 'volumeDay29'].apply(lambda x: sum(x), axis = 1)
df_ipo_volumes = df_ipo_volumes[['Symbol', 'volumeDay0', 'inweek_volume']]
df_ipo_volumes

  df_ipo_volumes = pd.read_csv(path + volume_data, sep = ',', encoding= 'ISO-8859-1')


Unnamed: 0,Symbol,volumeDay0,inweek_volume
0,A,59753154.0,3.362232e+07
1,AAC,2799073.0,7.071238e+05
2,AAOI,948999.0,6.891676e+05
3,AAP,371100.0,1.358983e+06
4,AAT,15536889.0,4.181718e+06
...,...,...,...
3757,ZUMZ,6793424.0,1.955122e+06
3758,ZUO,13102419.0,5.575364e+06
3759,ZX,4987284.0,8.667534e+06
3760,ZYME,953887.0,2.718968e+05


In [57]:
#Renaming cols for dataset merging
df_ipo_2.rename(columns = {'dayOfWeek':'IPO_weekday',
    'CEOAge':'CEO_age',
    'Sector': 'sector'}, inplace=True)

#Adding computed trading volumes
df_ipo_2['firstday_volume'] = df_ipo_volumes['volumeDay0']
df_ipo_2['inweek_volume'] = df_ipo_volumes['inweek_volume']

df_ipo_2

Unnamed: 0,Symbol,IPO_weekday,Name,sector,ipoDate,CEO_age,employees,firstday_volume,inweek_volume
0,A,3,"Agilent Technologies, Inc.",Capital Goods,1999-11-18,56.0,13500,59753154.0,3.362232e+07
1,AAC,3,"AAC Holdings, Inc.",Health Care,2014-10-02,46.0,2100,2799073.0,7.071238e+05
2,AAOI,3,"Applied Optoelectronics, Inc.",Technology,2013-09-26,54.0,3054,948999.0,6.891676e+05
3,AAP,3,Advance Auto Parts Inc,Consumer Services,2001-11-29,59.0,71000,371100.0,1.358983e+06
4,AAT,3,"American Assets Trust, Inc.",Consumer Services,2011-01-13,79.0,194,15536889.0,4.181718e+06
...,...,...,...,...,...,...,...,...,...
3757,ZUMZ,4,Zumiez Inc.,Consumer Services,2005-05-06,57.0,8900,6793424.0,1.955122e+06
3758,ZUO,3,"Zuora, Inc.",,2018-04-12,,933,13102419.0,5.575364e+06
3759,ZX,3,China Zenix Auto International Limited,Capital Goods,2011-05-12,47.0,-,4987284.0,8.667534e+06
3760,ZYME,4,Zymeworks Inc.,Health Care,2017-04-28,,147,953887.0,2.718968e+05


In [58]:
#Converting dtypes
df_ipo_2.employees = df_ipo_2.employees.replace('-', 0)
df_ipo_2.employees = df_ipo_2.employees.astype('float64')

#Mean imputing offer price, sector, CEO age and employee columns
df_ipo_2['CEO_age'].fillna(df_ipo_2['CEO_age'].mean(), inplace=True)
df_ipo_2['employees'].fillna(df_ipo_2['employees'].mean(), inplace=True)

#Adding Other sector for missing sectors
df_ipo_2['sector'].fillna('Other', inplace = True)

#Filling missing volume data with means
df_ipo_2.firstday_volume.fillna(df_ipo_2.firstday_volume.mean(), inplace = True)
df_ipo_2.inweek_volume.fillna(df_ipo_2.inweek_volume.mean(), inplace = True)

#Dropping cols with missing closing prices
df_ipo_2.dropna(inplace=True)
df_ipo_2


Unnamed: 0,Symbol,IPO_weekday,Name,sector,ipoDate,CEO_age,employees,firstday_volume,inweek_volume
0,A,3,"Agilent Technologies, Inc.",Capital Goods,1999-11-18,56.000000,13500.0,59753154.0,3.362232e+07
1,AAC,3,"AAC Holdings, Inc.",Health Care,2014-10-02,46.000000,2100.0,2799073.0,7.071238e+05
2,AAOI,3,"Applied Optoelectronics, Inc.",Technology,2013-09-26,54.000000,3054.0,948999.0,6.891676e+05
3,AAP,3,Advance Auto Parts Inc,Consumer Services,2001-11-29,59.000000,71000.0,371100.0,1.358983e+06
4,AAT,3,"American Assets Trust, Inc.",Consumer Services,2011-01-13,79.000000,194.0,15536889.0,4.181718e+06
...,...,...,...,...,...,...,...,...,...
3757,ZUMZ,4,Zumiez Inc.,Consumer Services,2005-05-06,57.000000,8900.0,6793424.0,1.955122e+06
3758,ZUO,3,"Zuora, Inc.",Other,2018-04-12,54.787097,933.0,13102419.0,5.575364e+06
3759,ZX,3,China Zenix Auto International Limited,Capital Goods,2011-05-12,47.000000,0.0,4987284.0,8.667534e+06
3760,ZYME,4,Zymeworks Inc.,Health Care,2017-04-28,54.787097,147.0,953887.0,2.718968e+05


In [59]:
from datetime import timedelta

#Adding computed trading volumes
vol_check = df_ipo_2[['Symbol','firstday_volume', 'inweek_volume']]
df_ipo = df_ipo.merge(vol_check, how = 'left', on = 'Symbol')

#Computing missing trading volumes using yfinance

#defining function for volume extractions
def getVolumes(Symbol, Date):
    print(Symbol)

    getVolumes.counter += 1

    #Printing company progress
    print(getVolumes.counter)

    ipo_date = pd.to_datetime(Date)
    try:
        data = yf.download(Symbol, ipo_date, ipo_date + timedelta(days= 8))['Volume']
    except:
        return (np.NAN,np.NAN)
    if data.empty:
        return (np.NAN,np.NAN)
    first_day_volume = data[0]
    inweek_volume = sum(data[:7])
    return (first_day_volume, inweek_volume)

getVolumes.counter = 0

df_missing_vol = df_ipo.loc[df_ipo.firstday_volume.isna()].copy()
df_missing_vol[['firstday_volume', 'inweek_volume']] = df_missing_vol.apply(lambda x: getVolumes(x['Symbol'], x['Date Priced']), axis = 1, result_type='expand')

AMCF
1
[*********************100%***********************]  1 of 1 completed
CRMD
2
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CRMD: Data doesn't exist for startDate = 1269489600, endDate = 1270180800
ELMD
3
[*********************100%***********************]  1 of 1 completed
STND
4
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- STND: No timezone found, symbol may be delisted
GNOM
5
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- GNOM: Data doesn't exist for startDate = 1289451600, endDate = 1290142800
TBET
6
[*********************100%***********************]  1 of 1 completed
BGMD
7
[*********************100%***********************]  1 of 1 completed
PACD
8
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- PACD: No timezone found, symbol may be delisted
ET
9
[*********************100%***********************]  1 o

In [60]:
#Joining in additional trading volumes
df_ipo.fillna(df_missing_vol, inplace=True)

#Filling in remaining NaNs with mean volumes
df_ipo['firstday_volume'].fillna(df_ipo['firstday_volume'].mean(), inplace=True)
df_ipo['inweek_volume'].fillna(df_ipo['inweek_volume'].mean(), inplace=True)

In [61]:
#Creating a larger dataset with lower amount of features from df_ipo and df_ipo_2
#Dropping unneeded columns in both datasets

#Renaming columns so dfs match
df_ipo.rename(columns={'Date Priced': 'ipoDate', 'Company Name': 'Name'}, inplace=True)

#Concatenating dataframes
df_master = pd.concat([df_ipo, df_ipo_2.loc[~df_ipo_2.Symbol.isin(df_ipo.Symbol)]], axis = 0)
df_master.reset_index(inplace=True)
df_master

Unnamed: 0,index,employees,CEO_age,Name,Symbol,ipoDate,sector,IPO_weekday,firstday_volume,inweek_volume
0,0,128.000000,57.460211,ANDATEE CHINA MARINE FUEL SERVICES CORP,AMCF,2010-01-26,Energy,1,955563.0,5.878947e+06
1,1,3.000000,60.000000,CHESAPEAKE LODGING TRUST,CHSP,2010-01-22,Real Estate,4,3492458.0,6.008493e+05
2,2,1486.000000,50.000000,GENERAC HOLDINGS INC.,GNRC,2010-02-11,Industrials,3,16892087.0,2.179334e+06
3,3,568.000000,62.000000,"QUINSTREET, INC",QNST,2010-02-11,Technology,3,5372077.0,3.362175e+06
4,4,6.000000,61.000000,TERRENO REALTY CORP,TRNO,2010-02-10,Real Estate,2,4515261.0,7.616977e+05
...,...,...,...,...,...,...,...,...,...,...
3901,3749,28.000000,50.000000,Zion Oil & Gas Inc,ZN,2007-01-03,Energy,2,26300.0,2.030277e+05
3902,3751,7447.262841,54.787097,China Southern Airlines Company Limited,ZNH,1997-07-30,Transportation,2,4703100.0,2.527164e+06
3903,3752,5473.000000,49.000000,"Zoe&#39;s Kitchen, Inc.",ZOES,2014-04-11,Consumer Services,4,5765164.0,2.644819e+06
3904,3757,8900.000000,57.000000,Zumiez Inc.,ZUMZ,2005-05-06,Consumer Services,4,6793424.0,1.955122e+06


In [62]:
#Pulling accurate prices from yahoo finance
from datetime import timedelta
counter = 0

def getPrices(Symbol, Date):
    print(Symbol)
    #Counter gets increased by 1 for every function call and outputs our progress
    getPrices.counter += 1
    print(str(getPrices.counter) + '/' + '3906')
    
    ipo_date = pd.to_datetime(Date, format = '%Y-%m-%d')
    try:
        data = yf.download(Symbol, ipo_date, ipo_date + timedelta(days = 50))
    except:
        return (np.NAN, np.NAN, np.NAN, np.NAN)
    if data.shape[0] < 30:
        return (np.NAN, np.NAN, np.NAN, np.NAN)

    return (data['Open'][0], data['Adj Close'][0], data['Adj Close'][6], data['Adj Close'][29])

#Initializing function counter
getPrices.counter = 0

df_master[['Y_Open_Price', 'Y_Close_Price_0', 'Y_Close_Price_6', 'Y_Close_Price_29']] = df_master.apply(lambda x: getPrices(x['Symbol'], x['ipoDate']), axis = 1, result_type='expand')
df_master


AMCF
1/3906
[*********************100%***********************]  1 of 1 completed
CHSP
2/3906
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CHSP: No timezone found, symbol may be delisted
GNRC
3/3906
[*********************100%***********************]  1 of 1 completed
QNST
4/3906
[*********************100%***********************]  1 of 1 completed
TRNO
5/3906
[*********************100%***********************]  1 of 1 completed
PDM
6/3906
[*********************100%***********************]  1 of 1 completed
IRWD
7/3906
[*********************100%***********************]  1 of 1 completed
SSNC
8/3906
[*********************100%***********************]  1 of 1 completed
STNG
9/3906
[*********************100%***********************]  1 of 1 completed
HTHT
10/3906
[*********************100%***********************]  1 of 1 completed
CRMD
11/3906
[*********************100%***********************]  1 of 1 completed
FIBK
12/3906
[*********************100

In [None]:
#Filtering out any pre 2006 IPOs (due to twitter founding date) and NA prices on Yahoo Finance
df_master = df_master.loc[(~df_master.Y_Open_Price.isna()) & (pd.to_datetime(df_master.ipoDate) >= pd.to_datetime('2006-01-01'))]
df_master


Unnamed: 0,index,employees,CEO_age,Name,Symbol,ipoDate,sector,IPO_weekday,firstday_volume,inweek_volume,Y_Open_Price,Y_Close_Price_0,Y_Open_Price_6,Y_Open_Price_29
0,0,128.000000,57.460211,ANDATEE CHINA MARINE FUEL SERVICES CORP,AMCF,2010-01-26,Energy,1,955563.0,5.878947e+06,6.500000,5.770000,6.000000,7.500000
2,2,1486.000000,50.000000,GENERAC HOLDINGS INC.,GNRC,2010-02-11,Industrials,3,16892087.0,2.179334e+06,13.000000,8.460629,8.625361,9.897091
3,3,568.000000,62.000000,"QUINSTREET, INC",QNST,2010-02-11,Technology,3,5372077.0,3.362175e+06,15.000000,15.000000,13.600000,16.049999
4,4,6.000000,61.000000,TERRENO REALTY CORP,TRNO,2010-02-10,Real Estate,2,4515261.0,7.616977e+05,18.750000,13.649583,13.722771,14.227767
5,5,109.000000,59.000000,"PIEDMONT OFFICE REALTY TRUST, INC.",PDM,2010-02-10,Real Estate,2,4343012.0,3.551568e+06,14.750000,7.987291,9.149545,10.081815
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3895,3736,59.000000,58.000000,"ZAIS Group Holdings, Inc.",ZAIS,2013-03-22,Finance,4,1511224.0,3.912183e+05,10.020000,10.020000,10.020000,10.120000
3897,3739,7447.262841,54.787097,ProShares UltraPro Short NASDAQ Biotechnology,ZBIO,2015-06-22,Other,0,0.0,1.593089e+05,41.900002,41.900002,44.299999,38.419998
3898,3743,3181.000000,41.000000,"Zillow Group, Inc.",ZG,2011-07-20,Miscellaneous,2,17549817.0,6.998521e+06,17.776737,11.153726,10.570627,10.954163
3900,3746,7447.262841,54.787097,Credit Suisse AG,ZIV,2010-11-29,Finance,0,0.0,7.145535e+04,12.298750,12.298750,13.318750,13.741250


In [None]:
#Building binary flags for three return periods (1 for underperformance, 0 for non underperformance, e.g. positive returns)
df_master['intra_day_up'] = df_master.apply(lambda x: int(x['Y_Close_Price_0'] < x['Y_Open_Price']), axis = 1)
df_master['intra_week_up'] = df_master.apply(lambda x: int(x['Y_Close_Price_6'] < x['Y_Open_Price']), axis = 1)
df_master['intra_month_up'] = df_master.apply(lambda x: int(x['Y_Close_Price_29'] < x['Y_Open_Price']), axis = 1)
df_master.drop(columns= ['Y_Close_Price_0', 'Y_Close_Price_6', 'Y_Close_Price_29'], inplace= True)
df_master

In [None]:
#Looking at class balances
print(df_master.intra_day_up.value_counts(normalize=True))
print(df_master.intra_week_up.value_counts(normalize=True))
print(df_master.intra_month_up.value_counts(normalize=True))

1    0.691769
0    0.308231
Name: intra_day_up, dtype: float64
1    0.69819
0    0.30181
Name: intra_week_up, dtype: float64
1    0.670753
0    0.329247
Name: intra_month_up, dtype: float64


In [None]:
#Getting S&P 500 data to capture market sentiment on that date
sp500 = yf.download('^GSPC', start = pd.to_datetime(df_master.ipoDate).min() - timedelta(days = 15), end = pd.to_datetime(df_master.ipoDate).max())
sp500 = sp500[['Close']].pct_change()

#Computing rolling 7 day return averages
sp500 = sp500.rolling(7).mean()

#Getting 7 day rolling return for each row entry in both datasets
df_ipo['SP500_lastweek_return'] = df_ipo.apply(lambda x: sp500.loc[sp500.index == pd.to_datetime(x['ipoDate'])]['Close'][0] if pd.to_datetime(x['ipoDate']) in sp500.index else 0, axis = 1)
df_master['SP500_lastweek_return'] = df_master.apply(lambda x: sp500.loc[sp500.index == pd.to_datetime(x['ipoDate'])]['Close'][0] if pd.to_datetime(x['ipoDate']) in sp500.index else 0, axis = 1)
df_master

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,index,employees,CEO_age,Name,Symbol,ipoDate,sector,IPO_weekday,firstday_volume,inweek_volume,Y_Open_Price,Y_Close_Price_0,Y_Open_Price_6,Y_Open_Price_29,intra_day_up,intra_week_up,intra_month_up,SP500_lastweek_return
0,0,128.000000,57.460211,ANDATEE CHINA MARINE FUEL SERVICES CORP,AMCF,2010-01-26,Energy,1,955563.0,5.878947e+06,6.500000,5.770000,6.000000,7.500000,1,1,0,-0.007087
2,2,1486.000000,50.000000,GENERAC HOLDINGS INC.,GNRC,2010-02-11,Industrials,3,16892087.0,2.179334e+06,13.000000,8.460629,8.625361,9.897091,1,1,1,-0.003156
3,3,568.000000,62.000000,"QUINSTREET, INC",QNST,2010-02-11,Technology,3,5372077.0,3.362175e+06,15.000000,15.000000,13.600000,16.049999,0,1,0,-0.003156
4,4,6.000000,61.000000,TERRENO REALTY CORP,TRNO,2010-02-10,Real Estate,2,4515261.0,7.616977e+05,18.750000,13.649583,13.722771,14.227767,1,1,1,-0.002686
5,5,109.000000,59.000000,"PIEDMONT OFFICE REALTY TRUST, INC.",PDM,2010-02-10,Real Estate,2,4343012.0,3.551568e+06,14.750000,7.987291,9.149545,10.081815,1,1,1,-0.002686
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3895,3736,59.000000,58.000000,"ZAIS Group Holdings, Inc.",ZAIS,2013-03-22,Finance,4,1511224.0,3.912183e+05,10.020000,10.020000,10.020000,10.120000,0,0,0,0.000234
3897,3739,7447.262841,54.787097,ProShares UltraPro Short NASDAQ Biotechnology,ZBIO,2015-06-22,Other,0,0.0,1.593089e+05,41.900002,41.900002,44.299999,38.419998,0,0,1,0.000964
3898,3743,3181.000000,41.000000,"Zillow Group, Inc.",ZG,2011-07-20,Miscellaneous,2,17549817.0,6.998521e+06,17.776737,11.153726,10.570627,10.954163,1,1,1,0.000717
3900,3746,7447.262841,54.787097,Credit Suisse AG,ZIV,2010-11-29,Finance,0,0.0,7.145535e+04,12.298750,12.298750,13.318750,13.741250,0,0,0,0.001159


In [None]:
#Writing data to disk
df_master.to_csv(path + 'master_data.csv', index=False)