In [7]:
import pandas as pd
import numpy as np

In [8]:
# Load the raw CSV export, discard every row that has at least one missing
# value, then remove the 'Unnamed: 0' column.
data = (
    pd.read_csv('../data/combined_bloomberg_mostly_raw.csv')
    .dropna()
    .drop(columns=['Unnamed: 0'])
)

In [9]:
# Keep only the part of 'Issuer Ticker' that precedes the first space and use
# it as the row index.
data['Issuer Ticker'] = data['Issuer Ticker'].str.split(' ').str.get(0)
data = data.set_index('Issuer Ticker')

# Binary flag: 1 when 'Offer To 1st Close' is strictly positive, otherwise 0.
data['Underpriced'] = data['Offer To 1st Close'].gt(0).astype(int)

In [10]:
# Gather the distinct labels of each industry classification column.
sectors = data["Industry Sector"].unique()
# NOTE(review): `industry_groups` is never populated, so its reported count is
# always 0; the "Industry Group" column is collected under the name
# `industries` instead. Confirm whether this placeholder is still needed.
industry_groups = []
industries = data["Industry Group"].unique()
sub_industries = data["Industry Subgroup"].unique()

# Report how many distinct labels each collection holds.
for label, found in (
    ("no sectors:", sectors),
    ("no industy_groups:", industry_groups),
    ("no industries:", industries),
    ("no sub_industries:", sub_industries),
):
    print(label, len(found))

no sectors: 10
no industy_groups: 0
no industries: 64
no sub_industries: 265


In [11]:
# Replace each industry classification column with a numeric code stored as
# float. A label's code is its position in the matching array of unique labels
# (`sectors`, `industries`, `sub_industries`).
category_levels = {
    "Industry Sector": sectors,
    "Industry Group": industries,
    "Industry Subgroup": sub_industries,
}
for column, levels in category_levels.items():
    # Build the label -> position table once per column so each row costs a
    # single dict lookup instead of a full np.where scan over `levels`.
    code_of = {label: code for code, label in enumerate(levels)}
    codes = data[column].map(code_of)
    # Series.map yields NaN for labels missing from the table. Fail loudly
    # rather than writing NaN codes, e.g. when this cell is re-run on a frame
    # whose columns were already encoded.
    if codes.isna().any():
        raise ValueError(
            f"{column!r} contains values with no code; "
            "restart the kernel and run the notebook from the top"
        )
    data[column] = codes.astype('float')
data.dtypes

Issuer Name                     object
Sales - 1 Yr Growth            float64
Profit Margin                  float64
Return on Assets               float64
Offer Size (M)                 float64
Shares Outstanding (M)         float64
Offer Price                    float64
Offer To 1st Close             float64
Market Cap at Offer (M)        float64
Trade Date (US)                 object
cusip                           object
Cash Flow per Share            float64
Offer Size (M).1               float64
Shares Outstanding (M).1       float64
Instit Owner (% Shares Out)    float64
Instit Owner (Shares Held)     float64
Filing Term Price Range         object
Priced Range                    object
Industry Sector                float64
Industry Group                 float64
Industry Subgroup              float64
Underpriced                      int64
dtype: object

In [12]:
# Write the cleaned frame (index included) to a new CSV file.
clean_csv_path = '../data/clean_bloomberg_with_sectors.csv'
data.to_csv(clean_csv_path)