In [2]:
import numpy as np
import pandas as pd

In [3]:
# low_memory=False makes pandas infer each column's dtype from the whole file
# in one pass; the chunked default can leave columns with mixed types and emit
# a DtypeWarning on load.
data = pd.read_csv('Data/merged_data.csv', low_memory=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
def compute_features(df_raw):
    """Return a copy of ``df_raw`` extended with derived financial features.

    The input frame is not modified. Every derived quantity is added as a new
    column on the copy; rows whose ``scf`` code is not one of 1, 2, 3 or 7 get
    NaN for the three cash-flow-statement quantities (Investment, Change in
    Working Capital, Internal Cash Flow) and for everything built from them.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Must contain the raw columns read below (``at``, ``lct``, ``scf``,
        ``capx``, ``dv``, ``sale``, ...). A missing column raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df_raw`` with the derived columns appended.

    Notes
    -----
    Changes relative to the previous version of this function:
    * 'Log Sales' is now the natural log of ``sale`` (NaN where ``sale <= 0``);
      it used to store the raw ``sale`` value.
    * 'Current Ratio' is now ``act / lct``; it used to be ``act - lct``.
    * Three column names that contained a stray ``df[`` were corrected:
      'Investment / Net Assets', 'Change in Working Capital / Net Assets' and
      'Market Value of Assets / Book Value of Assets'.
    """
    df = df_raw.copy()

    # Computing quantities
    df['Net Assets'] = df['at'] - df['lct']

    # Row masks for the `scf` codes that select which formula applies.
    scf = df['scf']
    is_fmt_1 = scf == 1
    is_fmt_2_3 = scf.isin([2, 3])
    is_fmt_1_2_3 = scf.isin([1, 2, 3])
    is_fmt_7 = scf == 7

    # TODO (carried over from the original author): "Change these 3".
    df['Investment'] = np.select(
        [is_fmt_1_2_3, is_fmt_7],
        [
            df['capx'] + df['ivch'] + df['aqc'] + df['fuseo'] - df['sppe'] - df['siv'],
            df['capx'] + df['ivch'] + df['aqc'] - df['sppe'] - df['siv'] - df['ivstch'] - df['ivaco'],
        ],
        default=np.nan,
    )

    # NOTE(review): the sign of `dlcch` in the scf 2/3 branch is '+', while the
    # scf 7 branch subtracts it -- confirm against the intended definition.
    df['Change in Working Capital'] = np.select(
        [is_fmt_1, is_fmt_2_3, is_fmt_7],
        [
            df['wcapc'] + df['chech'] + df['dlcch'],
            - df['wcapc'] + df['chech'] + df['dlcch'],
            - df['recch'] - df['invch'] - df['apalch'] - df['txach'] - df['aoloch']
            + df['chech'] - df['fiao'] - df['dlcch'],
        ],
        default=np.nan,
    )

    # The two branches differ only in the last term (`fsrco` vs `exre`).
    internal_cf_common = (df['ibc'] + df['xidoc'] + df['dpc'] + df['txdc']
                          + df['esubc'] + df['sppiv'] + df['fopo'])
    df['Internal Cash Flow'] = np.select(
        [is_fmt_1_2_3, is_fmt_7],
        [internal_cf_common + df['fsrco'], internal_cf_common + df['exre']],
        default=np.nan,
    )

    # NOTE(review): working-capital change enters with a minus sign here --
    # confirm this matches the intended financing-deficit identity.
    df['Financing Deficit'] = (df['dv'] + df['Investment']
                               - df['Change in Working Capital'] - df['Internal Cash Flow'])
    df['Net Debt Issued'] = df['dltis'] - df['dltr']
    df['Net Equity Issued'] = df['sstk'] - df['prstkc']
    df['Net External Financing'] = df['Net Debt Issued'] + df['Net Equity Issued']
    df['Total Debt'] = df['dlc'] + df['dltt']
    df['Book Capitalization'] = df['Total Debt'] + df['teq']

    # Ratios. Zero denominators are not special-cased, so they yield inf/NaN.
    df['Current maturity of long-term debt/net assets'] = df['dlc'] / df['Net Assets']
    df['Long Term Debt / Total Assets'] = df['dltt'] / df['at']
    df['Book Leverage'] = (df['dltt'] + df['dlc']) / (df['dltt'] + df['dlc'] + df['seq'])
    df['Cash Dividend / Net Assets'] = df['dv'] / df['Net Assets']
    df['Investment / Net Assets'] = df['Investment'] / df['Net Assets']
    df['Change in Working Capital / Net Assets'] = df['Change in Working Capital'] / df['Net Assets']
    df['Internal Cash Flow / Net Assets'] = df['Internal Cash Flow'] / df['Net Assets']
    df['Financing Deficit / Net Assets'] = df['Financing Deficit'] / df['Net Assets']
    df['Gross LT Debt Issued / Net Assets'] = df['dltis'] / df['Net Assets']
    df['Net Debt Issued / Net Assets'] = df['Net Debt Issued'] / df['Net Assets']
    df['Net Equity Issued / Net Assets'] = df['Net Equity Issued'] / df['Net Assets']
    df['Net External Financing / Net Assets'] = df['Net External Financing'] / df['Net Assets']
    df['Total Debt / Book Capitalization'] = df['Total Debt'] / df['Book Capitalization']
    df['Tangibility'] = df['ppent'] / df['at']
    df['Market Value of Assets / Book Value of Assets'] = df['mkvalt'] / df['at']
    # Mask non-positive sales first so the log yields NaN rather than -inf
    # (or a runtime warning) for those rows.
    df['Log Sales'] = np.log(df['sale'].where(df['sale'] > 0))
    df['Profitability'] = df['ni'] / df['at']
    df['Current Ratio'] = df['act'] / df['lct']
    df['Quick Ratio'] = (df['che'] + df['rect']) / df['lct']
    df['Debt to Equity'] = df['lt'] / df['teq']
    df['Interest Burden'] = 1 - (df['xint'] / df['oiadp'])
    df['Interest Coverage'] = df['oiadp'] / df['xint']
    df['Leverage'] = df['at'] / df['teq']
    df['Return on Sales (Profit Margin)'] = df['oiadp'] / df['sale']
    df['Working Capital / Total Assets'] = (df['act'] - df['lct']) / df['at']
    df['Retained Earnings / Total Assets'] = df['re'] / df['at']
    df['Earning Before Interest & Tax / Total Assets'] = df['oiadp'] / df['at']
    df['Market Value of Equity / Total Liabilities'] = (df['prcc_f'] * df['csho']) / df['lt']
    df['Sales / Total Assets'] = df['sale'] / df['at']

    return df

# Feature-augmented copy of `data`; compute_features works on a copy, so
# `data` itself is left unmodified.
data_w = compute_features(data)

In [5]:
def select_features(df, feature_cols=(), target_col='Y'):
    """Build a standardised feature matrix and a target vector from ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the feature columns and the target column.
    feature_cols : sequence of str (or a single str), optional
        Names of the columns to use as features. Must name at least one
        column; the empty default raises ``ValueError`` so a forgotten
        selection fails with a clear message.
    target_col : str, default 'Y'
        Name of the target column.

    Returns
    -------
    (X_scaled, y) : tuple of numpy.ndarray
        ``X_scaled`` has one column per feature, each shifted to zero mean and
        scaled to unit (population) standard deviation; ``y`` holds the raw
        target values.

    Raises
    ------
    ValueError
        If no feature columns are given.
    KeyError
        If a requested column is missing from ``df``.
    """
    # A bare string would otherwise be split into single characters by list().
    if isinstance(feature_cols, str):
        feature_cols = [feature_cols]
    feature_cols = list(feature_cols)
    if not feature_cols:
        raise ValueError(
            "select_features needs at least one feature column; "
            "pass feature_cols=[...]"
        )

    # Selecting the feature matrix
    X = df[feature_cols].to_numpy(dtype=float)
    y = df[target_col].to_numpy()

    # Standardising the input features.
    # Done with numpy because StandardScaler is not imported in this notebook.
    # NaNs are ignored when estimating mean/std and stay NaN in the output;
    # constant columns get scale 1 so they map to 0 instead of dividing by zero.
    col_mean = np.nanmean(X, axis=0)
    col_scale = np.nanstd(X, axis=0)
    col_scale[col_scale == 0] = 1.0
    X_scaled = (X - col_mean) / col_scale

    # Returning X and y values
    return X_scaled, y

In [6]:
# Show the frame already stored in `data_w` rather than recomputing every
# feature on the full dataset just to display it.
data_w

Unnamed: 0,YEAR,CUSIP,PERMNO,RET,E,sigmae,datadate,fyear,indfmt,consol,...,Debt to Equity,Interest Burden,Interest Coverage,Leverage,Return on Sales (Profit Margin),Working Capital / Total Assets,Retained Earnings / Total Assets,Earning Before Interest & Tax / Total Assets,Market Value of Equity / Total Liabilities,Sales / Total Assets
0,1970,000032,25881,,,,19701231,1970,INDL,C,...,,0.789291,4.745882,,0.088982,0.052496,0.166039,0.120598,1.067842,1.355306
1,1971,000032,25881,0.919538,28873125.0,0.745560,19711231,1971,INDL,C,...,,-0.003594,0.996419,,0.023664,0.381214,0.105012,0.037947,0.822095,1.603580
2,1972,000032,25881,0.574997,24226875.0,0.677104,19721231,1972,INDL,C,...,,0.730492,3.710459,,0.084657,0.368514,0.144723,0.146130,1.154179,1.726126
3,1973,000032,25881,0.891302,14602500.0,0.590349,19731231,1973,INDL,C,...,,0.784864,4.648227,,0.086808,0.382481,0.217905,0.150521,0.376401,1.733958
4,1974,000032,25881,0.341465,14602500.0,0.833422,19741231,1974,INDL,C,...,,0.766171,4.276622,,0.069429,0.436188,0.267533,0.136282,0.297039,1.962907
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
803737,2012,U72603,13705,,,,20121231,2012,INDL,C,...,0.527652,1.036364,-27.500000,1.527652,-0.035339,0.271935,-2.937589,-0.056371,19.789457,1.595154
803738,2013,U72603,13705,0.892308,204274200.0,1.175481,20131231,2013,INDL,C,...,0.530647,1.256705,-3.895522,1.530647,-0.005605,0.276858,-2.690139,-0.008588,14.521912,1.532230
803739,2014,U72603,13705,0.798850,176538110.0,0.572168,20141231,2014,INDL,C,...,0.406233,0.987282,78.630769,1.406233,0.107898,0.408152,-2.325455,0.150629,11.708859,1.396039
803740,2015,U72603,13705,0.748202,156855450.0,0.642701,20151231,2015,INDL,C,...,0.456190,0.990531,105.603448,1.456190,0.115971,0.351787,-1.547577,0.137619,11.503070,1.186667
