In [9]:
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import metrics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [10]:
# Load the preprocessed dataset; the path is relative to the notebook's
# working directory.
csv_path = 'preprocessed_data.csv'
df = pd.read_csv(csv_path)

In [11]:
# Display the loaded DataFrame to inspect its columns and a sample of rows.
df

Unnamed: 0,Date,Symbol,apturnover,arturnover,assetturnover,croic,commontocap,currentratio,ebitdagrowth,fcffgrowth,...,roe_simple,stdebttocap,enterprisevalue,country,employees,security_delisted,sic,ipo_price,current_price,success
0,2020-12-30,RSI,,16.1314,1.6674,,0.231097,0.7344,-4.970785,,...,2.130902,0.000000,6.634374e+08,United States of America,0,False,7990,21.130000,9.00,
1,2020-12-23,PHAR,,5.4113,0.5199,-0.325334,0.340220,5.5220,-0.037194,-0.967302,...,0.227771,0.000000,9.838328e+09,Netherlands,258,False,2834,19.190000,8.79,
2,2020-12-22,XL,7.1506,3.4712,0.1142,,0.998900,38.0183,-3.426013,,...,-0.320843,0.000582,1.677155e+09,United States of America,0,False,3714,18.890000,2.07,
3,2020-12-11,VVOS,1.5772,11.3512,0.7948,,0.929146,2.6070,-0.145210,,...,-0.712693,0.047616,9.071802e+07,United States of America,97,False,3841,7.070000,2.81,
4,2020-12-10,DM,3.5377,2.9840,0.0395,,0.981003,20.0856,0.737047,,...,-0.065930,0.018997,3.619117e+09,United States of America,0,False,3577,19.730000,3.80,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
901,2012-06-26,EQM,0.5954,4.9638,0.3077,-0.479825,0.759572,2.1685,1.185208,0.188767,...,0.184397,0.000000,1.931194e+09,United States of America,0,True,4922,14.514677,0.00,
902,2012-05-11,IRG,19.3188,65.8114,2.4524,0.019854,0.702414,0.6540,0.170757,1.025517,...,0.082040,0.000000,3.413480e+08,Spain,461,True,5812,16.850000,0.00,
903,2012-05-10,ADNC,7.1319,13.4547,1.3039,0.966075,1.000000,6.4160,0.737988,2.599090,...,0.106876,0.000000,3.534993e+07,United States of America,319,True,3674,19.000000,0.00,
904,2012-04-27,EDG,2.6535,7.0184,0.3407,-0.021939,0.703960,1.6537,-0.208686,-1.124927,...,0.110320,0.004761,8.156529e+09,South Africa,8964,True,1040,10.350000,0.00,


In [12]:
def success_req(df, col1, col2, target_col):
    """
    Label each row of ``df`` as a success or a failure, in place.

    A row is a failure (``False``) when the value in ``col1`` is zero or is
    at or below 50% of the value in ``col2``. It is a success (``True``)
    when ``col1`` is strictly above 50% of ``col2`` and is not zero.
    Rows where the comparison cannot be evaluated (NaN in either column)
    are left untouched.

    Parameters:

    df - DataFrame to be modified (mutated in place and also returned)
    col1 - Column holding the value to test (e.g. the current price)
    col2 - Column holding the reference value (e.g. the IPO price)
    target_col - Target column that receives the True/False label

    Returns:

    The same DataFrame object that was passed in.
    """
    price = df[col1]
    half_reference = df[col2] / 2

    # Strictly above half of the reference value counts as a success.
    df.loc[price > half_reference, target_col] = True

    # At or below half of the reference value is a failure. A price of zero
    # is always a failure; it is applied last so that nothing overwrites it.
    df.loc[(price <= half_reference) | (price == 0), target_col] = False

    return df

In [13]:
# Label every row in one vectorised pass instead of iterating row by row.
# A row is a failure when its current price is at or below half of the IPO
# price, or when the security has been delisted; every other row is a success.
price_halved = df['current_price'] <= df['ipo_price'] * 0.5
# Compare by value rather than identity so numpy booleans are also matched;
# missing values compare as not delisted.
delisted = df['security_delisted'].eq(True)
df['success'] = ~(price_halved | delisted)

In [14]:
# Inspect the enterprisevalue column on its own.
df['enterprisevalue']

0      6.634374e+08
1      9.838328e+09
2      1.677155e+09
3      9.071802e+07
4      3.619117e+09
           ...     
901    1.931194e+09
902    3.413480e+08
903    3.534993e+07
904    8.156529e+09
905    4.822189e+08
Name: enterprisevalue, Length: 906, dtype: float64

In [15]:
# Keep the identifying / descriptive columns together in a separate frame.
label_columns = ['Symbol', 'country', 'Date', 'sic', 'security_delisted']
label = df[label_columns]

In [16]:
# Remove the financial-ratio columns that are not used further on
# (presumably because they contain many missing values - TODO confirm).
# The identifier columns (Symbol, country, Date, sic, security_delisted)
# are intentionally kept in df.
# `columns=` is used because the positional axis argument of
# DataFrame.drop is no longer accepted by pandas 2.x.
dropped_columns = [
    'apturnover',
    'arturnover',
    'fcfftointerestex',
    'fcffgrowth',
    'croic',
    'grossmargin',
    'operatingmargin',
]
df = df.drop(columns=dropped_columns)

In [17]:
# Fill missing numeric values with their column mean. numeric_only=True keeps
# the mean from failing on the text columns (Symbol, country, Date) under
# pandas 2.x; non-numeric columns are left as they are.
# NOTE(review): the means are computed on the full dataset before the
# train/test split, so test rows influence the imputed values - confirm this
# is acceptable.
df = df.fillna(df.mean(numeric_only=True))

In [18]:
# Count the missing values remaining in each column.
df.isna().sum()

Date                       0
Symbol                     0
assetturnover              0
commontocap                0
currentratio               0
ebitdagrowth               0
investedcapitalturnover    0
leverageratio              0
nnep                       0
profitmargin               0
roe_simple                 0
stdebttocap                0
enterprisevalue            0
country                    0
employees                  0
security_delisted          0
sic                        0
ipo_price                  0
current_price              0
success                    0
dtype: int64

In [19]:
# Separate the target column from the remaining columns.
# NOTE(review): the train/test split cell indexes df directly, so X and y do
# not appear to be used in the visible cells - confirm before relying on them.
y = df['success']
X = df.drop(columns=['success'])

In [20]:
# Map each country name to an integer code, stored in a new column.
# The fitted encoder is kept in `le` so that codes can be mapped back later.
# NOTE(review): integer codes imply an ordering between countries that the
# model will treat as numeric - confirm this is intended.
le = preprocessing.LabelEncoder()
df['country_label'] = le.fit_transform(df['country'])

In [21]:
# List the current column names, including the newly added country_label.
df.columns

Index(['Date', 'Symbol', 'assetturnover', 'commontocap', 'currentratio',
       'ebitdagrowth', 'investedcapitalturnover', 'leverageratio', 'nnep',
       'profitmargin', 'roe_simple', 'stdebttocap', 'enterprisevalue',
       'country', 'employees', 'security_delisted', 'sic', 'ipo_price',
       'current_price', 'success', 'country_label'],
      dtype='object')

In [22]:
from sklearn.model_selection import train_test_split

# Columns passed to the classifier as features.
feature_columns = [
    'assetturnover',
    'commontocap',
    'currentratio',
    'ebitdagrowth',
    'investedcapitalturnover',
    'leverageratio',
    'nnep',
    'profitmargin',
    'roe_simple',
    'stdebttocap',
    'enterprisevalue',
    'country_label',
    'employees',
]

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df[feature_columns],
    df['success'],
    test_size=0.2,
    random_state=109,
)

In [23]:
# Disabled experiment, kept for reference only (not executed):
# from sklearn.svm import SVC
# svclassifier = SVC(kernel='poly', degree=8)
# svclassifier.fit(X_train, y_train)

"\nfrom sklearn.svm import SVC\nsvclassifier = SVC(kernel='poly', degree=8)\nsvclassifier.fit(X_train, y_train)\n"

In [24]:
# Bring in scikit-learn's SVM module
from sklearn import svm

# Build a support vector classifier with an RBF kernel and train it on the
# training split (fit returns the fitted estimator itself).
# NOTE(review): no feature scaling is visible before this point, and the
# feature columns differ by many orders of magnitude (enterprisevalue vs.
# the ratio columns) - consider standardising the features.
clf = svm.SVC(kernel='rbf').fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = clf.predict(X_test)

In [25]:
# Report the classification metrics on the test split, one per line.
metric_table = (
    ("Accuracy:", metrics.accuracy_score),
    ("Recall:", metrics.recall_score),
    ("Precision:", metrics.precision_score),
    ("F1:", metrics.f1_score),
)
for title, scorer in metric_table:
    print(title, scorer(y_test, y_pred))

Accuracy: 0.6208791208791209
Recall: 0.2602739726027397
Precision: 0.5588235294117647
F1: 0.3551401869158879
