In [4]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics


In [77]:
def vectorize_and_replace(df, cols: list, prefix: str):
    """Vectorize the {cols} columns in {dataframe}\n
    and returns a {dataframe} copy with the vector after removing the {cols}\n
    new col name is {prefix}_vec """
    # df = dataframe.copy()

    lb = preprocessing.LabelBinarizer()
    vec = lb.fit_transform(df[cols]).tolist()

    df[f"{prefix}_vec"] = vec
    df = df.drop(cols, axis = 1)
    return df

In [89]:
def decode_price(price: str):
    '''
    format: $XXXX.XXXXA 
    where X is digit from the range [0,9]
    and A is an action multiplier where K means thousands and M means Millions. 
    '''

    if(price[0]!='$'):
        price = '$' + price

    k = 1000
    m = 1000000
    multiplier = 0
    try:
        symbol = price[-1].upper() # The upper method is used to reduce the need to check wether the symbol is 'k' or 'K'
    except:
        print(f"Error: The end of the string '{price}' does not contain 'K' or 'M'!")
        return None
    if(symbol=="K"):
        multiplier = k
    elif(symbol == "M"):
        multiplier = m
    elif symbol == "B" :
        multiplier = m * 100
    else:
        print(f"Error: The end of the string '{price}' does not contain 'K' or 'M'!")
        return None
    
    new_price= int(float(price[1:-1]) * multiplier)
    # print(f"Price before manipulation: {price}")
    # print(f"Price after manipulation: {new_price}")

    # TODO: after the function test we can remove the prints above.
    
    return new_price

def conv_to_float(df):
    for i, val in enumerate(df):
        if val != 0:
            df.iloc[i] = decode_price(val)
    return df

In [79]:
def conv_to_float(df):
    for i, val in enumerate(df):
        if val != 0:
            df.iloc[i] = decode_price(val)
    return df

In [90]:
df = pd.read_csv("df0_5000.csv").iloc[:,2:]
df

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,company_name,company_about,founded,business model,employees,funding stage,raised,product stage,status,geographical markets,...,tag_application-optimization,tag_mainframe,tag_sourcing,tag_plastic-surgery,tag_dermatology,tag_card-payments,tag_paycheck,tag_remittances,targetmarket_unbanked,tag_it-architecture
0,Tastewise,Tastewise is an AI platform designed to help f...,7/2017,B2B,51-200,ROUND A,$21.5M,Released,active,"australia, canada, france, india, united kingd...",...,,,,,,,,,,
1,Wilk Technologies,Wilk is dedicated to revolutionizing the dairy...,6/2018,"B2B, B2B2C",11-50,Public,$4.69M,R&D,active,,...,,,,,,,,,,
2,Eco Pack Green Box,Eco Pack Green Box has developed and patented ...,3/2008,B2B,11-50,Revenue Financed,,Released,not_active,"canada, mexico, spain, united states",...,,,,,,,,,,
3,BeeHero,BeeHero has developed a platform that can pred...,10/2017,B2B,1-10,ROUND A,$24M,Released,active,"global, united states",...,,,,,,,,,,
4,Cham Foods,Cham Foods is a multinational company with man...,12/1970,"B2B, B2B2C",11-50,Public,,Released,active,"north america, europe, global, france, germany...",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4983,Zaponet,"Zaponet is analytics solutions provider, offer...",2011,B2B,11-50,Bootstrapped,,Released,not_active,,...,,,,,,,,,,
4984,Storydoc,Storydoc can easily transform static PDFs and ...,1/2020,B2B,1-10,Bootstrapped,,Released,active,global,...,,,,,,,,,,
4985,PATX.io,PatX developed an AI system for the assessment...,8/2019,B2B,1-10,Bootstrapped,,Alpha,active,,...,,,,,,,,,,
4986,Peck,Peck's SaaS platform enables successful digita...,8/2019,B2B,1-10,Bootstrapped,,Beta,active,north america,...,,,,,,,,,,


In [91]:
df_test = df.copy()
df_test.shape

(4988, 2213)

In [92]:
tag_cols = [col for col in df_test.columns if col.startswith('tag_')]
targetmarket_cols = [col for col in df_test.columns if col.startswith('targetmarket_')]
sector_list = [col for col in df_test.columns if col.startswith("sector_")]
target_ind_list = [col  for col in df_test.columns if col.startswith("target_industry_")]
technology_list = [col  for col in df_test.columns if col.startswith("core_technology_")]


In [93]:
df_test[tag_cols] = df_test[tag_cols].fillna(0)
df_test[targetmarket_cols] = df_test[targetmarket_cols].fillna(0)
df_test[sector_list] = df_test[sector_list].fillna(0)
df_test[target_ind_list] = df_test[target_ind_list].fillna(0)
df_test[technology_list] = df_test[technology_list].fillna(0)


vectorize

In [94]:
df_test = vectorize_and_replace(df_test, tag_cols, "tag")
df_test = vectorize_and_replace(df_test, targetmarket_cols, "targetmarket")
df_test = vectorize_and_replace(df_test, sector_list, "sector")
df_test = vectorize_and_replace(df_test, target_ind_list, "target_ind")
df_test = vectorize_and_replace(df_test, technology_list, "tech")
df_test.shape

(4988, 23)

Replacing null values to 0

In [95]:
df_test['raised'] = df_test['raised'].fillna(0)
df_test['total_rounds'] = df_test['total_rounds'].fillna(0)
df_test['investors'] = df_test['investors'].fillna(0)
df_test['ipo_price'] = df_test['ipo_price'].fillna(0)

In [96]:
df_test.status.replace({'active' : 1, 'not_active' : 0 }, inplace=True)

Removing unused columns

In [97]:
df_test.drop(['use cases','academic spin-off','total_raised'], axis = 1, inplace = True)
df_test.shape

(4988, 20)

Decoding str to numeric value

In [98]:
df_test['ipo_price'] = conv_to_float(df_test["ipo_price"])
df_test['raised'] = conv_to_float(df_test["raised"])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [21]:
df_test

Unnamed: 0,company_name,company_about,founded,business model,employees,funding stage,raised,product stage,status,geographical markets,products,fund_stage,total_rounds,investors,ipo_price,tag_vec,targetmarket_vec,sector_vec,target_ind_vec,tech_vec
0,Tastewise,Tastewise is an AI platform designed to help f...,7/2017,B2B,51-200,ROUND A,21500000,Released,1,"australia, canada, france, india, united kingd...",tastewise,A,3.0,3.0,0,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...","[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,Wilk Technologies,Wilk is dedicated to revolutionizing the dairy...,6/2018,"B2B, B2B2C",11-50,Public,4690000,R&D,1,,,Public,2.0,2.0,0,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, ...","[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,Eco Pack Green Box,Eco Pack Green Box has developed and patented ...,3/2008,B2B,11-50,Revenue Financed,0,Released,0,"canada, mexico, spain, united states",,,0.0,0.0,0,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,BeeHero,BeeHero has developed a platform that can pred...,10/2017,B2B,1-10,ROUND A,24000000,Released,1,"global, united states",,A,4.0,12.0,0,"[0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,Cham Foods,Cham Foods is a multinational company with man...,12/1970,"B2B, B2B2C",11-50,Public,0,Released,1,"north america, europe, global, france, germany...",,,0.0,0.0,0,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4983,Zaponet,"Zaponet is analytics solutions provider, offer...",2011,B2B,11-50,Bootstrapped,0,Released,0,,,,0.0,0.0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4984,Storydoc,Storydoc can easily transform static PDFs and ...,1/2020,B2B,1-10,Bootstrapped,0,Released,1,global,,,0.0,0.0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
4985,PATX.io,PatX developed an AI system for the assessment...,8/2019,B2B,1-10,Bootstrapped,0,Alpha,1,,,,0.0,0.0,0,"[0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."
4986,Peck,Peck's SaaS platform enables successful digita...,8/2019,B2B,1-10,Bootstrapped,0,Beta,1,north america,,,0.0,0.0,0,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."


Converting the numeric colums

In [99]:
df_test['raised'] = df_test['raised'].astype('float')
df_test['total_rounds'] = df_test['total_rounds'].astype('int')
df_test['investors'] = df_test['investors'].astype('int')
df_test['ipo_price'] = df_test['ipo_price'].astype('float')

In [103]:
df_test['business model'] = preprocessing.LabelEncoder().fit_transform(df_test['business model'])
df_test['business model']

0       0
1       1
2       0
3       0
4       1
       ..
4983    0
4984    0
4985    0
4986    0
4987    2
Name: business model, Length: 4988, dtype: int32

In [104]:
df_test.raised.mean()

6148873.9212109065

In [105]:
df_test.corr()

Unnamed: 0,business model,raised,status,total_rounds,investors,ipo_price
business model,1.0,-0.07476,-0.211418,-0.111628,-0.120556,-0.020432
raised,-0.07476,1.0,0.107865,0.484236,0.539615,0.273082
status,-0.211418,0.107865,1.0,0.147338,0.149546,0.037352
total_rounds,-0.111628,0.484236,0.147338,1.0,0.756064,0.12943
investors,-0.120556,0.539615,0.149546,0.756064,1.0,0.11845
ipo_price,-0.020432,0.273082,0.037352,0.12943,0.11845,1.0


In [106]:
df_test.loc[(df_test["status"]==1)&(df_test['raised']>4000000), 'suceeded'] = 1
df_test.loc[(df_test["status"]==0)|(df_test['raised']<=4000000), 'suceeded'] = 0

In [200]:
df_test.to_csv('cleaned.csv')

In [201]:
# df_test.loc["succeeded"] = df_test[(df_test['status'] == 1) & (df_test['raised'] > 5000000)]
df_test

Unnamed: 0,company_name,company_about,founded,business model,employees,funding stage,raised,product stage,status,geographical markets,...,fund_stage,total_rounds,investors,ipo_price,tag_vec,targetmarket_vec,sector_vec,target_ind_vec,tech_vec,suceeded
0,Tastewise,Tastewise is an AI platform designed to help f...,7/2017,B2B,51-200,ROUND A,21500000.0,Released,1,"australia, canada, france, india, united kingd...",...,A,3,3,0.0,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...","[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.0
1,Wilk Technologies,Wilk is dedicated to revolutionizing the dairy...,6/2018,"B2B, B2B2C",11-50,Public,4690000.0,R&D,1,,...,Public,2,2,0.0,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, ...","[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.0
2,Eco Pack Green Box,Eco Pack Green Box has developed and patented ...,3/2008,B2B,11-50,Revenue Financed,0.0,Released,0,"canada, mexico, spain, united states",...,,0,0,0.0,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
3,BeeHero,BeeHero has developed a platform that can pred...,10/2017,B2B,1-10,ROUND A,24000000.0,Released,1,"global, united states",...,A,4,12,0.0,"[0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.0
4,Cham Foods,Cham Foods is a multinational company with man...,12/1970,"B2B, B2B2C",11-50,Public,0.0,Released,1,"north america, europe, global, france, germany...",...,,0,0,0.0,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4983,Zaponet,"Zaponet is analytics solutions provider, offer...",2011,B2B,11-50,Bootstrapped,0.0,Released,0,,...,,0,0,0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
4984,Storydoc,Storydoc can easily transform static PDFs and ...,1/2020,B2B,1-10,Bootstrapped,0.0,Released,1,global,...,,0,0,0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",0.0
4985,PATX.io,PatX developed an AI system for the assessment...,8/2019,B2B,1-10,Bootstrapped,0.0,Alpha,1,,...,,0,0,0.0,"[0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...",0.0
4986,Peck,Peck's SaaS platform enables successful digita...,8/2019,B2B,1-10,Bootstrapped,0.0,Beta,1,north america,...,,0,0,0.0,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",0.0


In [107]:

xtrain, xtest, ytrain, ytest = train_test_split(df_test.iloc[:,:-1].select_dtypes(include=np.number), df_test.iloc[:,-1])


In [108]:
df_test.iloc[2930,:]

company_name                                                    SolarEdge
company_about           SolarEdge Technologies invented an inverter so...
founded                                                            8/2006
business model                                                          2
employees                                                            500+
funding stage                                                      Public
raised                                                        124700000.0
product stage                                                    Released
status                                                                  1
geographical markets    europe, asia, africa, global, australia, cambo...
products                                                              NaN
fund_stage                                                         Public
total_rounds                                                            6
investors                             

In [75]:
# TODO maybe a function for each vec col?
#      or maybe not vectorize !?

lst_vec = df_test.iloc[:,-6].tolist()
df_vec = pd.DataFrame(lst_vec)
df_vec.shape
xtrain, xtest, ytrain, ytest = train_test_split(df_vec, df_test.iloc[:,-1])

In [109]:
lr = LogisticRegression()
lr.fit(xtrain,ytrain)

LogisticRegression()

In [110]:
ytrain_pred = lr.predict(xtrain)
ytest_pred = lr.predict(xtest)

In [67]:
metrics.confusion_matrix(ytrain_pred, ytrain)

array([[3336,  208],
       [   7,  190]], dtype=int64)

In [111]:
print("Train results:")
print("accuracy is:",metrics.accuracy_score(ytrain_pred, ytrain))
print("precision is:",metrics.precision_score(ytrain_pred, ytrain))
print("recall is:",metrics.recall_score(ytrain_pred, ytrain))
print("f1 is:",metrics.f1_score(ytrain_pred, ytrain))
print("------------")
print("Test results:")
print("accuracy is:",metrics.accuracy_score(ytest_pred, ytest))
print("precision is:",metrics.precision_score(ytest_pred, ytest))
print("recall is:",metrics.recall_score(ytest_pred, ytest))
print("f1 is:",metrics.f1_score(ytest_pred, ytest))

Train results:
accuracy is: 0.7866880513231757
precision is: 0.9975490196078431
recall is: 0.3380398671096346
f1 is: 0.5049627791563276
------------
Test results:
accuracy is: 0.7826784282277466
precision is: 1.0
recall is: 0.3033419023136247
f1 is: 0.4654832347140039


<strong>This is the results with vectorize</strong>