In [1]:
from sklearn.metrics import roc_auc_score

In [2]:
import pickle

In [3]:
import pandas as pd

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
# Load the sample dataset of terms, definitions, readability labels and
# readability-formula scores from a CSV in the working directory.
data = pd.read_csv('data_sample_500.csv')
# Bare last expression so the notebook renders the frame.
data

Unnamed: 0,terms,definitions,source,assigned_readability,flesch_reading_ease,flesch_kincaid_grade,smog_index,coleman_liau_index,automated_readability_index,dale_chall_readability_score,linsear_write_formula,gunning_fog
0,NEGATIVE CARRY,Any transaction or TRADE where the RETURNS gen...,palgrave,0,56.25,9.1,0.0,9.21,8.8,9.64,8.000000,10.00
1,Consumer Discretionary,Consumer discretionary is a term for classifyi...,investopedia,1,24.27,15.2,0.0,20.07,20.6,12.41,16.250000,18.83
2,OBJECTIVE RISK,A measure of the deviation between an INSURER’...,palgrave,0,29.35,13.3,0.0,17.75,16.2,11.63,13.250000,18.72
3,Patents,In 1899 the commissioner of the American Offic...,economist,1,54.05,12.1,14.4,11.20,14.5,8.85,16.250000,14.32
4,MASTER FUND,A FUND that invests CAPITAL gathered from INVE...,palgrave,0,75.20,6.0,0.0,10.60,8.8,10.96,6.000000,5.20
...,...,...,...,...,...,...,...,...,...,...,...,...
495,FRIENDLY TAKEOVER,An ACQUISITION that is agreed on amicable term...,palgrave,0,48.64,12.1,0.0,10.86,12.8,9.31,14.750000,13.44
496,Continuous Variable,A quantitative variable that can take any nume...,ncert_kest,1,36.96,10.3,0.0,12.50,9.3,9.35,5.500000,12.49
497,revolving credit,legally assured line of credit with a bank.,prin,0,80.28,4.1,0.0,5.76,3.8,9.95,3.000000,3.20
498,Trust Fund,A trust fund is an estate planning tool that e...,investopedia,1,52.19,10.7,13.0,9.52,10.5,9.77,12.500000,12.34


In [7]:
# Class balance of the target label, as proportions of all rows.
data['assigned_readability'].value_counts(normalize = True)

1    0.58
0    0.42
Name: assigned_readability, dtype: float64

In [8]:
# Class balance of the target label, as absolute row counts.
data['assigned_readability'].value_counts()

1    290
0    210
Name: assigned_readability, dtype: int64

In [9]:
# Row count for every (source, label) combination; reset_index() turns the
# grouped counts back into a flat frame for display.
data[['source', 'assigned_readability']].value_counts().reset_index()

Unnamed: 0,source,assigned_readability,0
0,investopedia,1,227
1,palgrave,0,155
2,zvi,0,18
3,economist,1,18
4,6_8_louis,1,17
5,fmi,0,16
6,opod,0,15
7,sam,1,11
8,prin,0,6
9,9_12_louis,1,6


In [10]:
# List the available column names.
data.columns

Index(['terms', 'definitions', 'source', 'assigned_readability',
       'flesch_reading_ease', 'flesch_kincaid_grade', 'smog_index',
       'coleman_liau_index', 'automated_readability_index',
       'dale_chall_readability_score', 'linsear_write_formula', 'gunning_fog'],
      dtype='object')

In [11]:
# Readability-formula score columns, in the order they are evaluated below.
numeric_columns = [
    "flesch_reading_ease",
    "flesch_kincaid_grade",
    "smog_index",
    "coleman_liau_index",
    "automated_readability_index",
    "dale_chall_readability_score",
    "linsear_write_formula",
    "gunning_fog",
]

In [13]:
# Name of the text column used as model input.
features = ['definitions']
# Target label Series for the full (unsplit) dataset.
# NOTE(review): neither `features` nor `Y` is referenced again in the cells
# visible here (the split cells read labels from df / val_df) — confirm
# whether later cells still need them.
Y = data['assigned_readability']

In [14]:
import numpy as np

# Reproducible random split: each row goes to the training set when its
# uniform draw is below 0.8, otherwise to the validation set.
np.random.seed(0)
msk = np.random.rand(len(data)) < 0.8

# reset_index() is called without drop=True, so the original row label is
# kept as an extra "index" column in both frames.
df = data.loc[msk].reset_index().copy()
val_df = data.loc[~msk].reset_index().copy()

In [15]:
# Baseline: treat each raw readability score as a ranking score for the
# validation labels and report its ROC AUC.
y_val_true = val_df['assigned_readability'].values
for col in numeric_columns:
    auroc = roc_auc_score(y_val_true, val_df[col].values)
    print(col, auroc, "\n")

flesch_reading_ease 0.49304650152107776 

flesch_kincaid_grade 0.5895262929161234 

smog_index 0.6134289439374185 

coleman_liau_index 0.5780095610604085 

automated_readability_index 0.646458061712299 

dale_chall_readability_score 0.4248152976966536 

linsear_write_formula 0.6573229030856149 

gunning_fog 0.5332464146023469 



# Feature extraction: TF-IDF

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

In [17]:
# TF-IDF over 1- to 4-grams with English stop words removed.
# NOTE(review): min_df is a float here; per the scikit-learn docs a float is
# read as a proportion of documents, not a count — confirm this threshold
# filters anything at the current training-set size.
tfidf_model_original = TfidfVectorizer(
    stop_words="english",
    ngram_range=(1, 4),
    min_df=0.0005,
)

# Learn the vocabulary and IDF weights from the training split only.
tfidf_model_original.fit(df["definitions"])

TfidfVectorizer(min_df=0.0005, ngram_range=(1, 4), stop_words='english')

In [18]:
# Column labels for the TF-IDF matrices: vocabulary terms ordered by the
# column index the fitted vectorizer assigned to each of them. Sorting by the
# stored index (rather than alphabetically) keeps labels and columns aligned
# without relying on how the vectorizer happens to number its features.
tfidf_vocabulary = tfidf_model_original.vocabulary_
tfidf_feature_names = sorted(tfidf_vocabulary, key=tfidf_vocabulary.get)

# training
# .toarray() yields a plain ndarray; .todense() would return a numpy.matrix,
# which NumPy no longer recommends using.
tfidf_df_train_original = pd.DataFrame(
    tfidf_model_original.transform(df["definitions"]).toarray(),
    columns=tfidf_feature_names,
)

# validation
# Transformed with the vectorizer fitted on the training split, so both
# frames share the same columns.
tfidf_df_valid_original = pd.DataFrame(
    tfidf_model_original.transform(val_df["definitions"]).toarray(),
    columns=tfidf_feature_names,
)


In [19]:
# Display the training TF-IDF frame (one row per training definition, one
# column per vocabulary term).
tfidf_df_train_original

Unnamed: 0,000,000 dollars,000 dollars assessed,000 dollars assessed value,000 mutual,000 mutual funds,000 mutual funds exchange,000 small,000 small denominations,000 small denominations enhance,10,10 20,10 20 days,10 countries,10 countries deposited,10 countries deposited funds,10 gab,10 gab 10,10 gab 10 countries,100,100 accounts,100 accounts receivable,100 accounts receivable ar,100 basis,100 basis points,100 basis points target,100 bounded,100 bounded range,100 bounded range values,100 years,100 years owe,100 years owe existence,1092,1092 release,1092 release securities,1092 release securities exchange,120,120 days,120 days date,120 days date veterans,...,yield maturity measures,yield maturity measures rate,yield modification,yield modification takes,yield modification takes account,yield quoted,yields,yields covariances,yields covariances assets,yields covariances assets alsocorrelation,york,york city,york city largest,york city largest equities,york stock,york stock exchange,york stock exchange nyse,zero,zero debt,zero debt alternative,zero debt alternative levered,zero minus,zero minus tick,zero zero,zero zero minus,zero zero minus tick,σ2,σ2 r1,σ2 r1 variance,σ2 r1 variance return,σ2 r2,σ2 r2 variance,σ2 r2 variance return,σ2 variance,σ2 variance value,σ2 variance value d2,σ22σ,σ22σ ts,σ22σ ts stock,σ22σ ts stock price
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.049391,0.049391,0.049391,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
397,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
398,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
399,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
400,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Display the validation TF-IDF frame (same term columns as the training frame).
tfidf_df_valid_original

Unnamed: 0,000,000 dollars,000 dollars assessed,000 dollars assessed value,000 mutual,000 mutual funds,000 mutual funds exchange,000 small,000 small denominations,000 small denominations enhance,10,10 20,10 20 days,10 countries,10 countries deposited,10 countries deposited funds,10 gab,10 gab 10,10 gab 10 countries,100,100 accounts,100 accounts receivable,100 accounts receivable ar,100 basis,100 basis points,100 basis points target,100 bounded,100 bounded range,100 bounded range values,100 years,100 years owe,100 years owe existence,1092,1092 release,1092 release securities,1092 release securities exchange,120,120 days,120 days date,120 days date veterans,...,yield maturity measures,yield maturity measures rate,yield modification,yield modification takes,yield modification takes account,yield quoted,yields,yields covariances,yields covariances assets,yields covariances assets alsocorrelation,york,york city,york city largest,york city largest equities,york stock,york stock exchange,york stock exchange nyse,zero,zero debt,zero debt alternative,zero debt alternative levered,zero minus,zero minus tick,zero zero,zero zero minus,zero zero minus tick,σ2,σ2 r1,σ2 r1 variance,σ2 r1 variance return,σ2 r2,σ2 r2 variance,σ2 r2 variance return,σ2 variance,σ2 variance value,σ2 variance value d2,σ22σ,σ22σ ts,σ22σ ts stock,σ22σ ts stock price
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.128957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
94,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Logistic Regression Model

In [21]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

In [22]:
# Feature matrices: the TF-IDF frames built above.
train_X, valid_X = tfidf_df_train_original, tfidf_df_valid_original
# Targets: the readability label from the matching split.
train_y, valid_y = df['assigned_readability'], val_df['assigned_readability']


In [23]:
def model(clf, train_X, train_y, valid_X, valid_y):
    """Fit ``clf`` on the training split and print metrics for both splits.

    For the training and the validation data this prints, in order, the
    weighted F1 score, the confusion matrix and the classification report
    (all computed from ``clf.predict``), followed by the ROC AUC computed
    from column 1 of ``clf.predict_proba``.

    Args:
        clf: Estimator exposing ``fit``, ``predict`` and ``predict_proba``.
        train_X: Training features.
        train_y: Training labels.
        valid_X: Validation features.
        valid_y: Validation labels.

    Returns:
        None. ``clf`` is fitted in place and the results are only printed.
    """
    clf.fit(train_X, train_y)

    # Hard label predictions, then the probability column used for AUC.
    labels_train = clf.predict(train_X)
    labels_valid = clf.predict(valid_X)
    scores_train = clf.predict_proba(train_X)[:, 1]
    scores_valid = clf.predict_proba(valid_X)[:, 1]

    # --- training split ---
    train_f1 = f1_score(train_y, labels_train, average="weighted")
    print(f"\nTraining F1:{train_f1}")
    train_cm = confusion_matrix(train_y, labels_train)
    print(f"Training Confusion Matrix \n{train_cm}")
    train_report = classification_report(train_y, labels_train)
    print(f"Classification Report Train: \n{train_report}")
    print("AUC Train", roc_auc_score(train_y, scores_train))

    # --- validation split ---
    valid_f1 = f1_score(valid_y, labels_valid, average="weighted")
    print(f"\nValidation F1:{valid_f1}")
    valid_cm = confusion_matrix(valid_y, labels_valid)
    print(f"Validation Confusion Matrix \n{valid_cm}")
    valid_report = classification_report(valid_y, labels_valid)
    print(f"Classification Report: \n{valid_report}")
    print("AUC Valid", roc_auc_score(valid_y, scores_valid))

In [24]:
# Baseline classifier on the TF-IDF features: fit it and print train /
# validation metrics via model().
lr_clf = LogisticRegression(n_jobs=-1, solver="lbfgs")
model(
    clf=lr_clf,
    train_X=train_X,
    train_y=train_y,
    valid_X=valid_X,
    valid_y=valid_y,
)
# Record the full hyper-parameter set used for this run.
params = lr_clf.get_params()
print(params)


Training F1:1.0
Training Confusion Matrix 
[[171   0]
 [  0 231]]
Classification Report Train: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       171
           1       1.00      1.00      1.00       231

    accuracy                           1.00       402
   macro avg       1.00      1.00      1.00       402
weighted avg       1.00      1.00      1.00       402

AUC Train 1.0

Validation F1:0.5383579788504549
Validation Confusion Matrix 
[[ 4 35]
 [ 0 59]]
Classification Report: 
              precision    recall  f1-score   support

           0       1.00      0.10      0.19        39
           1       0.63      1.00      0.77        59

    accuracy                           0.64        98
   macro avg       0.81      0.55      0.48        98
weighted avg       0.78      0.64      0.54        98

AUC Valid 0.7548891786179922
{'C': 1.0, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_r