In [24]:
pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-24.7.0-py3-none-any.whl.metadata (11 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m860.3 kB/s[0m eta [36m0:00:00[0mB/s[0m eta [36m0:00:01[0m0m
[?25hDownloading pyaml-24.7.0-py3-none-any.whl (24 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-24.7.0 scikit-optimize-0.10.2
Note: you may need to restart the kernel to use updated packages.


In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from tqdm import tqdm

In [2]:
data = pd.read_csv('updated_data.csv')
data.head()

Unnamed: 0,tweet_text,retweet_count,favorite_count,followers_count,friends_count,verified,label,time_delay (min),neg_sent,pos_sent,neu_sent,comp_sent,difference,weight,ratio
0,"Reports of ""moving body"" amidst #Germanwings w...",38,15,337960,6384,1,1,1.2833,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1,BREAKING:148passengers were on board #GermanWi...,43,15,52815,293,0,1,1.5,0.0,0.14,0.86,0.3818,1.0,1.0,10000000000.0
2,BREAKING: #Germanwings crash victims include 7...,31,5,893549,2312,1,1,4.1333,0.278,0.0,0.722,-0.6124,-0.2,-0.2,0.6666667
3,BREAKING: 148 feared dead in crashed #Germanwi...,167,32,418641,1859,1,1,3.5167,0.517,0.0,0.483,-0.8176,0.0,0.0,1.0
4,Terrible news... Airbus A320 from Barcelona to...,26,1,11062,233,0,1,3.3667,0.171,0.0,0.829,-0.4767,0.0,0.0,1.0


In [3]:
text_data = data['tweet_text'].values
numerical_data = data[['retweet_count', 'followers_count', 'verified',
                           'time_delay (min)', 'neg_sent', 'pos_sent', 
                       'neu_sent', 'comp_sent', 'difference', 'ratio']].values
y = data['label']

In [89]:
# we added stratifying sampling
X_train_text, X_test_text, X_train_num, X_test_num, y_train, y_test = train_test_split(text_data, numerical_data, y, 
                                                                                       test_size=0.2, 
                                                                                       random_state=42, stratify=y)

In [90]:
# Get the shape of all the datasets
print(f'Train text: {len(X_train_text)}'), print(f'Train number: {len(X_train_num)}')
print(f'Test text: {len(X_test_text)}'), print(f'Test number:{len(X_test_num)}')
print(f'Y Training: {len(y_train)}'), print(f'Y Testing:{len(y_test)}')

Train text: 4641
Train number: 4641
Test text: 1161
Test number:1161
Y Training: 4641
Y Testing:1161


(None, None)

## Preprocess Data

## Text features

In [6]:
import tensorflow_hub as hub
import tensorflow as tf

2024-07-26 13:53:07.602010: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-26 13:53:07.649756: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-26 13:53:07.699180: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-26 13:53:07.712269: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-26 13:53:07.753125: I tensorflow/core/platform/cpu_feature_guar

In [9]:
import os
os.environ["CUDA_VISIBLE_DIVICES"]='-1'

In [10]:
elmo = hub.load("https://tfhub.dev/google/elmo/3")

In [11]:
def elmo_embeddings(text_list):
    embeddings = elmo.signatures['default'](tf.constant(text_list))['elmo']
    return embeddings

In [12]:
def process_in_batches(texts, batch_size=32):
    elmo_embed_np = []
    total_batches = (len(texts) + batch_size - 1) // batch_size  # Calculate total number of batches
    with tqdm(total=total_batches, desc='Processing Batches') as pbar:
        for i in range(0, len(texts), batch_size):
            batch_texts = texts[i:i + batch_size]
            batch_embeddings = elmo_embeddings(batch_texts)
            # Average the embeddings for each text
            batch_embeddings_np = [np.mean(embed.numpy(), axis=0) for embed in batch_embeddings]
            for embed in batch_embeddings_np:
                if embed.shape != (1024,):
                    print(f"Unexpected shape found: {embed.shape}")
            elmo_embed_np.extend(batch_embeddings_np)
            pbar.update(1)  # Update progress bar
    return np.array(elmo_embed_np)

In [91]:
train_elmo_embed_np = process_in_batches(X_train_text, batch_size=16)  # Adjust batch size as needed
test_elmo_embed_np = process_in_batches(X_test_text, batch_size=16)  # Adjust batch size as needed
test_elmo_embed_np.shape

Processing Batches: 100%|█████████████████████| 291/291 [13:51<00:00,  2.86s/it]
Processing Batches: 100%|███████████████████████| 73/73 [03:27<00:00,  2.85s/it]


(1161, 1024)

## Numerical features

In [14]:
from sklearn.preprocessing import MinMaxScaler

In [15]:
#using custom range
class CustomMinMaxScaler:
    def __init__(self, feature_range=(-2, 2)):
        self.feature_range = feature_range
        self.scaler = MinMaxScaler(feature_range=(0, 1))
    
    def fit(self, X):
        # Fit the scaler to the data
        self.scaler.fit(X)
        return self
    
    def transform(self, X):
        # Transform the data to [0, 1] range
        X_normalized = self.scaler.transform(X)
        # Scale to the desired range [-2, 2]
        a, b = self.feature_range
        X_scaled = a + (X_normalized * (b - a))
        return X_scaled
    
    def fit_transform(self, X):
        # Fit and transform the data
        return self.fit(X).transform(X)

In [92]:
num_scaler = CustomMinMaxScaler(feature_range=(-2, 2))

train_norm_num_features = num_scaler.fit_transform(X_train_num)
test_norm_num_features = num_scaler.fit_transform(X_test_num)
print(f'Normalized test shape: {test_norm_num_features.shape}')

Normalized test shape: (1161, 10)


In [93]:
print(f'Minimum value of training: {train_norm_num_features.min()}')
print(f'Maximum value of training: {test_norm_num_features.max()}')

Minimum value of training: -2.0
Maximum value of training: 2.0


## Concatenate Features 

In [94]:
X_train = np.hstack((train_elmo_embed_np, train_norm_num_features))
X_test = np.hstack((test_elmo_embed_np, test_norm_num_features))
X_test.shape

(1161, 1034)

In [95]:
X_train.shape

(4641, 1034)

In [123]:
from sklearn.model_selection import cross_validate
scv_model = SVC(random_state=42)
scoring = ['accuracy', 'recall',  'precision','f1_macro', 'f1_weighted' ]
scores = cross_validate(scv_model, X_train, y_train, scoring=scoring, cv=10)
svc_cv_scores = pd.DataFrame(scores)
display(svc_cv_scores)

Unnamed: 0,fit_time,score_time,test_accuracy,test_recall,test_precision,test_f1_macro,test_f1_weighted
0,2.937169,0.369142,0.823656,0.670886,0.779412,0.796079,0.820108
1,2.918458,0.379907,0.853448,0.738854,0.811189,0.832527,0.851662
2,2.929619,0.370713,0.853448,0.738854,0.811189,0.832527,0.851662
3,2.890524,0.363087,0.842672,0.66879,0.833333,0.814436,0.837836
4,2.896709,0.372347,0.825431,0.721519,0.754967,0.803504,0.824441
5,2.906134,0.36404,0.834052,0.689873,0.79562,0.80867,0.830898
6,2.978444,0.376964,0.844828,0.702532,0.816176,0.820769,0.841714
7,2.896813,0.368362,0.81681,0.689873,0.751724,0.791736,0.814786
8,2.942583,0.370052,0.842672,0.746835,0.781457,0.822911,0.84178
9,2.924344,0.367612,0.834052,0.740506,0.764706,0.813807,0.83339


In [127]:
avg_f1_macro_cv_score = svc_cv_scores['test_f1_macro'].mean()
avg_f1_weighted_cv_score = svc_cv_scores['test_f1_weighted'].mean()
avg_precision_cv_score = svc_cv_scores['test_precision'].mean()
avg_recall_cv_score = svc_cv_scores['test_recall'].mean()
avg_acc_cv_score = svc_cv_scores['test_accuracy'].mean()

acc_perc = round((avg_acc_cv_score * 100), 2)
recall_perc = round((avg_recall_cv_score * 100), 2)
precision_perc = round((avg_precision_cv_score * 100), 2)
weighted_perc = round((avg_f1_weighted_cv_score * 100), 2)
macro_perc = round((avg_f1_macro_cv_score * 100), 2)

print(f'Precision: {avg_precision_cv_score}', f'| Prec %: {precision_perc}')
print(f'Recall: {avg_recall_cv_score}', f' | Recall %: {recall_perc}')
print(f'Accuracy: {avg_acc_cv_score}', f' | Accuracy %: {acc_perc}')
print(f'Macro F1: {avg_f1_macro_cv_score}', f' | Macro F1 %: {macro_perc}')
print(f'Weighted F1: {avg_f1_weighted_cv_score}', f' | Weighted F1 %: {weighted_perc}')

Precision: 0.7899773490304856 | Prec %: 79.0
Recall: 0.7108522131742319  | Recall %: 71.09
Accuracy: 0.8371069707081944  | Accuracy %: 83.71
Macro F1: 0.8136964136884556  | Macro F1 %: 81.37
Weighted F1: 0.834827725051908  | Weighted F1 %: 83.48


In [124]:
scv_model.fit(X_train, y_train)

In [125]:
y_test_predict = scv_model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_precision = precision_score(y_test, y_test_pred)

test_acc_perc = round((test_accuracy * 100), 2)
test_recall_perc = round((test_recall * 100), 2)
test_precision_perc = round((test_precision * 100), 2)

print(f'Test Precision: {test_precision}', f" | Test Set Accuracy %: {test_acc_perc}")
print(f'Training Set Recall: {test_recall}', f' | Test Set Recall %: {test_recall_perc}')
print(f"Training Set Accuracy: {test_accuracy}", f' | Test Set Precision %: {test_precision_perc}')

from sklearn.metrics import f1_score
macrof1_score = f1_score(y_test, y_test_predict, average='macro')
macro_perc = round((macrof1_score * 100), 2)
print(f'Macro F1: {macrof1_score}',f' | Test Macro F1 %: {macro_perc}')

wf1_score = f1_score(y_test, y_test_predict, average='weighted')
weighted_perc = round((wf1_score * 100), 2)
print(f'Weighted F1: {wf1_score}',f' | Test Weighted F1 %: {weighted_perc}')

Test Precision: 0.7762039660056658  | Test Set Accuracy %: 82.77
Training Set Recall: 0.6936708860759494  | Test Set Recall %: 69.37
Training Set Accuracy: 0.8277347114556417  | Test Set Precision %: 77.62
Macro F1: 0.7990514054544187  | Test Macro F1 %: 79.91
Weighted F1: 0.8229448184548909  | Test Weighted F1 %: 82.29


## SVM Model 

In [36]:
#Import svm model
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from bayes_opt import BayesianOptimization
from sklearn.metrics import make_scorer, accuracy_score, recall_score, precision_score
from sklearn.model_selection import StratifiedKFold
from keras.callbacks import EarlyStopping

In [43]:
score_acc = make_scorer(accuracy_score)

In [49]:
def svc_cv(C, kernel, degree, gamma):

    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    scoring = ['precision_macro', 'recall_macro', 'accuracy']
    kernel = kernels[int(kernel)]
    #  Get the next int in the range
    # C = int(round(C))
    degree = int(round(degree))
    
    # Define the SVM model
    model = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma)
    kfold = StratifiedKFold(n_splits = 5, shuffle=True, random_state=123)
    scores = cross_val_score(model, X_train, y_train, scoring=scoring, cv=kfold)

    # score = np.nan_to_num(score)
    # score = score.mean()
    return scores

In [67]:
from sklearn.model_selection import cross_validate
def svc_cv(C, kernel, degree, gamma):

    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    scoring = ['precision_macro', 'recall_macro', 'accuracy']
    kernel = kernels[int(kernel)]
    #  Get the next int in the range
    # C = int(round(C))
    degree = int(round(degree))
    
    # Define the SVM model
    model = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma)
    kfold = StratifiedKFold(n_splits = 5, shuffle=True, random_state=123)
    scores = cross_validate(model, X_train, y_train, scoring=scoring, cv=kfold)

    # score = np.nan_to_num(score)
    # score = score.mean()
    return scores

In [38]:
from skopt import BayesSearchCV

In [55]:
svm_params ={
    'C':(0.01, 10.0),
    'kernel':(0,3),
    'degree':(1, 10),
    'gamma':(0.01, 30.0)
}

In [30]:
svm_params ={
    'C':(1e-6, 1e+6, 'log-uniform'),
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree':(1, 10),
    'gamma':(1e-6, 1e+1, 'log-uniform'),
}

In [35]:
import numpy as np

if np.any(np.isnan(X_train)) or np.any(np.isinf(X_train)):
    print("Data contains NaNs or Infs")
else:
    print("Data does not contain NaNs or Infs")


Data does not contain NaNs or Infs


In [57]:
svm_bo_optimizer = BayesianOptimization(svc_cv, svm_params,random_state=111)

In [58]:
svm_bo_optimizer.maximize(n_iter=100)

|   iter    |  target   |     C     |  degree   |   gamma   |  kernel   |
-------------------------------------------------------------------------
| [0m1        [0m | [0m0.6606   [0m | [0m6.126    [0m | [0m2.522    [0m | [0m13.09    [0m | [0m2.308    [0m |
| [95m2        [0m | [95m0.819    [0m | [95m2.96     [0m | [95m2.342    [0m | [95m0.6841   [0m | [95m1.261    [0m |
| [0m3        [0m | [0m0.8022   [0m | [0m2.394    [0m | [0m4.039    [0m | [0m29.72    [0m | [0m0.7132   [0m |
| [0m4        [0m | [0m0.8173   [0m | [0m0.8211   [0m | [0m7.026    [0m | [0m18.64    [0m | [0m0.8228   [0m |
| [0m5        [0m | [0m0.6626   [0m | [0m4.668    [0m | [0m2.065    [0m | [0m2.228    [0m | [0m2.702    [0m |
| [0m6        [0m | [0m0.6602   [0m | [0m0.4791   [0m | [0m8.854    [0m | [0m20.43    [0m | [0m2.983    [0m |
| [0m7        [0m | [0m0.6606   [0m | [0m6.49     [0m | [0m8.548    [0m | [0m11.59    [0m | [0m2.901   

In [59]:
# # Print the best result
print(svm_bo_optimizer.max)

{'target': 0.8297796573995025, 'params': {'C': 0.09329769353266323, 'degree': 8.13223470382647, 'gamma': 10.867380225395522, 'kernel': 0.0916803057120924}}


In [60]:
# Extract the results
results = svm_bo_optimizer.res
# Create a DataFrame from the results
results_df = pd.DataFrame(results)
# Optionally, save the DataFrame to a CSV file
results_df.to_csv('svc_bayesian_optimization_results.csv', index=False)


In [61]:
import json
# Extract the best result
best_result = svm_bo_optimizer.max

# Save the best result to a JSON file
with open('svm_best_result.json', 'w') as f:
    json.dump(best_result, f)

In [79]:
# Assuming X_train and y_train are your features and labels for the training set
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [84]:
def svc_model(C, kernel, degree, gamma):

    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    kernel = kernels[int(kernel)]
    degree = int(round(degree))
    
    # Define the SVM model
    model = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma)
    return model

In [85]:
C= 0.09329769353266323
degree = 8.13223470382647 
gamma = 10.867380225395522
kernel = 0.0916803057120924

In [96]:
model = svc_model(C, kernel, degree, gamma)
model.fit(X_train, y_train)

In [97]:
X_train.shape

(4641, 1034)

### Train

In [102]:
y_train_pred = model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Training Set Accuracy: {train_accuracy}")

train_recall = recall_score(y_train, y_train_pred)
print(f'Training Set Recall: {train_recall}')

train_precision = precision_score(y_train, y_train_pred)
print(f'Training Set Precision: {train_precision}')

Training Set Accuracy: 0.8812755871579401
Training Set Recall: 0.7837666455294864
Training Set Precision: 0.8547717842323651


In [108]:
train_acc_perc = round((train_accuracy * 100), 2)
train_recall_perc = round((train_recall * 100), 2)
train_precision_perc = round((train_precision * 100), 2)

print(f"Training Set Accuracy %: {train_acc_perc}")
print(f'Training Set Recall %: {train_recall_perc}')
print(f'Training Set Precision %: {train_precision_perc}')

Training Set Accuracy %: 88.13
Training Set Recall %: 78.38
Training Set Precision %: 85.48


In [112]:
from sklearn.metrics import f1_score
train_macrof1_score = f1_score(y_train, y_train_pred, average='macro')
train_macro_perc = round((train_macrof1_score * 100), 2)
print(train_macrof1_score)

print(f'Macro F1 %: {train_macro_perc}')

train_wf1_score = f1_score(y_train, y_train_pred, average='weighted')
train_weighted_perc = round((train_wf1_score * 100), 2)
print(train_wf1_score)

print(f'Weighted F1 %: {train_weighted_perc}')

0.8648487494580746
Macro F1 %: 86.48
0.879945602152574
Weighted F1 %: 87.99


### Test

In [98]:
y_test_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Training Set Accuracy: {test_accuracy}")

test_recall = recall_score(y_test, y_test_pred)
print(f'Training Set Recall: {test_recall}')

test_precision = precision_score(y_test, y_test_pred)
print(f'Training Set Precision: {test_precision}')

Training Set Accuracy: 0.8277347114556417
Training Set Recall: 0.6936708860759494
Training Set Precision: 0.7762039660056658


In [109]:
test_acc_perc = round((test_accuracy * 100), 2)
test_recall_perc = round((test_recall * 100), 2)
test_precision_perc = round((test_precision * 100), 2)

print(f"Test Set Accuracy %: {test_acc_perc}")
print(f'Test Set Recall %: {test_recall_perc}')
print(f'Test Set Precision %: {test_precision_perc}')

Test Set Accuracy %: 82.77
Test Set Recall %: 69.37
Test Set Precision %: 77.62


In [113]:
from sklearn.metrics import f1_score
test_macrof1_score = f1_score(y_test, y_test_pred, average='macro')
test_macro_perc = round((test_macrof1_score * 100), 2)
print(test_macrof1_score)

print(f'Macro F1 %: {test_macro_perc}')

test_wf1_score = f1_score(y_test, y_test_pred, average='weighted')
test_weighted_perc = round((test_wf1_score * 100), 2)
print(test_wf1_score)

print(f'weighted F1 %: {test_weighted_perc}')

0.8027777589030298
Macro F1 %: 80.28
0.8251967162807998
weighted F1 %: 82.52


# Baseline

In [100]:
C= 1.0
degree = 3 
gamma = 'scale'
kernel = 'rbf'
base_model = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma)
base_model.fit(X_train, y_train)

### Training

In [114]:
base_train_pred = base_model.predict(X_train)
base_train_accuracy = accuracy_score(y_train, base_train_pred)
print(f"Training Set Accuracy: {base_train_accuracy}")

base_train_recall = recall_score(y_train, base_train_pred)
print(f'Training Set Recall: {base_train_recall}')

base_train_precision = precision_score(y_train, base_train_pred)
print(f'Training Set Precision: {base_train_precision}')

Training Set Accuracy: 0.8838612368024132
Training Set Recall: 0.7850348763474952
Training Set Precision: 0.8609179415855355


In [115]:
from sklearn.metrics import f1_score
base_train_acc_perc = round((base_train_accuracy * 100), 2)
base_train_recall_perc = round((base_train_recall * 100), 2)
base_train_precision_perc = round((base_train_precision * 100), 2)

print(f"Training Set Accuracy %: {base_train_acc_perc}")
print(f'Training Set Recall %: {base_train_recall_perc}')
print(f'Training Set Precision %: {base_train_precision_perc}')

base_train_macrof1_score = f1_score(y_train, base_train_pred, average='macro')
base_train_macro_perc = round((base_train_macrof1_score * 100), 2)
print(base_train_macrof1_score)

print(f'Macro F1 %: {base_train_macro_perc}')

base_train_wf1_score = f1_score(y_train, base_train_pred, average='weighted')
base_train_weighted_perc = round((base_train_wf1_score * 100), 2)
print(base_train_wf1_score)

print(f'Weighted F1 %: {base_train_weighted_perc}')

Training Set Accuracy %: 88.39
Training Set Recall %: 78.5
Training Set Precision %: 86.09
0.8676105669196701
Macro F1 %: 86.76
0.8824720343954628
Weighted F1 %: 88.25


### Testing

In [101]:
base_y_test_pred = base_model.predict(X_test)
base_test_accuracy = accuracy_score(y_test, base_y_test_pred)
print(f"Training Set Accuracy: {base_test_accuracy}")

base_test_recall = recall_score(y_test, base_y_test_pred)
print(f'Training Set Recall: {base_test_recall}')

base_test_precision = precision_score(y_test, base_y_test_pred)
print(f'Training Set Precision: {base_test_precision}')

Training Set Accuracy: 0.8268733850129198
Training Set Recall: 0.6683544303797468
Training Set Precision: 0.7904191616766467


In [116]:
base_test_acc_perc = round((base_test_accuracy * 100), 2)
base_test_recall_perc = round((base_test_recall * 100), 2)
base_test_precision_perc = round((base_test_precision * 100), 2)

print(f"Test Set Accuracy %: {base_test_acc_perc}")
print(f'Test Set Recall %: {base_test_recall_perc}')
print(f'Test Set Precision %: {base_test_precision_perc}')

base_test_macrof1_score = f1_score(y_test, base_y_test_pred, average='macro')
base_test_macro_perc = round((base_test_macrof1_score * 100), 2)
print(base_test_macro_perc)

print(f'Macro F1 %: {base_test_macro_perc}')

base_test_wf1_score = f1_score(y_test, base_y_test_pred, average='weighted')
base_test_weighted_perc = round((base_test_wf1_score * 100), 2)
print(base_test_weighted_perc)

print(f'Weighted F1 %: {base_test_weighted_perc}')

Test Set Accuracy %: 82.69
Test Set Recall %: 66.84
Test Set Precision %: 79.04
79.91
Macro F1 %: 79.91
82.29
Weighted F1 %: 82.29


# Cross Validation 

In [118]:
from sklearn.model_selection import cross_validate
from sklearn.metrics import recall_score
scoring = ['accuracy', 'recall',  'precision','f1_macro', 'f1_weighted' ]
scores = cross_validate(base_model, X_train, y_train, scoring=scoring)
base_cv_scores = pd.DataFrame(scores)
display(base_cv_scores)

Unnamed: 0,fit_time,score_time,test_accuracy,test_recall,test_precision,test_f1_macro,test_f1_weighted
0,2.173763,0.545809,0.83423,0.686709,0.797794,0.808418,0.8309
1,2.173091,0.5477,0.838362,0.695238,0.802198,0.813301,0.835266
2,2.164752,0.527595,0.81681,0.692063,0.749141,0.791736,0.814941
3,2.176102,0.548288,0.832974,0.68254,0.796296,0.806546,0.829507
4,2.161563,0.536208,0.836207,0.734177,0.773333,0.815333,0.835136


In [122]:
from sklearn.model_selection import cross_validate
from sklearn.metrics import recall_score
scoring = ['accuracy', 'recall',  'precision','f1_macro', 'f1_weighted' ]
scores = cross_validate(model, X_train, y_train, scoring=scoring)
op_cv_scores = pd.DataFrame(scores)
display(op_cv_scores)

Unnamed: 0,fit_time,score_time,test_accuracy,test_recall,test_precision,test_f1_macro,test_f1_weighted
0,1.989331,0.203353,0.838536,0.712025,0.792254,0.815382,0.836284
1,1.996908,0.193344,0.828664,0.688889,0.780576,0.802991,0.825828
2,2.009243,0.195356,0.813578,0.669841,0.753571,0.786025,0.810682
3,2.020529,0.197755,0.836207,0.707937,0.787986,0.812496,0.833908
4,1.991159,0.192612,0.824353,0.721519,0.752475,0.802451,0.823432
