In [None]:
#Connect to wandb
#TO DO : how to save models on the weight and bias platform
import wandb
wandb.login()
wandb.init(project="hyperopt", 
           entity="benchmark-nlp",
           name='fake news datasets') #CHANGE

In [2]:
import os
os.chdir('../../..')

In [3]:
#Load packages
import warnings
import io
import numpy as np
import pandas as pd
from codecarbon import EmissionsTracker
import yaml
from util.dataloader import DataLoader
from preprocessing.preprocessor import Preprocessor
from util.datasplitter import data_splitter
from preprocessing.fasttext_embeddings import FastTextEmbeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, average_precision_score
warnings.filterwarnings("ignore")

In [4]:
#Set constant values
SEED=42
OPT_ITER=10

## Load data

In [6]:
dl = DataLoader(['fake_news'])
data = dl.load()


tweet_preprocessor = Preprocessor(is_tweet=True)
preprocessor = Preprocessor()

#We are not interested in the test sets for hyperparameter optimization
train_gossipcop, val_gossipcop, _ = data_splitter(data['gossipcop'],
                                 preprocessor, 
                                 create_val_set=True,   #No validation set is provided
                                 seed=SEED)

100%|███████████████████████████████████████████████████████████████████████████| 13267/13267 [01:54<00:00, 115.94it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 5323/5323 [00:42<00:00, 125.44it/s]


In [5]:
train_liar, val_liar, _ = data_splitter(data['liar'],
                                 preprocessor, 
                                 create_val_set=True,   #No validation set is provided
                                 seed=SEED)

10269 rows preprocessed in 3.19018292427063 seconds
1283 rows preprocessed in 0.46010279655456543 seconds
1284 rows preprocessed in 0.4202134609222412 seconds


In [7]:
train_coaid, val_coaid, _ = data_splitter(data['CoAID'],
                                          preprocessor,
                                          create_val_set=True,
                                          test_split = 0.25, #Based on informations from the paper
                                          val_split = 0.2, 
                                          seed=SEED)

5457 rows preprocessed in 7.255673170089722 seconds


In [8]:
%%time
#fasttext 
fasttext = FastTextEmbeddings()
fasttext.load_model('fasttext/cc.en.300.bin')

CPU times: total: 25.7 s
Wall time: 43.3 s




In [15]:
embedded_train_gossipcop = fasttext.generate_sentence_embeddings(train_gossipcop['text'])
embedded_val_gossipcop = fasttext.generate_sentence_embeddings(val_gossipcop['text'])
embedded_train_gossipcop['label'] = train_gossipcop['label'].to_list()
embedded_val_gossipcop['label'] = val_gossipcop['label'].to_list()

starting to generate sentence embeddings


100%|████████████████████████████████████████████████████████████████████████████| 11897/11897 [07:02<00:00, 28.18it/s]


starting to generate sentence embeddings


100%|██████████████████████████████████████████████████████████████████████████████| 2975/2975 [01:57<00:00, 25.30it/s]


In [9]:
embedded_train_coaid = fasttext.generate_sentence_embeddings(train_coaid['text'])
embedded_val_coaid = fasttext.generate_sentence_embeddings(val_coaid['text'])
embedded_train_coaid['label'] = train_coaid['label'].to_list()
embedded_val_coaid['label'] = val_coaid['label'].to_list()

Starting to generate sentence embeddings


100%|█████████████████████████████████████████████████████████████████████████████| 3273/3273 [00:19<00:00, 170.68it/s]


Starting to generate sentence embeddings


100%|███████████████████████████████████████████████████████████████████████████████| 819/819 [00:04<00:00, 200.23it/s]


In [11]:
embedded_train_liar = fasttext.generate_sentence_embeddings(train_liar['text'])
embedded_val_liar = fasttext.generate_sentence_embeddings(val_liar['text'])
embedded_train_liar['label'] = train_liar['label'].to_list()
embedded_val_liar['label'] = val_liar['label'].to_list()

starting to generate sentence embeddings


100%|███████████████████████████████████████████████████████████████████████████| 10269/10269 [00:21<00:00, 486.48it/s]


starting to generate sentence embeddings


100%|█████████████████████████████████████████████████████████████████████████████| 1284/1284 [00:02<00:00, 493.72it/s]


## Hyperopt

In [10]:
import yaml
#Load the template yaml sweep config file for logistic regression
#If the value range for an hyperparameter needs to be changed, better to do it in the .yaml file than in a notebook
with open("config/lr_sweep.yaml", 'r') as stream:
    sweep_config = yaml.safe_load(stream)

In [11]:
#The config is displayed as a nested dictionary
sweep_config

{'method': 'random',
 'entity': 'benchmark-nlp',
 'project': 'hyperopt',
 'metric': {'name': 'loss', 'goal': 'minimize'},
 'parameters': {'C': {'min': 0, 'max': 10, 'distribution': 'uniform'},
  'penalty': {'value': 'l2'},
  'solver': {'value': 'lbfgs'},
  'random_state': {'value': 42}}}

####  politifact   Tf-Idf

In [None]:
#Don't forget to name the sweep instance  
name = 'lr_tfidf_politifact' #change here
sweep_config['name'] =  name

#Generate a sweep_id
sweep_id = wandb.sweep(sweep_config, project="hyperopt")

def train_tfidf(config = None,
          train=train_politifact, #Change here
          val=val_politifact): #change here
    '''
    Generic WandB function to conduct hyperparameter optimization with tf-idf vectorizer
    '''
    # Initialize a new wandb run
    with wandb.init(config=config,group=name):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config
        vec = TfidfVectorizer()
        clf = LogisticRegression(C = config.C,
                                 penalty = config.penalty,
                                 solver = config.solver,
                                 random_state=config.random_state) #set the hyperparams here
        
        #Create the pipeline
        pipe = Pipeline([('vectorizer',vec),('clf',clf)])
        #Fit the pipeline
        pipe.fit(train['text'],train['label'])
        
        #Make predictions
        pred_val = pipe.predict(val['text'])
        pred_prob_val = pipe.predict_proba(val['text'])[:,1]
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')
        if train['label'].nunique() <=2:
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
        else:
            aucpc = '-'
            auc = '-'
        #Log metrics on WandB
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })

#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
wandb.agent(sweep_id, train_tfidf,count=OPT_ITER) #Count : number of iterations
tracker.stop()

In [21]:
#Don't forget to name the sweep instance   
name = 'lr_ft_politifact' #change here
sweep_config['name'] = name
#Generate a sweep_id
sweep_id = wandb.sweep(sweep_config, project="hyperopt")

def train_fasttext(config = None,
          train=embedded_train_politifact, #Change here
          val=embedded_val_politifact): #change here
    # Initialize a new wandb run
    with wandb.init(config=config, group=name):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config
        clf = LogisticRegression(C = config.C,
                                 penalty = config.penalty,
                                 solver = config.solver,
                                 random_state=config.random_state) #set the hyperparams here
        pipe = Pipeline([('clf',clf)])
        pipe.fit(train.fillna(0).drop(['label'],axis=1),train['label'])
        
        #Make predictions
        pred_val = pipe.predict(val.fillna(0).drop(['label'],axis=1))
        pred_prob_val = pipe.predict_proba(val.fillna(0).drop(['label'],axis=1))[:,1]
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')
        if train['label'].nunique() <=2:
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
            #Log predictions on WandB
        else:
            aucpc = '-'
            auc = '-'
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })


#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
wandb.agent(sweep_id, train_fasttext,count=OPT_ITER)
tracker.stop()



Create sweep with ID: q4kk06td
Sweep URL: https://wandb.ai/benchmark-nlp/hyperopt/sweeps/q4kk06td


[34m[1mwandb[0m: Agent Starting Run: 4cwcde3h with config:
[34m[1mwandb[0m: 	C: 1.5679252398786447
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.78609
AUC-PC,0.79527
accuracy,0.74556
f1 macro,0.70296


[34m[1mwandb[0m: Agent Starting Run: vklev73p with config:
[34m[1mwandb[0m: 	C: 7.402545625534253
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.85246
AUC-PC,0.8766
accuracy,0.7929
f1 macro,0.76935


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qxgwuied with config:
[34m[1mwandb[0m: 	C: 7.720505833587575
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.85275
AUC-PC,0.877
accuracy,0.7929
f1 macro,0.76935


[34m[1mwandb[0m: Agent Starting Run: 7hwopxzm with config:
[34m[1mwandb[0m: 	C: 3.590504783309245
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.81348
AUC-PC,0.83046
accuracy,0.78698
f1 macro,0.75964


[34m[1mwandb[0m: Agent Starting Run: rr7le3z7 with config:
[34m[1mwandb[0m: 	C: 1.9510971137793551
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.79203
AUC-PC,0.80125
accuracy,0.77515
f1 macro,0.74394


[34m[1mwandb[0m: Agent Starting Run: pj2evkb7 with config:
[34m[1mwandb[0m: 	C: 8.958742339262004
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.85696
AUC-PC,0.88031
accuracy,0.7929
f1 macro,0.76935


[34m[1mwandb[0m: Agent Starting Run: gre8nwkk with config:
[34m[1mwandb[0m: 	C: 3.4095418777067934
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.81652
AUC-PC,0.83343
accuracy,0.78698
f1 macro,0.75964


[34m[1mwandb[0m: Agent Starting Run: kk9oqh8c with config:
[34m[1mwandb[0m: 	C: 6.7321937463872725
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.84551
AUC-PC,0.86912
accuracy,0.7929
f1 macro,0.76935


[34m[1mwandb[0m: Agent Starting Run: 5es5a2gc with config:
[34m[1mwandb[0m: 	C: 0.8863200737700472
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.77507
AUC-PC,0.78325
accuracy,0.64497
f1 macro,0.52475


[34m[1mwandb[0m: Agent Starting Run: 5xllwc67 with config:
[34m[1mwandb[0m: 	C: 0.8450294908792344
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.77391
AUC-PC,0.78273
accuracy,0.64497
f1 macro,0.52475


0.0015662041155447848

#### gossipcop

In [None]:
#Don't forget to name the sweep instance  
name = 'lr_tfidf_gossipcop' #change here
sweep_config['name'] =  name

#Generate a sweep_id
sweep_id = wandb.sweep(sweep_config, project="hyperopt")

def train_tfidf(config = None,
          train=train_gossipcop, #Change here
          val=val_gossipcop): #change here
    '''
    Generic WandB function to conduct hyperparameter optimization with tf-idf vectorizer
    '''
    # Initialize a new wandb run
    with wandb.init(config=config,group=name):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config
        vec = TfidfVectorizer()
        clf = LogisticRegression(C = config.C,
                                 penalty = config.penalty,
                                 solver = config.solver,
                                 random_state=config.random_state) #set the hyperparams here
        
        #Create the pipeline
        pipe = Pipeline([('vectorizer',vec),('clf',clf)])
        #Fit the pipeline
        pipe.fit(train['text'],train['label'])
        
        #Make predictions
        pred_val = pipe.predict(val['text'])
        pred_prob_val = pipe.predict_proba(val['text'])[:,1]
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')
        if train['label'].nunique() <=2:
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
        else:
            aucpc = '-'
            auc = '-'
        #Log metrics on WandB
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })

#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
wandb.agent(sweep_id, train_tfidf,count=OPT_ITER)
tracker.stop()

In [23]:
#Don't forget to name the sweep instance   
name = 'lr_ft_gossipcop' #CHANGE HERE
sweep_config['name'] = name
#Generate a sweep_id
sweep_id = wandb.sweep(sweep_config, project="hyperopt")

def train_fasttext(config = None,
          train=embedded_train_gossipcop, #CHANGE HERE
          val=embedded_val_gossipcop): #CHANGE HERE
    # Initialize a new wandb run
    with wandb.init(config=config, group=name):
        config = wandb.config
        clf = LogisticRegression(C = config.C,
                                 penalty = config.penalty,
                                 solver = config.solver,
                                 random_state=config.random_state) #set the hyperparams here
        pipe = Pipeline([('clf',clf)])
        pipe.fit(train.fillna(0).drop(['label'],axis=1),train['label'])
        
        #Make predictions
        pred_val = pipe.predict(val.fillna(0).drop(['label'],axis=1))
        pred_prob_val = pipe.predict_proba(val.fillna(0).drop(['label'],axis=1))[:,1]
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')
        if train['label'].nunique() <=2:
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
            #Log predictions on WandB
        else:
            aucpc = '-'
            auc = '-'
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })


#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
wandb.agent(sweep_id, train_fasttext,count=OPT_ITER)
tracker.stop()



Create sweep with ID: y6lkh9yb
Sweep URL: https://wandb.ai/benchmark-nlp/hyperopt/sweeps/y6lkh9yb


[34m[1mwandb[0m: Agent Starting Run: jm1w9ubd with config:
[34m[1mwandb[0m: 	C: 4.051432569872457
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.77426
AUC-PC,0.86534
accuracy,0.79059
f1 macro,0.68094


[34m[1mwandb[0m: Agent Starting Run: p0zpre39 with config:
[34m[1mwandb[0m: 	C: 5.9596150202346045
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.7773
AUC-PC,0.86668
accuracy,0.79059
f1 macro,0.68545


[34m[1mwandb[0m: Agent Starting Run: cq8txlyk with config:
[34m[1mwandb[0m: 	C: 6.7220336485070415
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.77761
AUC-PC,0.86675
accuracy,0.78992
f1 macro,0.6877


[34m[1mwandb[0m: Agent Starting Run: i2qlrfpv with config:
[34m[1mwandb[0m: 	C: 6.084905361322282
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.77737
AUC-PC,0.8667
accuracy,0.79059
f1 macro,0.68545


[34m[1mwandb[0m: Agent Starting Run: 6y5jhx66 with config:
[34m[1mwandb[0m: 	C: 6.8006040886114665
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.77771
AUC-PC,0.86683
accuracy,0.78992
f1 macro,0.6877


[34m[1mwandb[0m: Agent Starting Run: h8hd8cjq with config:
[34m[1mwandb[0m: 	C: 4.6573831169638655
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.77527
AUC-PC,0.86581
accuracy,0.79126
f1 macro,0.68424


[34m[1mwandb[0m: Agent Starting Run: abu59kn6 with config:
[34m[1mwandb[0m: 	C: 6.677578762269977
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.77766
AUC-PC,0.86683
accuracy,0.79025
f1 macro,0.68766


[34m[1mwandb[0m: Agent Starting Run: yyfbft9u with config:
[34m[1mwandb[0m: 	C: 5.904495299081747
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.77724
AUC-PC,0.86666
accuracy,0.79059
f1 macro,0.68545


[34m[1mwandb[0m: Agent Starting Run: tj2f237m with config:
[34m[1mwandb[0m: 	C: 9.62558358479632
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.77946
AUC-PC,0.86769
accuracy,0.79261
f1 macro,0.69448


[34m[1mwandb[0m: Agent Starting Run: zmd2uvq5 with config:
[34m[1mwandb[0m: 	C: 0.5810914720265337
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.74903
AUC-PC,0.85225
accuracy,0.73983
f1 macro,0.53031


0.0018296275300614252

####  CoAID dataset



In [14]:
#Don't forget to name the sweep instance  
name = 'lr_tfidf_coaid' #change here
sweep_config['name'] =  name

#Generate a sweep_id
sweep_id = wandb.sweep(sweep_config, project="hyperopt")

def train_tfidf(config = None,
          train=train_coaid, #Change here
          val=val_coaid): #change here
    '''
    Generic WandB function to conduct hyperparameter optimization with tf-idf vectorizer
    '''
    # Initialize a new wandb run
    with wandb.init(config=config,group=name):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config
        vec = TfidfVectorizer()
        clf = LogisticRegression(C = config.C,
                                 penalty = config.penalty,
                                 solver = config.solver,
                                 random_state=config.random_state) #set the hyperparams here
        
        #Create the pipeline
        pipe = Pipeline([('vectorizer',vec),('clf',clf)])
        #Fit the pipeline
        pipe.fit(train['text'],train['label'])
        
        #Make predictions
        pred_val = pipe.predict(val['text'])
        pred_prob_val = pipe.predict_proba(val['text'])[:,1]
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')
        if train['label'].nunique() <=2:
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
        else:
            aucpc = '-'
            auc = '-'
        #Log metrics on WandB
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })

#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
wandb.agent(sweep_id, train_tfidf,count=OPT_ITER)
tracker.stop()

Create sweep with ID: tcj2qcwg
Sweep URL: https://wandb.ai/benchmark-nlp/hyperopt/sweeps/tcj2qcwg


[34m[1mwandb[0m: Agent Starting Run: z3mcw38e with config:
[34m[1mwandb[0m: 	C: 2.268078986060117
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.97741
AUC-PC,0.99554
accuracy,0.92918
f1 macro,0.85643


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rr030n3g with config:
[34m[1mwandb[0m: 	C: 4.602263524862772
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.97877
AUC-PC,0.99582
accuracy,0.93773
f1 macro,0.87761


[34m[1mwandb[0m: Agent Starting Run: wz9fblsq with config:
[34m[1mwandb[0m: 	C: 3.071743786594846
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.97803
AUC-PC,0.99567
accuracy,0.93162
f1 macro,0.8633


[34m[1mwandb[0m: Agent Starting Run: dm4ljnmp with config:
[34m[1mwandb[0m: 	C: 2.8257083164607124
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.97787
AUC-PC,0.99563
accuracy,0.93162
f1 macro,0.8633


[34m[1mwandb[0m: Agent Starting Run: s9zkky5r with config:
[34m[1mwandb[0m: 	C: 0.960509797136847
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.9753
AUC-PC,0.99509
accuracy,0.91575
f1 macro,0.81815


[34m[1mwandb[0m: Agent Starting Run: 65jjmev6 with config:
[34m[1mwandb[0m: 	C: 7.1266932562627625
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.97905
AUC-PC,0.99589
accuracy,0.94139
f1 macro,0.88745


[34m[1mwandb[0m: Agent Starting Run: 2tkg6br2 with config:
[34m[1mwandb[0m: 	C: 3.1669734887773906
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.97813
AUC-PC,0.99569
accuracy,0.93284
f1 macro,0.86621


[34m[1mwandb[0m: Agent Starting Run: m0qx027a with config:
[34m[1mwandb[0m: 	C: 2.46418540926801
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.97766
AUC-PC,0.99559
accuracy,0.92918
f1 macro,0.85744


[34m[1mwandb[0m: Agent Starting Run: ib5k4q75 with config:
[34m[1mwandb[0m: 	C: 6.711506264240641
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.97902
AUC-PC,0.99588
accuracy,0.94139
f1 macro,0.88745


[34m[1mwandb[0m: Agent Starting Run: ruy2a4h2 with config:
[34m[1mwandb[0m: 	C: 4.207980276600441
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.9786
AUC-PC,0.99579
accuracy,0.93773
f1 macro,0.87761


0.002363480667308772

In [15]:
#Don't forget to name the sweep instance   
name = 'lr_ft_coaid' #CHANGE HERE
sweep_config['name'] = name
#Generate a sweep_id
sweep_id = wandb.sweep(sweep_config, project="hyperopt")

def train_fasttext(config = None,
          train=embedded_train_coaid, #CHANGE HERE
          val=embedded_val_coaid): #CHANGE HERE
    # Initialize a new wandb run
    with wandb.init(config=config, group=name):
        config = wandb.config
        clf = LogisticRegression(C = config.C,
                                 penalty = config.penalty,
                                 solver = config.solver,
                                 random_state=config.random_state) #set the hyperparams here
        pipe = Pipeline([('clf',clf)])
        pipe.fit(train.fillna(0).drop(['label'],axis=1),train['label'])
        
        #Make predictions
        pred_val = pipe.predict(val.fillna(0).drop(['label'],axis=1))
        pred_prob_val = pipe.predict_proba(val.fillna(0).drop(['label'],axis=1))[:,1]
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')
        if train['label'].nunique() <=2:
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
            #Log predictions on WandB
        else:
            aucpc = '-'
            auc = '-'
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })


#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
wandb.agent(sweep_id, train_fasttext,count=OPT_ITER)
tracker.stop()



Create sweep with ID: etes0k3k
Sweep URL: https://wandb.ai/benchmark-nlp/hyperopt/sweeps/etes0k3k


[34m[1mwandb[0m: Agent Starting Run: hqgw9c7g with config:
[34m[1mwandb[0m: 	C: 3.758167554260737
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95932
AUC-PC,0.99147
accuracy,0.92796
f1 macro,0.85447


[34m[1mwandb[0m: Agent Starting Run: iy3klkg0 with config:
[34m[1mwandb[0m: 	C: 2.1970616756381145
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95744
AUC-PC,0.99112
accuracy,0.91941
f1 macro,0.83061


[34m[1mwandb[0m: Agent Starting Run: vnwq1bp0 with config:
[34m[1mwandb[0m: 	C: 4.831862991049962
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95984
AUC-PC,0.99153
accuracy,0.92918
f1 macro,0.85842


[34m[1mwandb[0m: Agent Starting Run: p7eho4ed with config:
[34m[1mwandb[0m: 	C: 2.6329088729313477
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95814
AUC-PC,0.99126
accuracy,0.9243
f1 macro,0.84433


[34m[1mwandb[0m: Agent Starting Run: l0g9as72 with config:
[34m[1mwandb[0m: 	C: 4.308244984380963
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95963
AUC-PC,0.99152
accuracy,0.92918
f1 macro,0.85842


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bppf67sv with config:
[34m[1mwandb[0m: 	C: 3.829196324709935
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95938
AUC-PC,0.99148
accuracy,0.92674
f1 macro,0.85252


[34m[1mwandb[0m: Agent Starting Run: b1tl4k3w with config:
[34m[1mwandb[0m: 	C: 0.2413344617109359
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.93507
AUC-PC,0.9863
accuracy,0.87057
f1 macro,0.6592


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c8zaan1g with config:
[34m[1mwandb[0m: 	C: 5.02790190056746
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96006
AUC-PC,0.99157
accuracy,0.92918
f1 macro,0.85842


[34m[1mwandb[0m: Agent Starting Run: j1ocqipm with config:
[34m[1mwandb[0m: 	C: 8.928909389549027
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96085
AUC-PC,0.99154
accuracy,0.93162
f1 macro,0.86695


[34m[1mwandb[0m: Agent Starting Run: vn1ahp0o with config:
[34m[1mwandb[0m: 	C: 8.96667124544859
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96085
AUC-PC,0.99153
accuracy,0.93162
f1 macro,0.86695


0.001521652704121228

#### liar

In [16]:
#Don't forget to name the sweep instance  
name = 'lr_tfidf_liar' #change here
sweep_config['name'] =  name

#Generate a sweep_id
sweep_id = wandb.sweep(sweep_config, project="hyperopt")

def train_tfidf(config = None,
          train=train_liar, #Change here
          val=val_liar): #change here
    '''
    Generic WandB function to conduct hyperparameter optimization with tf-idf vectorizer
    '''
    # Initialize a new wandb run
    with wandb.init(config=config,group=name):
        config = wandb.config
        vec = TfidfVectorizer()
        clf = LogisticRegression(C = config.C,
                                 penalty = config.penalty,
                                 solver = config.solver,
                                 random_state=config.random_state) #set the hyperparams here
        
        #Create the pipeline
        pipe = Pipeline([('vectorizer',vec),('clf',clf)])
        #Fit the pipeline
        pipe.fit(train['text'],train['label'])
        
        #Make predictions
        pred_val = pipe.predict(val['text'])
        pred_prob_val = pipe.predict_proba(val['text'])[:,1]
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')
        if train['label'].nunique() <=2:
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
        else:
            aucpc = '-'
            auc = '-'
        #Log metrics on WandB
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })

#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
wandb.agent(sweep_id, train_tfidf,count=OPT_ITER)
tracker.stop()



Create sweep with ID: ht9azxs3
Sweep URL: https://wandb.ai/benchmark-nlp/hyperopt/sweeps/ht9azxs3


[34m[1mwandb[0m: Agent Starting Run: b1qbe92a with config:
[34m[1mwandb[0m: 	C: 0.04453077629431812
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.21573
f1 macro,0.13287


[34m[1mwandb[0m: Agent Starting Run: cwuuaa88 with config:
[34m[1mwandb[0m: 	C: 8.162531974356131
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23364
f1 macro,0.22706


[34m[1mwandb[0m: Agent Starting Run: kp06e3rc with config:
[34m[1mwandb[0m: 	C: 5.863049326087397
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23209
f1 macro,0.22459


[34m[1mwandb[0m: Agent Starting Run: zfbzjn81 with config:
[34m[1mwandb[0m: 	C: 0.07721496010830275
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.24221
f1 macro,0.16572


[34m[1mwandb[0m: Agent Starting Run: vljc829s with config:
[34m[1mwandb[0m: 	C: 4.225969401958578
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23209
f1 macro,0.22173


[34m[1mwandb[0m: Agent Starting Run: 8t0rfx2d with config:
[34m[1mwandb[0m: 	C: 7.346064889264405
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23676
f1 macro,0.23147


[34m[1mwandb[0m: Agent Starting Run: 0wrn948m with config:
[34m[1mwandb[0m: 	C: 8.4102574185806
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23287
f1 macro,0.22559


[34m[1mwandb[0m: Agent Starting Run: xaob4k5q with config:
[34m[1mwandb[0m: 	C: 7.422979934573297
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23442
f1 macro,0.22628


[34m[1mwandb[0m: Agent Starting Run: jfhwz4fq with config:
[34m[1mwandb[0m: 	C: 8.204712532266917
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.2352
f1 macro,0.22861


[34m[1mwandb[0m: Agent Starting Run: o1m9krwk with config:
[34m[1mwandb[0m: 	C: 9.151164608732278
[34m[1mwandb[0m: 	penalty: l2
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	solver: lbfgs


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23131
f1 macro,0.2242


0.0011501132224013269

In [13]:
#Don't forget to name the sweep instance   
name = 'lr_ft_liar' #CHANGE HERE
sweep_config['name'] = name
#Generate a sweep_id
sweep_id = wandb.sweep(sweep_config, project="hyperopt")

def train_fasttext(config = None,
          train=embedded_train_liar, #CHANGE HERE
          val=embedded_val_liar): #CHANGE HERE
    # Initialize a new wandb run
    with wandb.init(config=config, group=name):
        config = wandb.config
        clf = LogisticRegression(C = config.C,
                                 penalty = config.penalty,
                                 solver = config.solver,
                                 random_state=config.random_state) #set the hyperparams here
        pipe = Pipeline([('clf',clf)])
        pipe.fit(train.fillna(0).drop(['label'],axis=1),train['label'])
        
        #Make predictions
        pred_val = pipe.predict(val.fillna(0).drop(['label'],axis=1))
        pred_prob_val = pipe.predict_proba(val.fillna(0).drop(['label'],axis=1))[:,1]
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')
        if train['label'].nunique() <=2:
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
            #Log predictions on WandB
        else:
            aucpc = '-'
            auc = '-'
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })


#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
wandb.agent(sweep_id, train_fasttext,count=OPT_ITER)
tracker.stop()

Create sweep with ID: s4iwuslh
Sweep URL: https://wandb.ai/benchmark-nlp/hyperopt/sweeps/s4iwuslh


NameError: name 'embedded_train_liar' is not defined