In [1]:
# Authenticate with Weights & Biases and open the notebook-level run.
# TODO: work out how to save trained models on the Weights & Biases platform.
import wandb

wandb.login()

# Settings of the notebook-level run; change `name` when working on another
# group of datasets.
run_settings = {
    "project": "xgb",
    "entity": "benchmark-nlp",
    "name": "fake news datasets",
}
wandb.init(**run_settings)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjtonglet[0m ([33mbenchmark-nlp[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
import os

# Move back to the root directory of the project so that the project-local
# packages and the relative paths used below (e.g. config/xgb_sweep.yaml)
# resolve.
# The guard keeps this cell idempotent: once the working directory already
# contains the sweep config, re-running the cell no longer climbs three more
# levels up the directory tree.
if not os.path.isfile(os.path.join('config', 'xgb_sweep.yaml')):
    os.chdir('../../..')

In [3]:
#Load packages
import warnings
import io
import numpy as np
import pandas as pd
from codecarbon import EmissionsTracker
import yaml
from util.dataloader import DataLoader
from preprocessing.preprocessor import Preprocessor
from util.datasplitter import data_splitter
from preprocessing.fasttext_embeddings import FastTextEmbeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, average_precision_score
# Install a global "ignore" filter so library warnings do not clutter the cell
# outputs. NOTE(review): this hides every warning category for the rest of the
# session, including ones that may point at real problems.
warnings.filterwarnings("ignore")

In [4]:
# Notebook-wide constants
SEED = 42      # seed handed to data_splitter so the splits are reproducible
OPT_ITER = 20  # number of runs each sweep agent executes (wandb.agent count)

## Load data

In [5]:
# Load the 'fake_news' dataset collection; `data` is indexed by dataset name below.
dl = DataLoader(['fake_news'])
data = dl.load()

# Two preprocessor variants: one configured for tweets, one with the default settings.
tweet_preprocessor = Preprocessor(is_tweet=True)
preprocessor = Preprocessor()

# Hyperparameter optimization only needs the train and validation splits,
# so the third value returned by data_splitter (the test split) is discarded.
gossipcop_splits = data_splitter(
    data['gossipcop'],
    preprocessor,
    create_val_set=True,  # no validation set is provided, so one is created
    seed=SEED,
)
train_gossipcop, val_gossipcop, _ = gossipcop_splits


100%|███████████████████████████████████████████████████████████████████████████| 13267/13267 [01:38<00:00, 134.11it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 5323/5323 [00:41<00:00, 128.64it/s]


18590 rows preprocessed in 97.84012413024902 seconds


In [6]:
# CoAID: explicit test and validation fractions are passed for this dataset.
coaid_splits = data_splitter(
    data['CoAID'],
    preprocessor,
    create_val_set=True,  # no validation set is provided, so one is created
    test_split=0.25,
    val_split=0.2,
    seed=SEED,
)
train_coaid, val_coaid, _ = coaid_splits

# liar: same call without explicit fractions (data_splitter's defaults apply).
liar_splits = data_splitter(
    data['liar'],
    preprocessor,
    create_val_set=True,  # no validation set is provided, so one is created
    seed=SEED,
)
train_liar, val_liar, _ = liar_splits

In [7]:
%%time
# Load the fastText binary model used to embed the texts
# (path is relative to the project root).
FASTTEXT_MODEL_PATH = 'fasttext/cc.en.300.bin'
fasttext = FastTextEmbeddings()
fasttext.load_model(FASTTEXT_MODEL_PATH)

CPU times: total: 17.9 s
Wall time: 34.1 s




In [8]:
# Embed the gossipcop texts with fastText (train split first, then validation).
embedded_train_gossipcop, embedded_val_gossipcop = [
    fasttext.generate_sentence_embeddings(split['text'])
    for split in (train_gossipcop, val_gossipcop)
]

# Attach the labels as plain lists, i.e. by position
# (assumes the embeddings keep the row order of the input texts - TODO confirm).
embedded_train_gossipcop['label'] = train_gossipcop['label'].to_list()
embedded_val_gossipcop['label'] = val_gossipcop['label'].to_list()

Starting to generate sentence embeddings


100%|████████████████████████████████████████████████████████████████████████████| 11897/11897 [04:10<00:00, 47.50it/s]


Starting to generate sentence embeddings


100%|██████████████████████████████████████████████████████████████████████████████| 2975/2975 [01:05<00:00, 45.41it/s]


In [17]:
# Embed the CoAID texts with fastText (train split first, then validation).
embedded_train_coaid, embedded_val_coaid = [
    fasttext.generate_sentence_embeddings(split['text'])
    for split in (train_coaid, val_coaid)
]

# Attach the labels as plain lists, i.e. by position
# (assumes the embeddings keep the row order of the input texts - TODO confirm).
embedded_train_coaid['label'] = train_coaid['label'].to_list()
embedded_val_coaid['label'] = val_coaid['label'].to_list()

Starting to generate sentence embeddings


100%|█████████████████████████████████████████████████████████████████████████████| 3273/3273 [00:09<00:00, 351.32it/s]


Starting to generate sentence embeddings


100%|███████████████████████████████████████████████████████████████████████████████| 819/819 [00:02<00:00, 373.09it/s]


In [9]:
# Embed the liar texts with fastText (train split first, then validation).
embedded_train_liar, embedded_val_liar = [
    fasttext.generate_sentence_embeddings(split['text'])
    for split in (train_liar, val_liar)
]

# Attach the labels as plain lists, i.e. by position
# (assumes the embeddings keep the row order of the input texts - TODO confirm).
embedded_train_liar['label'] = train_liar['label'].to_list()
embedded_val_liar['label'] = val_liar['label'].to_list()

Starting to generate sentence embeddings


100%|███████████████████████████████████████████████████████████████████████████| 10269/10269 [00:36<00:00, 279.58it/s]


Starting to generate sentence embeddings


100%|█████████████████████████████████████████████████████████████████████████████| 1284/1284 [00:03<00:00, 407.03it/s]


## Hyperopt

In [9]:
import yaml

# Load the template sweep configuration for XGBoost.
# If the value range of a hyperparameter has to change, edit the .yaml file
# rather than overriding it in this notebook.
with open("config/xgb_sweep.yaml") as sweep_file:
    sweep_config = yaml.safe_load(sweep_file)

In [10]:
#The config is displayed as a nested dictionary
#NOTE(review): the displayed sweep metric is 'loss' (goal: minimize), whereas the
#training functions in this notebook log "accuracy", "f1 macro", "AUC-PC" and "AUC";
#confirm which metric the sweep is meant to track.
#NOTE(review): max_depth is sampled from 0 to 10; the logged runs with max_depth 0
#report an AUC of 0.5, so a minimum of 1 in the .yaml file may be intended.
sweep_config

{'method': 'random',
 'entity': 'benchmark-nlp',
 'project': 'hyperopt',
 'metric': {'name': 'loss', 'goal': 'minimize'},
 'parameters': {'gamma': {'min': 0, 'max': 1, 'distribution': 'uniform'},
  'n_estimators': {'min': 10, 'max': 150, 'distribution': 'int_uniform'},
  'learning_rate': {'min': 0.001, 'max': 0.1, 'distribution': 'uniform'},
  'max_depth': {'min': 0, 'max': 10, 'distribution': 'int_uniform'},
  'random_state': {'value': 42}}}

#### gossipcop

In [None]:
# Name of this sweep instance - change it when switching model or dataset.
# The same name is reused further down as W&B run group and emissions project name.
name = 'xgb_tfidf_gossipcop'
sweep_config.update(name=name)

# Register the sweep in the "xgb" project and keep the id for the agent
sweep_id = wandb.sweep(sweep_config, project="xgb")

def train_tfidf(config = None,
          train=train_gossipcop, #Change here
          val=val_gossipcop): #change here
    '''
    Run one W&B sweep trial: fit a TF-IDF + XGBoost pipeline and log validation metrics.

    Parameters
    ----------
    config : optional
        Forwarded to ``wandb.init``. The hyperparameters actually used are read
        back from ``wandb.config`` (set by the Sweep Controller when the function
        is launched through ``wandb.agent``).
    train, val :
        Data with a 'text' column (fed to the vectorizer) and a 'label' column.
        The defaults are bound when this cell is executed.

    Logs "accuracy", "f1 macro", "AUC-PC" and "AUC" to W&B. The two AUC metrics
    are only computed when the training labels take at most two distinct values;
    otherwise the placeholder '-' is logged for both.
    '''
    # Initialize a new wandb run, grouped under the notebook-level sweep `name`
    with wandb.init(config=config,group=name):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config
        vec = TfidfVectorizer()
        clf = XGBClassifier(gamma=config.gamma,
                            n_estimators=config.n_estimators,
                            learning_rate=config.learning_rate,
                            max_depth=config.max_depth,
                            random_state=config.random_state) #set the hyperparams here

        #Create and fit the pipeline
        pipe = Pipeline([('vectorizer',vec),('clf',clf)])
        pipe.fit(train['text'],train['label'])

        #Label-based metrics
        pred_val = pipe.predict(val['text'])
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')

        if train['label'].nunique() <=2:
            #Score-based metrics: the probabilities (column 1 of predict_proba) are
            #only needed here, so they are no longer computed for multiclass data
            pred_prob_val = pipe.predict_proba(val['text'])[:,1]
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
        else:
            #Placeholder kept so that the logged keys match those of earlier runs
            aucpc = '-'
            auc = '-'
        #Log metrics on WandB
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })

#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
try:
    wandb.agent(sweep_id, train_tfidf,count=OPT_ITER)
finally:
    #Always stop the tracker, even when the sweep fails or is interrupted,
    #so that it does not keep running after this cell
    emissions = tracker.stop()
#Keep the value returned by tracker.stop() as the displayed result of the cell
emissions

In [None]:
# Name of this sweep instance - change it when switching model or dataset.
# The same name is reused further down as W&B run group and emissions project name.
name = 'xgb_ft_gossipcop'
sweep_config.update(name=name)

# Register the sweep in the "xgb" project and keep the id for the agent
sweep_id = wandb.sweep(sweep_config, project="xgb")

def train_fasttext(config = None,
          train=embedded_train_gossipcop, #CHANGE HERE
          val=embedded_val_gossipcop): #CHANGE HERE
    '''
    Run one W&B sweep trial: fit XGBoost on fastText embeddings and log validation metrics.

    Parameters
    ----------
    config : optional
        Forwarded to ``wandb.init``. The hyperparameters actually used are read
        back from ``wandb.config``.
    train, val :
        Embedded data with a 'label' column; every other column is used as a
        feature, with missing values replaced by 0. The defaults are bound when
        this cell is executed.

    Logs "accuracy", "f1 macro", "AUC-PC" and "AUC" to W&B. The two AUC metrics
    are only computed when the training labels take at most two distinct values;
    otherwise the placeholder '-' is logged for both.
    '''
    # Initialize a new wandb run, grouped under the notebook-level sweep `name`
    with wandb.init(config=config, group=name):
        config = wandb.config
        clf = XGBClassifier(gamma=config.gamma,
                            n_estimators=config.n_estimators,
                            learning_rate=config.learning_rate,
                            max_depth=config.max_depth,
                            random_state=config.random_state) #set the hyperparams here
        pipe = Pipeline([('clf',clf)])

        #Build each feature matrix once instead of once per pipeline call
        train_features = train.fillna(0).drop(['label'],axis=1)
        val_features = val.fillna(0).drop(['label'],axis=1)
        pipe.fit(train_features,train['label'])

        #Label-based metrics
        pred_val = pipe.predict(val_features)
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')

        if train['label'].nunique() <=2:
            #Score-based metrics: the probabilities (column 1 of predict_proba) are
            #only needed here, so they are no longer computed for multiclass data
            pred_prob_val = pipe.predict_proba(val_features)[:,1]
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
        else:
            #Placeholder kept so that the logged keys match those of earlier runs
            aucpc = '-'
            auc = '-'
        #Log metrics on WandB
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })


#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
try:
    wandb.agent(sweep_id, train_fasttext,count=OPT_ITER)
finally:
    #Always stop the tracker, even when the sweep fails or is interrupted,
    #so that it does not keep running after this cell
    emissions = tracker.stop()
#Keep the value returned by tracker.stop() as the displayed result of the cell
emissions

#### CoAID



In [12]:
# Name of this sweep instance - change it when switching model or dataset.
# The same name is reused further down as W&B run group and emissions project name.
name = 'xgb_tfidf_coaid'
sweep_config.update(name=name)

# Register the sweep in the "xgb" project and keep the id for the agent
sweep_id = wandb.sweep(sweep_config, project="xgb")

def train_tfidf(config = None,
          train=train_coaid, #Change here
          val=val_coaid): #change here
    '''
    Run one W&B sweep trial: fit a TF-IDF + XGBoost pipeline and log validation metrics.

    Parameters
    ----------
    config : optional
        Forwarded to ``wandb.init``. The hyperparameters actually used are read
        back from ``wandb.config`` (set by the Sweep Controller when the function
        is launched through ``wandb.agent``).
    train, val :
        Data with a 'text' column (fed to the vectorizer) and a 'label' column.
        The defaults are bound when this cell is executed.

    Logs "accuracy", "f1 macro", "AUC-PC" and "AUC" to W&B. The two AUC metrics
    are only computed when the training labels take at most two distinct values;
    otherwise the placeholder '-' is logged for both.
    '''
    # Initialize a new wandb run, grouped under the notebook-level sweep `name`
    with wandb.init(config=config,group=name):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config
        vec = TfidfVectorizer()
        clf = XGBClassifier(gamma=config.gamma,
                            n_estimators=config.n_estimators,
                            learning_rate=config.learning_rate,
                            max_depth=config.max_depth,
                            random_state=config.random_state) #set the hyperparams here

        #Create and fit the pipeline
        pipe = Pipeline([('vectorizer',vec),('clf',clf)])
        pipe.fit(train['text'],train['label'])

        #Label-based metrics
        pred_val = pipe.predict(val['text'])
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')

        if train['label'].nunique() <=2:
            #Score-based metrics: the probabilities (column 1 of predict_proba) are
            #only needed here, so they are no longer computed for multiclass data
            pred_prob_val = pipe.predict_proba(val['text'])[:,1]
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
        else:
            #Placeholder kept so that the logged keys match those of earlier runs
            aucpc = '-'
            auc = '-'
        #Log metrics on WandB
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })

#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
try:
    wandb.agent(sweep_id, train_tfidf,count=OPT_ITER)
finally:
    #Always stop the tracker, even when the sweep fails or is interrupted,
    #so that it does not keep running after this cell
    emissions = tracker.stop()
#Keep the value returned by tracker.stop() as the displayed result of the cell
emissions



Create sweep with ID: 3jaychrx
Sweep URL: https://wandb.ai/benchmark-nlp/xgb/sweeps/3jaychrx


[34m[1mwandb[0m: Agent Starting Run: 2rqk7f7e with config:
[34m[1mwandb[0m: 	gamma: 0.803627837598068
[34m[1mwandb[0m: 	learning_rate: 0.01487720120326014
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 63
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.89538
AUC-PC,0.96725
accuracy,0.89133
f1 macro,0.77734


[34m[1mwandb[0m: Agent Starting Run: n4skxv8u with config:
[34m[1mwandb[0m: 	gamma: 0.5423461612102266
[34m[1mwandb[0m: 	learning_rate: 0.01077590317175046
[34m[1mwandb[0m: 	max_depth: 1
[34m[1mwandb[0m: 	n_estimators: 17
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.62427
AUC-PC,0.86985
accuracy,0.87424
f1 macro,0.66372


[34m[1mwandb[0m: Agent Starting Run: kpt7cid7 with config:
[34m[1mwandb[0m: 	gamma: 0.4683219864232014
[34m[1mwandb[0m: 	learning_rate: 0.06877951538234965
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	n_estimators: 46
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.92179
AUC-PC,0.98019
accuracy,0.88889
f1 macro,0.76017


[34m[1mwandb[0m: Agent Starting Run: 8v19ihm7 with config:
[34m[1mwandb[0m: 	gamma: 0.9716085411365712
[34m[1mwandb[0m: 	learning_rate: 0.09904787767989837
[34m[1mwandb[0m: 	max_depth: 9
[34m[1mwandb[0m: 	n_estimators: 33
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95511
AUC-PC,0.99023
accuracy,0.91331
f1 macro,0.82609


[34m[1mwandb[0m: Agent Starting Run: vdb5f3y4 with config:
[34m[1mwandb[0m: 	gamma: 0.3242368715523102
[34m[1mwandb[0m: 	learning_rate: 0.02155808610619455
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	n_estimators: 76
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.89646
AUC-PC,0.96895
accuracy,0.88889
f1 macro,0.76902


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9obyahl4 with config:
[34m[1mwandb[0m: 	gamma: 0.2877911875214736
[34m[1mwandb[0m: 	learning_rate: 0.04016598117888619
[34m[1mwandb[0m: 	max_depth: 0
[34m[1mwandb[0m: 	n_estimators: 56
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.5
AUC-PC,0.83394
accuracy,0.83394
f1 macro,0.45473


[34m[1mwandb[0m: Agent Starting Run: fkaapeur with config:
[34m[1mwandb[0m: 	gamma: 0.7715185498225615
[34m[1mwandb[0m: 	learning_rate: 0.06550310755264453
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 139
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96215
AUC-PC,0.99215
accuracy,0.92796
f1 macro,0.86879


[34m[1mwandb[0m: Agent Starting Run: 8muu3f18 with config:
[34m[1mwandb[0m: 	gamma: 0.1729760621456191
[34m[1mwandb[0m: 	learning_rate: 0.08208579175316763
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 110
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96403
AUC-PC,0.9926
accuracy,0.9243
f1 macro,0.86413


[34m[1mwandb[0m: Agent Starting Run: bcrxlvz2 with config:
[34m[1mwandb[0m: 	gamma: 0.6044010689556408
[34m[1mwandb[0m: 	learning_rate: 0.02791943606569448
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 37
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.9165
AUC-PC,0.97415
accuracy,0.90476
f1 macro,0.80556


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: juhl4yvu with config:
[34m[1mwandb[0m: 	gamma: 0.7659939243910101
[34m[1mwandb[0m: 	learning_rate: 0.025789124395491657
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 89
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.94154
AUC-PC,0.98634
accuracy,0.91087
f1 macro,0.81867


[34m[1mwandb[0m: Agent Starting Run: n44p3lau with config:
[34m[1mwandb[0m: 	gamma: 0.1327088386198314
[34m[1mwandb[0m: 	learning_rate: 0.04724542469294626
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 47
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.9447
AUC-PC,0.98716
accuracy,0.91087
f1 macro,0.81994


[34m[1mwandb[0m: Agent Starting Run: z5r10qjd with config:
[34m[1mwandb[0m: 	gamma: 0.27677369154139453
[34m[1mwandb[0m: 	learning_rate: 0.05641101369673189
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 34
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.93058
AUC-PC,0.98193
accuracy,0.90354
f1 macro,0.80093


[34m[1mwandb[0m: Agent Starting Run: y6se2ikt with config:
[34m[1mwandb[0m: 	gamma: 0.9251419404967044
[34m[1mwandb[0m: 	learning_rate: 0.09688526049812832
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 115
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96122
AUC-PC,0.99192
accuracy,0.92796
f1 macro,0.86879


[34m[1mwandb[0m: Agent Starting Run: 53elkcjf with config:
[34m[1mwandb[0m: 	gamma: 0.473868302623215
[34m[1mwandb[0m: 	learning_rate: 0.025126179716554897
[34m[1mwandb[0m: 	max_depth: 9
[34m[1mwandb[0m: 	n_estimators: 82
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.94595
AUC-PC,0.98784
accuracy,0.91453
f1 macro,0.82913


[34m[1mwandb[0m: Agent Starting Run: b11y9u1p with config:
[34m[1mwandb[0m: 	gamma: 0.3997953694581795
[34m[1mwandb[0m: 	learning_rate: 0.0972159338575958
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 82
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95333
AUC-PC,0.98974
accuracy,0.90965
f1 macro,0.81553


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8yoq96u7 with config:
[34m[1mwandb[0m: 	gamma: 0.7599854907985318
[34m[1mwandb[0m: 	learning_rate: 0.0644567439837874
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 12
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.91498
AUC-PC,0.97327
accuracy,0.90476
f1 macro,0.80693


[34m[1mwandb[0m: Agent Starting Run: gj5vu2t6 with config:
[34m[1mwandb[0m: 	gamma: 0.7924626911397618
[34m[1mwandb[0m: 	learning_rate: 0.007361571801752195
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 117
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.9099
AUC-PC,0.97181
accuracy,0.9011
f1 macro,0.79879


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8sxsncj3 with config:
[34m[1mwandb[0m: 	gamma: 0.6525248434891573
[34m[1mwandb[0m: 	learning_rate: 0.0918220653535241
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	n_estimators: 107
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95103
AUC-PC,0.9891
accuracy,0.90965
f1 macro,0.8142


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: soyl99po with config:
[34m[1mwandb[0m: 	gamma: 0.8432000119288168
[34m[1mwandb[0m: 	learning_rate: 0.08892216573945395
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 30
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95124
AUC-PC,0.98919
accuracy,0.91453
f1 macro,0.8303


[34m[1mwandb[0m: Agent Starting Run: xoqjsi8y with config:
[34m[1mwandb[0m: 	gamma: 0.01257341857072758
[34m[1mwandb[0m: 	learning_rate: 0.02825924609698786
[34m[1mwandb[0m: 	max_depth: 0
[34m[1mwandb[0m: 	n_estimators: 93
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.5
AUC-PC,0.83394
accuracy,0.83394
f1 macro,0.45473


0.0034603223140008657

In [25]:
# Name of this sweep instance - change it when switching model or dataset.
# The same name is reused further down as W&B run group and emissions project name.
name = 'xgb_ft_coaid'
sweep_config.update(name=name)

# Register the sweep in the "xgb" project and keep the id for the agent
sweep_id = wandb.sweep(sweep_config, project="xgb")

def train_fasttext(config = None,
          train=embedded_train_coaid, #CHANGE HERE
          val=embedded_val_coaid): #CHANGE HERE
    '''
    Run one W&B sweep trial: fit XGBoost on fastText embeddings and log validation metrics.

    Parameters
    ----------
    config : optional
        Forwarded to ``wandb.init``. The hyperparameters actually used are read
        back from ``wandb.config``.
    train, val :
        Embedded data with a 'label' column; every other column is used as a
        feature, with missing values replaced by 0. The defaults are bound when
        this cell is executed.

    Logs "accuracy", "f1 macro", "AUC-PC" and "AUC" to W&B. The two AUC metrics
    are only computed when the training labels take at most two distinct values;
    otherwise the placeholder '-' is logged for both.
    '''
    # Initialize a new wandb run, grouped under the notebook-level sweep `name`
    with wandb.init(config=config, group=name):
        config = wandb.config
        clf = XGBClassifier(gamma=config.gamma,
                            n_estimators=config.n_estimators,
                            learning_rate=config.learning_rate,
                            max_depth=config.max_depth,
                            random_state=config.random_state) #set the hyperparams here
        pipe = Pipeline([('clf',clf)])

        #Build each feature matrix once instead of once per pipeline call
        train_features = train.fillna(0).drop(['label'],axis=1)
        val_features = val.fillna(0).drop(['label'],axis=1)
        pipe.fit(train_features,train['label'])

        #Label-based metrics
        pred_val = pipe.predict(val_features)
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')

        if train['label'].nunique() <=2:
            #Score-based metrics: the probabilities (column 1 of predict_proba) are
            #only needed here, so they are no longer computed for multiclass data
            pred_prob_val = pipe.predict_proba(val_features)[:,1]
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
        else:
            #Placeholder kept so that the logged keys match those of earlier runs
            aucpc = '-'
            auc = '-'
        #Log metrics on WandB
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })


#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
try:
    wandb.agent(sweep_id, train_fasttext,count=OPT_ITER)
finally:
    #Always stop the tracker, even when the sweep fails or is interrupted,
    #so that it does not keep running after this cell
    emissions = tracker.stop()
#Keep the value returned by tracker.stop() as the displayed result of the cell
emissions



Create sweep with ID: 2lkcm8ev
Sweep URL: https://wandb.ai/benchmark-nlp/xgb/sweeps/2lkcm8ev


[34m[1mwandb[0m: Agent Starting Run: 3ttvjndu with config:
[34m[1mwandb[0m: 	gamma: 0.9229902247386036
[34m[1mwandb[0m: 	learning_rate: 0.08225241931294472
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 17
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.9571
AUC-PC,0.98967
accuracy,0.91331
f1 macro,0.82363


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tho4bbwv with config:
[34m[1mwandb[0m: 	gamma: 0.17997929845326832
[34m[1mwandb[0m: 	learning_rate: 0.06510934539035863
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	n_estimators: 34
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95621
AUC-PC,0.99087
accuracy,0.90598
f1 macro,0.79387


[34m[1mwandb[0m: Agent Starting Run: o78q3l6i with config:
[34m[1mwandb[0m: 	gamma: 0.03029312543971019
[34m[1mwandb[0m: 	learning_rate: 0.06458164031533263
[34m[1mwandb[0m: 	max_depth: 9
[34m[1mwandb[0m: 	n_estimators: 131
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.97193
AUC-PC,0.99427
accuracy,0.93529
f1 macro,0.87612


[34m[1mwandb[0m: Agent Starting Run: 3cjboo0u with config:
[34m[1mwandb[0m: 	gamma: 0.5441156632961585
[34m[1mwandb[0m: 	learning_rate: 0.03308685857967908
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 68
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96416
AUC-PC,0.99247
accuracy,0.9304
f1 macro,0.86502


[34m[1mwandb[0m: Agent Starting Run: xlh06rzm with config:
[34m[1mwandb[0m: 	gamma: 0.08394467028045138
[34m[1mwandb[0m: 	learning_rate: 0.08588011786629841
[34m[1mwandb[0m: 	max_depth: 1
[34m[1mwandb[0m: 	n_estimators: 73
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.9558
AUC-PC,0.9906
accuracy,0.91209
f1 macro,0.80951


[34m[1mwandb[0m: Agent Starting Run: eigo0o5m with config:
[34m[1mwandb[0m: 	gamma: 0.4320307598434917
[34m[1mwandb[0m: 	learning_rate: 0.05424057083056191
[34m[1mwandb[0m: 	max_depth: 1
[34m[1mwandb[0m: 	n_estimators: 34
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.9368
AUC-PC,0.98645
accuracy,0.86813
f1 macro,0.64184


[34m[1mwandb[0m: Agent Starting Run: go0vduex with config:
[34m[1mwandb[0m: 	gamma: 0.2006401384595673
[34m[1mwandb[0m: 	learning_rate: 0.06656621643910683
[34m[1mwandb[0m: 	max_depth: 8
[34m[1mwandb[0m: 	n_estimators: 84
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96895
AUC-PC,0.99354
accuracy,0.93651
f1 macro,0.87727


[34m[1mwandb[0m: Agent Starting Run: 1s3tcf1n with config:
[34m[1mwandb[0m: 	gamma: 0.12146023619527237
[34m[1mwandb[0m: 	learning_rate: 0.045349045131090206
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	n_estimators: 23
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.94018
AUC-PC,0.98714
accuracy,0.89744
f1 macro,0.76283


[34m[1mwandb[0m: Agent Starting Run: oal62ro6 with config:
[34m[1mwandb[0m: 	gamma: 0.26615722828733357
[34m[1mwandb[0m: 	learning_rate: 0.08354606187671874
[34m[1mwandb[0m: 	max_depth: 0
[34m[1mwandb[0m: 	n_estimators: 109
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.5
AUC-PC,0.83394
accuracy,0.83394
f1 macro,0.45473


[34m[1mwandb[0m: Agent Starting Run: vj28korv with config:
[34m[1mwandb[0m: 	gamma: 0.9305010806584896
[34m[1mwandb[0m: 	learning_rate: 0.01384658654719058
[34m[1mwandb[0m: 	max_depth: 8
[34m[1mwandb[0m: 	n_estimators: 141
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96557
AUC-PC,0.99279
accuracy,0.93407
f1 macro,0.87338


[34m[1mwandb[0m: Agent Starting Run: 1zs5viqq with config:
[34m[1mwandb[0m: 	gamma: 0.4616730695814377
[34m[1mwandb[0m: 	learning_rate: 0.018826599436620062
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	n_estimators: 132
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95702
AUC-PC,0.99098
accuracy,0.91087
f1 macro,0.80761


[34m[1mwandb[0m: Agent Starting Run: x113ding with config:
[34m[1mwandb[0m: 	gamma: 0.4992842118161299
[34m[1mwandb[0m: 	learning_rate: 0.014702243120187796
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 95
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96292
AUC-PC,0.99224
accuracy,0.94261
f1 macro,0.88943


[34m[1mwandb[0m: Agent Starting Run: h142wf4a with config:
[34m[1mwandb[0m: 	gamma: 0.06161200638014119
[34m[1mwandb[0m: 	learning_rate: 0.00277434164852012
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 23
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.89533
AUC-PC,0.96806
accuracy,0.88278
f1 macro,0.78586


[34m[1mwandb[0m: Agent Starting Run: m3zgfl9o with config:
[34m[1mwandb[0m: 	gamma: 0.3673011491606005
[34m[1mwandb[0m: 	learning_rate: 0.08900001882983456
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 95
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.97134
AUC-PC,0.99411
accuracy,0.93773
f1 macro,0.88231


[34m[1mwandb[0m: Agent Starting Run: 1tj99rok with config:
[34m[1mwandb[0m: 	gamma: 0.970710087167786
[34m[1mwandb[0m: 	learning_rate: 0.04265682039961101
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 81
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96865
AUC-PC,0.99355
accuracy,0.93407
f1 macro,0.87255


[34m[1mwandb[0m: Agent Starting Run: r61ewvii with config:
[34m[1mwandb[0m: 	gamma: 0.188002049793602
[34m[1mwandb[0m: 	learning_rate: 0.023565667865723148
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 96
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96443
AUC-PC,0.99255
accuracy,0.93651
f1 macro,0.87807


[34m[1mwandb[0m: Agent Starting Run: t1xeyfrd with config:
[34m[1mwandb[0m: 	gamma: 0.8735906367758377
[34m[1mwandb[0m: 	learning_rate: 0.004929891230001128
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 109
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.95306
AUC-PC,0.99
accuracy,0.93284
f1 macro,0.87544


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vf3s994j with config:
[34m[1mwandb[0m: 	gamma: 0.551296450604776
[34m[1mwandb[0m: 	learning_rate: 0.03258382554192019
[34m[1mwandb[0m: 	max_depth: 1
[34m[1mwandb[0m: 	n_estimators: 21
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.89465
AUC-PC,0.96741
accuracy,0.85836
f1 macro,0.59826


[34m[1mwandb[0m: Agent Starting Run: gnx7ksq8 with config:
[34m[1mwandb[0m: 	gamma: 0.4155623020813912
[34m[1mwandb[0m: 	learning_rate: 0.012040942563092238
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 18
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.93831
AUC-PC,0.98493
accuracy,0.89866
f1 macro,0.80967


[34m[1mwandb[0m: Agent Starting Run: jirq5u48 with config:
[34m[1mwandb[0m: 	gamma: 0.98885470736457
[34m[1mwandb[0m: 	learning_rate: 0.08222559858376109
[34m[1mwandb[0m: 	max_depth: 8
[34m[1mwandb[0m: 	n_estimators: 59
[34m[1mwandb[0m: 	random_state: 42


0,1
AUC,▁
AUC-PC,▁
accuracy,▁
f1 macro,▁

0,1
AUC,0.96873
AUC-PC,0.99349
accuracy,0.94017
f1 macro,0.88621


0.0032671196163521665

#### liar

In [15]:
# Name of this sweep instance - change it when switching model or dataset.
# The same name is reused further down as W&B run group and emissions project name.
name = 'xgb_tfidf_liar'
sweep_config.update(name=name)

# Register the sweep in the "xgb" project and keep the id for the agent
sweep_id = wandb.sweep(sweep_config, project="xgb")

def train_tfidf(config = None,
          train=train_liar, #Change here
          val=val_liar): #change here
    '''
    Run one W&B sweep trial: fit a TF-IDF + XGBoost pipeline and log validation metrics.

    Parameters
    ----------
    config : optional
        Forwarded to ``wandb.init``. The hyperparameters actually used are read
        back from ``wandb.config`` (set by the Sweep Controller when the function
        is launched through ``wandb.agent``).
    train, val :
        Data with a 'text' column (fed to the vectorizer) and a 'label' column.
        The defaults are bound when this cell is executed.

    Logs "accuracy", "f1 macro", "AUC-PC" and "AUC" to W&B. The two AUC metrics
    are only computed when the training labels take at most two distinct values;
    otherwise the placeholder '-' is logged for both.
    '''
    # Initialize a new wandb run, grouped under the notebook-level sweep `name`
    with wandb.init(config=config,group=name):
        config = wandb.config
        vec = TfidfVectorizer()
        clf = XGBClassifier(gamma=config.gamma,
                            n_estimators=config.n_estimators,
                            learning_rate=config.learning_rate,
                            max_depth=config.max_depth,
                            random_state=config.random_state) #set the hyperparams here

        #Create and fit the pipeline
        pipe = Pipeline([('vectorizer',vec),('clf',clf)])
        pipe.fit(train['text'],train['label'])

        #Label-based metrics
        pred_val = pipe.predict(val['text'])
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')

        if train['label'].nunique() <=2:
            #Score-based metrics: the probabilities (column 1 of predict_proba) are
            #only needed here, so they are no longer computed for multiclass data
            pred_prob_val = pipe.predict_proba(val['text'])[:,1]
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
        else:
            #Placeholder kept so that the logged keys match those of earlier runs
            aucpc = '-'
            auc = '-'
        #Log metrics on WandB
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })

#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
try:
    wandb.agent(sweep_id, train_tfidf,count=OPT_ITER)
finally:
    #Always stop the tracker, even when the sweep fails or is interrupted,
    #so that it does not keep running after this cell
    emissions = tracker.stop()
#Keep the value returned by tracker.stop() as the displayed result of the cell
emissions

Create sweep with ID: 49qir2ww
Sweep URL: https://wandb.ai/benchmark-nlp/xgb/sweeps/49qir2ww


[34m[1mwandb[0m: Agent Starting Run: fbed0h82 with config:
[34m[1mwandb[0m: 	gamma: 0.12353902011952922
[34m[1mwandb[0m: 	learning_rate: 0.015059595514406703
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 114
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.2134
f1 macro,0.15171


[34m[1mwandb[0m: Agent Starting Run: hzu6oeeo with config:
[34m[1mwandb[0m: 	gamma: 0.10618271088403695
[34m[1mwandb[0m: 	learning_rate: 0.050215583347495686
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 89
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.24377
f1 macro,0.19328


[34m[1mwandb[0m: Agent Starting Run: x9rvbt73 with config:
[34m[1mwandb[0m: 	gamma: 0.17426476998942209
[34m[1mwandb[0m: 	learning_rate: 0.02225709390913024
[34m[1mwandb[0m: 	max_depth: 0
[34m[1mwandb[0m: 	n_estimators: 127
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.19315
f1 macro,0.05396


[34m[1mwandb[0m: Agent Starting Run: vnjquw7q with config:
[34m[1mwandb[0m: 	gamma: 0.4274871978689072
[34m[1mwandb[0m: 	learning_rate: 0.029376831677582045
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	n_estimators: 148
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.21262
f1 macro,0.14998


[34m[1mwandb[0m: Agent Starting Run: hp9bp4nd with config:
[34m[1mwandb[0m: 	gamma: 0.8846180694890843
[34m[1mwandb[0m: 	learning_rate: 0.015855858934449588
[34m[1mwandb[0m: 	max_depth: 0
[34m[1mwandb[0m: 	n_estimators: 15
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.19315
f1 macro,0.05396


[34m[1mwandb[0m: Agent Starting Run: i7mra31q with config:
[34m[1mwandb[0m: 	gamma: 0.1779244238907215
[34m[1mwandb[0m: 	learning_rate: 0.02588683302319907
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	n_estimators: 60
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.20794
f1 macro,0.1355


[34m[1mwandb[0m: Agent Starting Run: ufihmcze with config:
[34m[1mwandb[0m: 	gamma: 0.21975607101928096
[34m[1mwandb[0m: 	learning_rate: 0.0451662201431415
[34m[1mwandb[0m: 	max_depth: 1
[34m[1mwandb[0m: 	n_estimators: 122
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.20639
f1 macro,0.13011


[34m[1mwandb[0m: Agent Starting Run: hzcocdz6 with config:
[34m[1mwandb[0m: 	gamma: 0.3666653165520216
[34m[1mwandb[0m: 	learning_rate: 0.01334071792046342
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 144
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23598
f1 macro,0.20159


[34m[1mwandb[0m: Agent Starting Run: 31ywdmds with config:
[34m[1mwandb[0m: 	gamma: 0.15838972827216868
[34m[1mwandb[0m: 	learning_rate: 0.03849042353083415
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 110
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.20327
f1 macro,0.1522


[34m[1mwandb[0m: Agent Starting Run: abycg5iq with config:
[34m[1mwandb[0m: 	gamma: 0.13074743566444846
[34m[1mwandb[0m: 	learning_rate: 0.08119769426679056
[34m[1mwandb[0m: 	max_depth: 9
[34m[1mwandb[0m: 	n_estimators: 84
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.22897
f1 macro,0.20005


[34m[1mwandb[0m: Agent Starting Run: mgznzo4x with config:
[34m[1mwandb[0m: 	gamma: 0.7671358876940165
[34m[1mwandb[0m: 	learning_rate: 0.039934925516250025
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 143
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.21729
f1 macro,0.16462


[34m[1mwandb[0m: Agent Starting Run: 128abetc with config:
[34m[1mwandb[0m: 	gamma: 0.6044994221242421
[34m[1mwandb[0m: 	learning_rate: 0.0915879640094272
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 39
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23442
f1 macro,0.19041


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lm1bcbzr with config:
[34m[1mwandb[0m: 	gamma: 0.2342714162834416
[34m[1mwandb[0m: 	learning_rate: 0.094334022212529
[34m[1mwandb[0m: 	max_depth: 0
[34m[1mwandb[0m: 	n_estimators: 76
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.19315
f1 macro,0.05396


[34m[1mwandb[0m: Agent Starting Run: gmfh11ny with config:
[34m[1mwandb[0m: 	gamma: 0.35759789161113087
[34m[1mwandb[0m: 	learning_rate: 0.08316215623891453
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 149
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.2391
f1 macro,0.20701


[34m[1mwandb[0m: Agent Starting Run: w35gzuq0 with config:
[34m[1mwandb[0m: 	gamma: 0.8263035514282455
[34m[1mwandb[0m: 	learning_rate: 0.07857329834597163
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 31
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23598
f1 macro,0.20071


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ej3g5zt8 with config:
[34m[1mwandb[0m: 	gamma: 0.16559762589000027
[34m[1mwandb[0m: 	learning_rate: 0.0813968683774145
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 77
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.2352
f1 macro,0.19219


[34m[1mwandb[0m: Agent Starting Run: puw3ciww with config:
[34m[1mwandb[0m: 	gamma: 0.6140842541720232
[34m[1mwandb[0m: 	learning_rate: 0.07429868667764379
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	n_estimators: 139
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.22274
f1 macro,0.17461


[34m[1mwandb[0m: Agent Starting Run: niiase0r with config:
[34m[1mwandb[0m: 	gamma: 0.34747950103948033
[34m[1mwandb[0m: 	learning_rate: 0.08957171988552483
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 73
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.24611
f1 macro,0.19129


[34m[1mwandb[0m: Agent Starting Run: zqsghzn5 with config:
[34m[1mwandb[0m: 	gamma: 0.8103916395021523
[34m[1mwandb[0m: 	learning_rate: 0.06284145643864351
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 105
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.2204
f1 macro,0.17071


[34m[1mwandb[0m: Agent Starting Run: 0mvuqiug with config:
[34m[1mwandb[0m: 	gamma: 0.8765847423948667
[34m[1mwandb[0m: 	learning_rate: 0.06480873617175187
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 43
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.2095
f1 macro,0.15392


0.0032428502195058343

In [16]:
# Sweep setup for the fastText-embeddings + XGBoost search on the liar dataset.
# `name` labels the sweep and is also read further down in this cell as the
# wandb run group and as the EmissionsTracker project name, so it must be
# updated whenever this cell is reused for another dataset/representation.
name = 'xgb_ft_liar' #CHANGE HERE
sweep_config['name'] = name
# Register the sweep in the "xgb" project; the resulting sweep_id is what
# wandb.agent is given below to launch the runs.
sweep_id = wandb.sweep(sweep_config, project="xgb")

def train_fasttext(config = None,
          train=embedded_train_liar, #CHANGE HERE
          val=embedded_val_liar): #CHANGE HERE
    '''
    Run one WandB sweep trial: fit XGBoost on pre-computed embedding features
    from `train` and log validation metrics computed on `val`.

    Parameters
    ----------
    config : passed as-is to ``wandb.init``. The hyperparameters actually
        used (gamma, n_estimators, learning_rate, max_depth, random_state)
        are read back from ``wandb.config`` once the run is initialised.
    train, val : frames with a 'label' column; every other column is used
        as a feature after missing values are replaced by 0. The defaults
        are bound when this cell is executed.

    Logs
    ----
    "accuracy", "f1 macro", "AUC-PC" and "AUC". The two AUC metrics are only
    computed when the training labels take at most two distinct values;
    otherwise the placeholder '-' is logged for both.
    '''
    # Initialize a new wandb run
    with wandb.init(config=config, group=name):
        config = wandb.config
        clf = XGBClassifier(gamma=config.gamma,
                                 n_estimators=config.n_estimators,
                                 learning_rate=config.learning_rate,
                                 max_depth=config.max_depth,
                                 random_state=config.random_state) #set the hyperparams here
        pipe = Pipeline([('clf',clf)])

        # Build each feature matrix once (NaN -> 0, label column removed)
        # instead of recomputing it for every predict call.
        features_train = train.fillna(0).drop(['label'],axis=1)
        features_val = val.fillna(0).drop(['label'],axis=1)
        pipe.fit(features_train,train['label'])

        #Make predictions
        pred_val = pipe.predict(features_val)
        accuracy = accuracy_score(val['label'],pred_val)
        f1_macro = f1_score(val['label'],pred_val,average='macro')
        if train['label'].nunique() <=2:
            # Probabilities are only needed for the binary AUC metrics, so
            # they are computed here rather than on every trial: in the
            # multi-class case the result was discarded and the column-1
            # slice had no meaning.
            pred_prob_val = pipe.predict_proba(features_val)[:,1]
            aucpc =  average_precision_score(val['label'],pred_prob_val)
            auc = roc_auc_score(val['label'],pred_prob_val)
        else:
            # Placeholder kept so the logged keys stay the same across datasets
            aucpc = '-'
            auc = '-'
        #Log metrics on WandB
        wandb.log({"accuracy": accuracy, "f1 macro":f1_macro, "AUC-PC":aucpc, 'AUC':auc })


#Track emissions
tracker = EmissionsTracker(project_name=name,log_level='warning', measure_power_secs=300,
                           output_file='output/emissions_hyperopt.csv')
#Launch the agent
tracker.start()
try:
    wandb.agent(sweep_id, train_fasttext,count=OPT_ITER)
finally:
    # Stop the tracker even if the sweep fails or the cell is interrupted,
    # otherwise it would keep running in the kernel after the error.
    emissions = tracker.stop()
# Keep the value returned by tracker.stop() as the cell's last expression so
# it is still displayed as the cell output.
emissions



Create sweep with ID: ugwz4xz6
Sweep URL: https://wandb.ai/benchmark-nlp/xgb/sweeps/ugwz4xz6


[34m[1mwandb[0m: Agent Starting Run: n0wdjrhk with config:
[34m[1mwandb[0m: 	gamma: 0.6439190854821173
[34m[1mwandb[0m: 	learning_rate: 0.09177557113871249
[34m[1mwandb[0m: 	max_depth: 1
[34m[1mwandb[0m: 	n_estimators: 26
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23598
f1 macro,0.14996


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bfz0howy with config:
[34m[1mwandb[0m: 	gamma: 0.16484690958326187
[34m[1mwandb[0m: 	learning_rate: 0.026709659965655785
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 85
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23131
f1 macro,0.20306


[34m[1mwandb[0m: Agent Starting Run: jgftlm6b with config:
[34m[1mwandb[0m: 	gamma: 0.3051249349580568
[34m[1mwandb[0m: 	learning_rate: 0.05706939288416224
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 14
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.2391
f1 macro,0.20755


[34m[1mwandb[0m: Agent Starting Run: 1xla7eie with config:
[34m[1mwandb[0m: 	gamma: 0.5168598475973685
[34m[1mwandb[0m: 	learning_rate: 0.05717140841536311
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 65
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.22664
f1 macro,0.20369


[34m[1mwandb[0m: Agent Starting Run: 7x92ttmu with config:
[34m[1mwandb[0m: 	gamma: 0.8377985269549321
[34m[1mwandb[0m: 	learning_rate: 0.0712190913075617
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 39
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.24844
f1 macro,0.19636


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9o61bi7k with config:
[34m[1mwandb[0m: 	gamma: 0.5160949088869208
[34m[1mwandb[0m: 	learning_rate: 0.09809192258126388
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 26
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.24766
f1 macro,0.21031


[34m[1mwandb[0m: Agent Starting Run: k1l2pfc8 with config:
[34m[1mwandb[0m: 	gamma: 0.8804412074492417
[34m[1mwandb[0m: 	learning_rate: 0.04933720109877978
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 65
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23364
f1 macro,0.20709


[34m[1mwandb[0m: Agent Starting Run: 8lg87a2a with config:
[34m[1mwandb[0m: 	gamma: 0.4797864135813056
[34m[1mwandb[0m: 	learning_rate: 0.08723537864288392
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 42
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.2352
f1 macro,0.21722


[34m[1mwandb[0m: Agent Starting Run: 5braulxv with config:
[34m[1mwandb[0m: 	gamma: 0.41736275619204466
[34m[1mwandb[0m: 	learning_rate: 0.01572627736449291
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 82
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.25467
f1 macro,0.20511


[34m[1mwandb[0m: Agent Starting Run: 2rgd6z2z with config:
[34m[1mwandb[0m: 	gamma: 0.7809159699620692
[34m[1mwandb[0m: 	learning_rate: 0.024652402771116525
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 73
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.21495
f1 macro,0.19822


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: m9mk4h2v with config:
[34m[1mwandb[0m: 	gamma: 0.28211749811589404
[34m[1mwandb[0m: 	learning_rate: 0.014916963808484882
[34m[1mwandb[0m: 	max_depth: 0
[34m[1mwandb[0m: 	n_estimators: 89
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.19315
f1 macro,0.05396


[34m[1mwandb[0m: Agent Starting Run: 4j6ryg00 with config:
[34m[1mwandb[0m: 	gamma: 0.2985306233537437
[34m[1mwandb[0m: 	learning_rate: 0.07717552652904054
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 134
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.25857
f1 macro,0.23203


[34m[1mwandb[0m: Agent Starting Run: wilj2kft with config:
[34m[1mwandb[0m: 	gamma: 0.5164920737794635
[34m[1mwandb[0m: 	learning_rate: 0.08090747462499279
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 137
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23754
f1 macro,0.22274


[34m[1mwandb[0m: Agent Starting Run: ahub3bm2 with config:
[34m[1mwandb[0m: 	gamma: 0.5792507541027511
[34m[1mwandb[0m: 	learning_rate: 0.05863419499379573
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 111
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.24377
f1 macro,0.22107


[34m[1mwandb[0m: Agent Starting Run: ok71k89h with config:
[34m[1mwandb[0m: 	gamma: 0.8084529046787543
[34m[1mwandb[0m: 	learning_rate: 0.07407966768911987
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 103
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.24221
f1 macro,0.2151


[34m[1mwandb[0m: Agent Starting Run: r28wi8h4 with config:
[34m[1mwandb[0m: 	gamma: 0.6174645326263741
[34m[1mwandb[0m: 	learning_rate: 0.03193929305628633
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 140
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.25
f1 macro,0.21855


[34m[1mwandb[0m: Agent Starting Run: fh747wac with config:
[34m[1mwandb[0m: 	gamma: 0.4427867825239692
[34m[1mwandb[0m: 	learning_rate: 0.06796979896454493
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 29
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.24611
f1 macro,0.20999


[34m[1mwandb[0m: Agent Starting Run: xvjrdeuv with config:
[34m[1mwandb[0m: 	gamma: 0.045506896923234286
[34m[1mwandb[0m: 	learning_rate: 0.05430449272672324
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 96
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23988
f1 macro,0.21493


[34m[1mwandb[0m: Agent Starting Run: yntl7jgo with config:
[34m[1mwandb[0m: 	gamma: 0.5424137044218664
[34m[1mwandb[0m: 	learning_rate: 0.032824904720252825
[34m[1mwandb[0m: 	max_depth: 1
[34m[1mwandb[0m: 	n_estimators: 84
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.23209
f1 macro,0.14897


[34m[1mwandb[0m: Agent Starting Run: q8urcvva with config:
[34m[1mwandb[0m: 	gamma: 0.9801801933296108
[34m[1mwandb[0m: 	learning_rate: 0.04768416160360133
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 144
[34m[1mwandb[0m: 	random_state: 42


0,1
accuracy,▁
f1 macro,▁

0,1
AUC,-
AUC-PC,-
accuracy,0.25078
f1 macro,0.23087


0.011878413049547929