In [82]:
import pickle as pkl
import csv 
import json
import sys
from dataclasses import dataclass
import itertools
import torch

In [83]:
@dataclass
class LLMPipelineConfig:
    run_identifier: str
    config_file_path: str
    output_folder_path: str
    llm_name: str
    collate_fn_name: str
    device: object
    max_epochs: int
    hparams: list

def generate_config_dict(config_file_path):
    config_dict = {}
    with open(config_file_path) as f:
        for line in f:
            line = line.split(":")
            config_dict[line[0].strip()] = line[1].strip()
    config_dict['config_file_path'] = config_file_path
    return config_dict
    
    
def generate_hyperparameter_sets(config_dict):
    lf = config_dict['loss_functions'].split(',')
    lr = [float(lr.strip()) for lr in config_dict['learning_rates'].split(',')]
    bs = [int(bs.strip()) for bs in config_dict['batch_sizes'].split('.')]
    op = [op.strip() for op in config_dict['optimizers'].split(',')]
    hdo = [float(lr.strip()) for lr in config_dict['hidden_dropout_prob'].split(',')]
    ado = [float(lr.strip()) for lr in config_dict['attention_probs_dropout_prob'].split(',')]
    hparams = []
    for loss_function, learning_rate, batch_size, optimizer, hidden_dropout_prob, attention_probs_dropout_prob in itertools.product(
            lf, lr, bs, op, hdo, ado):
        hparams.append({
            'loss_function': loss_function,
            'learning_rate': learning_rate,
            'batch_size': batch_size,
            'optimizer': optimizer,
            'hidden_dropout_prob': hidden_dropout_prob,
            'attention_probs_dropout_prob': attention_probs_dropout_prob
        })
    return hparams


def config_obj_from_config_dict(config_dict):
    hparams = generate_hyperparameter_sets(config_dict)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return LLMPipelineConfig(config_dict['run_identifier'],
                             config_dict['config_file_path'],
                             config_dict["output_folder_path"],
                             config_dict['llm_name'],
                             config_dict['collate_fn_name'],
                             device,
                             int(config_dict['max_epochs']),
                             hparams)


def misclassified_samples_csv_to_dict(path):
    false_positives = []
    false_negatives = []
    with open(path) as f:
        csvreader = csv.reader(f)
        for row in csvreader:
            if row[2] == '0':
                false_positives.append(row[0])
            elif row[2] == '1':
                false_negatives.append(row[0])
        return false_positives, false_negatives    
    

def identify_file_paths_for_misclassified_samples(path):
    config_dict = generate_config_dict(path)
    config_obj = config_obj_from_config_dict(config_dict)

In [68]:
path = '/home/nadia/Desktop/runs/bert-base-uncased-baseline/config.txt'

In [48]:
false_positives, false_negatives = misclassified_samples_csv_to_dict(path)

In [84]:
identify_file_paths_for_misclassified_samples(path)

  return torch._C._cuda_getDeviceCount() > 0


KeyError: 'collate_fn_name'