In [1]:
import pandas as pd

from src.extraction.jsonl_data_reader import JsonlDataReader

# Read the train / dev / test splits, in that order, from their JSONL files.
train_data, dev_data, test_data = (
    JsonlDataReader(file_name=split_file).read()
    for split_file in ('train.jsonl', 'dev.jsonl', 'test.jsonl')
)

# Pipeline stages in execution order, plus a lookup from each stage to the one that
# follows it. The final stage ('model') has no successor and therefore no entry.
stage_order = ['pretokenizer', 'tokenizer', 'post_tokenizer', 'vectorizer', 'model']
stage_order_map = {stage: following for stage, following in zip(stage_order, stage_order[1:])}



In [2]:
from src.vectorizer.w2v_vectorizer import W2vVectorizer
from src.models.abstract_model import AbstractModel
from src.vectorizer.lsi_vectorizer import LsiVectorizer
from src.vectorizer.sk_count_vectorizer import SkCountVectorizer
from src.vectorizer.tfidf_vectorizer import TfidfVectorizer
from src.post_tokenizer.phraser_merger import PhraserMerger
from src.post_tokenizer.null_post_tokenizer import NullPostTokenizer
from src.tokenize.sentence_piece_tokenizer import SentencePieceTokenizer
from src.tokenize.null_tokenizer import NullTokenizer
from src.tokenize.spacy_tokenizer import SpacyTokenizer
from src.preprocessing.simple_preprocessor import SimplePreprocessor

# Search space of the pipeline sweep.
#
# Layout: stage name -> {component class -> {constructor kwarg -> candidate values}}.
# An empty inner dict means the component is instantiated once with no arguments.
# Stage names must match the entries of `stage_order`, because `run` looks up
# `stage_config[next_stage]` using those names.
stage_config = {
    # Keyed 'pretokenizer' (was 'preprocessing') so it agrees with `stage_order` and with
    # the stage name used by `run`; under the old key this entry could not be reached by
    # stage name.
    'pretokenizer': {
        SimplePreprocessor: {
            'remove_citations': (True, False),
            'remove_duplicates': (True, False),
        }
    },
    'tokenizer': {
        SpacyTokenizer: {
            'replace_numbers': (True, False),
            'remove_stopwords': (True, False),
        },
        NullTokenizer: {},
        SentencePieceTokenizer: {
            'vocab_size': (5000, 10000),
        },
    },
    'post_tokenizer': {
        NullPostTokenizer: {},
        PhraserMerger: {
            'num_gram': (1, 2),
        },
    },
    'vectorizer': {
        TfidfVectorizer: {},
        SkCountVectorizer: {
            'ignore_preprocessing': (True, False),
        },
        W2vVectorizer: {},
        LsiVectorizer: {},
        # FastTextW2vVectorizer: {},  # currently disabled
    },
    'model': {
        # The class only contributes its name to the results; `run_model` builds the
        # actual estimators itself.
        AbstractModel: {},
    }
}

In [3]:
from itertools import product
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression


def run_preprocessing(run_class, args, prev_output, current_state, current_models):
    """Instantiate a preprocessor and apply it to the train, dev and test data.

    Args:
        run_class: preprocessor class; instantiated as ``run_class(**args)``.
        args: keyword arguments for the preprocessor constructor.
        prev_output: ignored; the inputs are the module-level ``train_data``,
            ``dev_data`` and ``test_data``.
        current_state: configuration chosen so far; only used in the log line.
        current_models: objects produced by earlier stages; not used here.

    Returns:
        ``((train, dev, test), preprocessor)`` where each split is the result of
        ``preprocessor.preprocess`` on the corresponding module-level dataset.
    """
    print(f'running preprocessing: {run_class}: {args=}, {current_state=}')
    preprocessor = run_class(**args)
    raw_splits = (train_data, dev_data, test_data)
    preprocessed_splits = tuple(preprocessor.preprocess(split) for split in raw_splits)
    return preprocessed_splits, preprocessor


def run_tokenizer(run_class, args, prev_output, current_state, current_models):
    """Instantiate a tokenizer, fit it on the training split and tokenize every split.

    Args:
        run_class: tokenizer class; instantiated as ``run_class(**args)``.
        args: keyword arguments for the tokenizer constructor.
        prev_output: the three splits (train, dev, test) produced by the previous stage.
        current_state: configuration chosen so far; only used in the log line.
        current_models: objects produced by earlier stages; not used here.

    Returns:
        ``(splits, tokenizer)``: a tuple holding ``tokenizer.tokenize(split)`` for each
        element of ``prev_output`` in order, and the tokenizer instance.
    """
    print(f'running tokenizer: {run_class}: {args=}, {current_state=}')
    tokenizer = run_class(**args)
    # Unpacking also enforces that exactly three splits were handed over.
    train_split, _dev_split, _test_split = prev_output
    # Only the training split is used for fitting.
    tokenizer.fit(train_split)
    tokenized_splits = tuple(map(tokenizer.tokenize, prev_output))
    return tokenized_splits, tokenizer


def run_post_tokenizer(run_class, args, prev_output, current_state, current_models):
    """Instantiate a post-tokenizer, fit it on the training split and transform every split.

    Args:
        run_class: post-tokenizer class; instantiated as ``run_class(**args)``.
        args: keyword arguments for the post-tokenizer constructor.
        prev_output: the three splits (train, dev, test) produced by the previous stage.
        current_state: configuration chosen so far; only used in the log line.
        current_models: objects produced by earlier stages; not used here.

    Returns:
        ``(splits, post_tokenizer)``: a tuple holding ``post_tokenizer.transform(split)``
        for each element of ``prev_output`` in order, and the post-tokenizer instance.
    """
    print(f'running post_tokenizer: {run_class}: {args=}, {current_state=}')
    post_tokenizer = run_class(**args)
    # Three-way unpacking doubles as a check on the number of splits.
    train_split, _, _ = prev_output
    post_tokenizer.fit(train_split)

    transformed_splits = []
    for split in prev_output:
        transformed_splits.append(post_tokenizer.transform(split))
    return tuple(transformed_splits), post_tokenizer


def run_vectorizer(run_class, args, prev_output, current_state, current_models):
    """Instantiate a vectorizer, fit it on the training split and transform every split.

    Args:
        run_class: vectorizer class; instantiated as ``run_class(**args)``.
        args: keyword arguments for the vectorizer constructor.
        prev_output: the three splits (train, dev, test) produced by the previous stage.
        current_state: configuration chosen so far; only used in the log line.
        current_models: objects produced by earlier stages; not used here.

    Returns:
        ``(splits, vectorizer)``: a tuple holding ``vectorizer.transform(split)`` for
        each element of ``prev_output`` in order, and the vectorizer instance.
    """
    print(f'running vectorizer: {run_class}: {args=}, {current_state=}')
    vectorizer = run_class(**args)
    # The first of the three splits is the training data; the vocabulary/model is
    # fitted on it alone.
    fit_split, _dev_split, _test_split = prev_output
    vectorizer.fit(fit_split)
    vectorized_splits = tuple([vectorizer.transform(split) for split in prev_output])
    return vectorized_splits, vectorizer


def run_model(run_class, args, prev_output, current_state, current_models):
    """Fit each candidate classifier on the training split and score it on all three splits.

    Args:
        run_class: only used in the log line; the estimators are built in this function.
        args: only used in the log line.
        prev_output: the vectorized (train, dev, test) splits; each must expose
            ``.vectors`` and ``.label_indices``.
        current_state: configuration chosen so far; only used in the log line.
        current_models: objects produced by earlier stages; not used here.

    Returns:
        ``(results, None)`` where ``results`` holds one dict per classifier with the keys
        ``model_name``, ``train_f1``, ``dev_f1`` and ``test_f1`` (macro-averaged F1).
    """
    print(f'running model: {run_class}: {args=}, {current_state=}')
    candidates = {
        'LR': LogisticRegression(max_iter=2000),
        # 'svm': SVC(kernel='rbf'),
    }
    train, dev, test = prev_output
    # Order matters: it fixes the key order of each result row.
    scored_splits = (('train_f1', train), ('dev_f1', dev), ('test_f1', test))

    results = []
    for model_name, estimator in candidates.items():
        # The local must stay named `model_result`: the f-string below prints its name.
        model_result = {'model_name': model_name}
        estimator.fit(train.vectors, train.label_indices)
        for metric_key, split in scored_splits:
            predictions = estimator.predict(split.vectors)
            model_result[metric_key] = f1_score(split.label_indices, predictions, average='macro')
        results.append(model_result)
        print(f'{model_result=}')

    return results, None


def run(stage: str, run_class, args: dict, prev_output=None, current_state=None, current_models=None) -> list[dict]:
    """Run `stage` for every hyper-parameter combination and recurse into the following stages.

    For each combination drawn from ``args`` the stage runner is called once. If the stage
    has a successor in ``stage_order_map``, every component configured for that successor
    in ``stage_config`` is run on this stage's output; otherwise the stage's result rows
    are returned, annotated with the configuration that produced them.

    Args:
        stage: stage name; one of 'pretokenizer', 'tokenizer', 'post_tokenizer',
            'vectorizer', 'model'.
        run_class: component class for this stage; its ``__name__`` is recorded under the
            ``stage`` key of the state.
        args: mapping of constructor kwarg name to an iterable of candidate values. An
            empty mapping (or ``None``) runs the component once with no kwargs.
        prev_output: output of the previous stage, passed through to the stage runner.
        current_state: configuration chosen by the stages that already ran.
        current_models: component instances returned by the stages that already ran,
            keyed by stage name.

    Returns:
        One dict per terminal result row, holding the class name of every stage, every
        chosen hyper-parameter (keyed by its bare name, so equally named hyper-parameters
        of different stages overwrite each other) and the row's own fields.

    Raises:
        KeyError: if ``stage`` is not a known stage name, or a successor stage is missing
            from ``stage_config``.
    """
    stage_runners = {
        'pretokenizer': run_preprocessing,
        'tokenizer': run_tokenizer,
        'post_tokenizer': run_post_tokenizer,
        'vectorizer': run_vectorizer,
        'model': run_model,
    }
    runner = stage_runners[stage]
    next_stage = stage_order_map.get(stage)

    base_state = {**(current_state or {}), stage: run_class.__name__}
    upstream_models = current_models if current_models is not None else {}
    grid = args or {}

    results = []
    # product() over zero iterables yields exactly one empty tuple, so a component without
    # hyper-parameters goes through this loop once with empty kwargs; no special case needed.
    for values in product(*grid.values()):
        chosen_args = dict(zip(grid.keys(), values))
        # The runner receives the state without this stage's kwargs; it logs them separately.
        stage_output, fitted = runner(run_class, chosen_args, prev_output, base_state, upstream_models)
        state_with_args = {**base_state, **chosen_args}

        if next_stage is None:
            # Terminal stage: include this stage's own kwargs in the rows so that runs
            # differing only in model hyper-parameters remain distinguishable.
            results.extend({**state_with_args, **row} for row in stage_output)
            continue

        models_so_far = {**upstream_models, stage: fitted}
        for next_class, next_args in stage_config[next_stage].items():
            results.extend(
                run(next_stage, next_class, next_args, stage_output, state_with_args, models_so_far)
            )
    return results



In [4]:
# Start the sweep at the first stage; `run` recurses into the later stages using the
# grids declared in `stage_config`.
results = run(
    stage='pretokenizer',
    run_class=SimplePreprocessor,
    args={
        'remove_citations': (True, False),
        'remove_duplicates': (True, False),
    },
    prev_output=None,
)
results_df = pd.DataFrame(results)
results_df

running preprocessing: <class 'src.preprocessing.simple_preprocessor.SimplePreprocessor'>: args={'remove_citations': True, 'remove_duplicates': True}, current_state={'pretokenizer': 'SimplePreprocessor'}
running tokenizer: <class 'src.tokenize.spacy_tokenizer.SpacyTokenizer'>: args={'replace_numbers': True, 'remove_stopwords': True}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer'}
running post_tokenizer: <class 'src.post_tokenizer.null_post_tokenizer.NullPostTokenizer'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer'}
running vectorizer: <class 'src.vectorizer.tfidf_vectorizer.TfidfVectorizer'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'toke



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8067378232163117, 'dev_f1': 0.6740916345794465, 'test_f1': 0.6288266039004315}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_f1': 0.7531704588618823,



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8196356714368359, 'dev_f1': 0.6868539274996469, 'test_f1': 0.6733361375095881}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_f1



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8188152338105471, 'dev_f1': 0.673300115250786, 'test_f1': 0.6742040050270183}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_f1'



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8340909464969837, 'dev_f1': 0.7195815518445882, 'test_f1': 0.6815591590889393}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_f1': 0.799724320229785



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8565454886867846, 'dev_f1': 0.7391725921137686, 'test_f1': 0.7100877190035143}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8544608159127912, 'dev_f1': 0.7344472080371812, 'test_f1': 0.7139960589366524}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.79560994



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8129929540514684, 'dev_f1': 0.6629491199213079, 'test_f1': 0.6382525022343971}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_f1': 0.750791831549876



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.828491110557327, 'dev_f1': 0.6732234799554373, 'test_f1': 0.6860970867899895}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_f



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8304752696863043, 'dev_f1': 0.6737233622783155, 'test_f1': 0.6746834029725582}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8411243238758074, 'dev_f1': 0.7148410737183367, 'test_f1': 0.6745684020554794}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_f1': 0.8038430521737



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8625427635437748, 'dev_f1': 0.7263684961163953, 'test_f1': 0.7095631835357863}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'de



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8656650553905633, 'dev_f1': 0.7274760163844144, 'test_f1': 0.7166331469776002}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.790813



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9371066057822911, 'dev_f1': 0.7050388312710189, 'test_f1': 0.6937663548007681}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_f1': 0.8001477222355525, 'test_f1': 0.8084549693024973}
running vectorizer: <class 'src.vectorizer.w2v_vectorizer.W2vVectorizer'



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9497887429969052, 'dev_f1': 0.7369006731416121, 'test_f1': 0.7192722460309146}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7929394919253038, 'test_f1': 0.8019987570614099}
running vectorizer: <class 'src.vectorizer.w2v_vectorizer.W2vVect



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9488091114178001, 'dev_f1': 0.7235146172618158, 'test_f1': 0.715906579467675}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7985340414710734, 'test_f1': 0.7983337790421915}
running vectorizer: <class 'src.vectorizer.w2v_vectorizer.W2vVecto



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8565572536296923, 'dev_f1': 0.7194297444651352, 'test_f1': 0.7278957809587331}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_f1': 0.7975728966291528, 'test_f1': 0.7907894293640613}
running vector



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8890075795144282, 'dev_f1': 0.7178332503409903, 'test_f1': 0.7352669849103757}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7957071983630647, 'test_f1': 0.790602220011274}
running 



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8907581585170742, 'dev_f1': 0.7177068160023955, 'test_f1': 0.7329180297947223}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7854240696122924, 'test_f1': 0.7910145913450064}
running



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8781994823102718, 'dev_f1': 0.7082592727260139, 'test_f1': 0.6981941979784217}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998007816475948, 'dev_f1': 0.8101988532737475, 'test_f1': 0.7979161564471511}
running vect



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.903722655293474, 'dev_f1': 0.7182700387592148, 'test_f1': 0.7304505897679939}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7942740703148341, 'test_f1': 0.7957758003460779}
runnin



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9113135436809888, 'dev_f1': 0.7220894317510217, 'test_f1': 0.7125672601772687}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7965804470197235, 'test_f1': 0.7911386335537278}
runni



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.7975263061243755, 'dev_f1': 0.6678015095834621, 'test_f1': 0.6269755949622837}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9996010029243213, 'dev_f1': 0.749903525254313



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8145991928378042, 'dev_f1': 0.6746250354856637, 'test_f1': 0.669736653961093}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9994939932236262, 'dev_f



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8147171511197819, 'dev_f1': 0.6807510661363502, 'test_f1': 0.6759925530153135}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9993869660074909, 'dev_



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8360608373465417, 'dev_f1': 0.7291123763515067, 'test_f1': 0.6760142149247207}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9996010449586131, 'dev_f1': 0.7991971057768



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8540712191581679, 'dev_f1': 0.734340070621957, 'test_f1': 0.7160178040684758}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9996010449586131, 'dev



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8502191113553951, 'dev_f1': 0.7385814555226541, 'test_f1': 0.7235754815508684}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9996787711424727, 'de



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8083906507517488, 'dev_f1': 0.6621856927781052, 'test_f1': 0.6338273280210545}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.999708121235348, 'dev_f1': 0.75202164415791



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.8248983547401042, 'dev_f1': 0.6816413571828687, 'test_f1': 0.6797796270631773}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8253142163872488, 'dev_f1': 0.6795837226120153, 'test_f1': 0.6782122215755501}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9996010869288637, 'de



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8391833321768001, 'dev_f1': 0.7111790616623059, 'test_f1': 0.6743869695342314}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9997081386798525, 'dev_f1': 0.80272460087



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8590725113030376, 'dev_f1': 0.7345246307497509, 'test_f1': 0.7095605900082584}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9997081386798525, '



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8658763086076439, 'dev_f1': 0.7304931646016918, 'test_f1': 0.7210946370899077}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9997858719616982, '



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9344097632638272, 'dev_f1': 0.6957052168670427, 'test_f1': 0.6886656463262142}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9997081386798525, 'dev_f1': 0.8007641637007724, 'test_f1': 0.806283166322157}
running vectorizer: <class 'src.vectorizer.w2v_vectorizer.W2vVectorizer



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9463115969519379, 'dev_f1': 0.7460505501816015, 'test_f1': 0.7165477639510125}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9997081386798525, 'dev_f1': 0.7955164659183588, 'test_f1': 0.7995002870479618}
running vectorizer: <class 'src.vectorizer.w2v_v



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9463196025145401, 'dev_f1': 0.7369831376346309, 'test_f1': 0.7110347789020018}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998929237141576, 'dev_f1': 0.7987469852391703, 'test_f1': 0.7955283704363484}
running vectorizer: <class 'src.vectorizer.w2v_v



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.85857076642991, 'dev_f1': 0.7267854063875187, 'test_f1': 0.7273851605035427}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9997081386798525, 'dev_f1': 0.8095927620420137, 'test_f1': 0.7891509027315943}
running vector



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8847346696600903, 'dev_f1': 0.7249199076488041, 'test_f1': 0.7271999823795122}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998929237141576, 'dev_f1': 0.7962155048692945, 'test_f1': 0.79030219



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.887011895922257, 'dev_f1': 0.7247060983799906, 'test_f1': 0.7414134496902968}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998929237141576, 'dev_f1': 0.8013885389788048, 'test_f1': 0.791311811



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8782673843077271, 'dev_f1': 0.7298901927016637, 'test_f1': 0.7026151669074875}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9997081386798525, 'dev_f1': 0.8085691538218146, 'test_f1': 0.8038961305575044}
running ve



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9094993428918009, 'dev_f1': 0.7296002527474177, 'test_f1': 0.7238254091767029}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998929237141576, 'dev_f1': 0.8083248818946113, 'test_f1': 0.796845



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9088731908008336, 'dev_f1': 0.7295687357361366, 'test_f1': 0.7099039423259127}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': True, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9998929237141576, 'dev_f1': 0.7994478365111628, 'test_f1': 0.789117



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8185523278030334, 'dev_f1': 0.6601208076854063, 'test_f1': 0.6148605098442456}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7452437268292512, 'test_f1': 0



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.83321294735139, 'dev_f1': 0.6853488120041642, 'test_f1': 0.6644657267170074}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7609929333



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8278332231123396, 'dev_f1': 0.6832867893882774, 'test_f1': 0.6680631914863094}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.76285479



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8439678072654537, 'dev_f1': 0.7197678192673777, 'test_f1': 0.6690776737593631}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7998782849239281, 'test_f1':



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8632579068912728, 'dev_f1': 0.7272485140742403, 'test_f1': 0.7079798874928956}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.804552



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8657823240315984, 'dev_f1': 0.7237316463895985, 'test_f1': 0.7117349394732848}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.799915



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.821437309460418, 'dev_f1': 0.6605454724066454, 'test_f1': 0.6242951044728247}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7442387025170666, 'test_f1': 



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8425083078254074, 'dev_f1': 0.6752463054187192, 'test_f1': 0.6655744494966749}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.750729



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8344301262180993, 'dev_f1': 0.6768848461317081, 'test_f1': 0.6594163587609355}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.750369



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8487811782005478, 'dev_f1': 0.7069812256421019, 'test_f1': 0.6695411730754576}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7945611311698478, 'test_f1



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8731829266992522, 'dev_f1': 0.7273340224811182, 'test_f1': 0.6994654777298145}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.8013



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8703724863940444, 'dev_f1': 0.7204429241923664, 'test_f1': 0.7040765922990267}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.8021



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9448705676218637, 'dev_f1': 0.7125736339630305, 'test_f1': 0.6878482461556882}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.8049515469719176, 'test_f1': 0.8053002070393376}
running vectorizer: <class 'src.vectorizer.w2v_vectorizer.W2vVectorizer'>: args={}, c



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9575620916575524, 'dev_f1': 0.7397141463408493, 'test_f1': 0.7178198278683502}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.8010570612320741, 'test_f1': 0.798576935914434}
running vectorizer: <class 'src.vectorizer.w2v_vectorizer.W2vVec



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9589007711941179, 'dev_f1': 0.7311151079136691, 'test_f1': 0.7142628566074566}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.8000372595129234, 'test_f1': 0.7969591802684409}
running vectorizer: <class 'src.vectorizer.w2v_vectorizer.W2vVe



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8573402272164712, 'dev_f1': 0.7276268049784793, 'test_f1': 0.7094430133818328}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.8009515388286962, 'test_f1': 0.7897417684401363}
running vectorizer: <class 



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.893615037604563, 'dev_f1': 0.7164626735423303, 'test_f1': 0.7198235002867864}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.8054881469606139, 'test_f1': 0.7897458334013107}
runnin



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8889522977701457, 'dev_f1': 0.711768382427065, 'test_f1': 0.7056552422781518}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7822829447656415, 'test_f1': 0.7851850071920556}
runnin



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8853027642969854, 'dev_f1': 0.7308993097176345, 'test_f1': 0.6988749678972432}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7980029730217825, 'test_f1': 0.7965789678837919}
running vectorizer: <clas



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9079279420631927, 'dev_f1': 0.7423542975735419, 'test_f1': 0.725048665494851}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.8051957066012245, 'test_f1': 0.7927469517831347}
runn



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9167058319322062, 'dev_f1': 0.7216085453075863, 'test_f1': 0.7112989056471927}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': True, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 1.0, 'dev_f1': 0.7902738981657663, 'test_f1': 0.792737768846781}
runn



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8151841916681496, 'dev_f1': 0.6566443589293779, 'test_f1': 0.6160332423269536}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9993573212095802, 'dev_f1': 0.7506945228684



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8292866214121895, 'dev_f1': 0.6783739213316679, 'test_f1': 0.6618199633949066}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9994647411995564, 'de



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8256233276346195, 'dev_f1': 0.6726810909263552, 'test_f1': 0.6758652901204254}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9994644957893448, 'de



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8398828432453196, 'dev_f1': 0.7220221436059712, 'test_f1': 0.6762917608185385}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9994646185707877, 'dev_f1': 0.79799509895



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8673505467586446, 'dev_f1': 0.7312229260353481, 'test_f1': 0.7127578463647412}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9994644957893448, '



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.865166279871163, 'dev_f1': 0.7178584174037518, 'test_f1': 0.7092842339541292}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': True, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9994647411995564, 'd



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8170407479175665, 'dev_f1': 0.6581717211483703, 'test_f1': 0.6241836883967983}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.999464372855056, 'dev_f1': 0.745753119175



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8355274826524428, 'dev_f1': 0.6803084403347007, 'test_f1': 0.6660761445853335}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9995716457593291, '



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8332042776653736, 'dev_f1': 0.6690538307497244, 'test_f1': 0.6595501976160536}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': True, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9994646185707877, '



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8476836306709806, 'dev_f1': 0.7029196460846036, 'test_f1': 0.6775976818267365}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9994646185707877, 'dev_f1': 0.797400707



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8709658918502914, 'dev_f1': 0.732040681143856, 'test_f1': 0.7116404964768347}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9995717439233965, 



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8694566775837912, 'dev_f1': 0.7156498880388288, 'test_f1': 0.7064892058378706}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SpacyTokenizer', 'replace_numbers': False, 'remove_stopwords': False, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9995717439233965,



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.943286289695182, 'dev_f1': 0.7073329136967398, 'test_f1': 0.692993697128237}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9995716457593291, 'dev_f1': 0.8034484875828545, 'test_f1': 0.8025116591000092}
running vectorizer: <class 'src.vectorizer.w2v_vectorizer.W2vVectorize



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9566630984235598, 'dev_f1': 0.7590353157105333, 'test_f1': 0.7073223634041351}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9995717439233965, 'dev_f1': 0.7975345513368306, 'test_f1': 0.7962283582050474}
running vectorizer: <class 'src.vectorizer.w2v



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9541019584481981, 'dev_f1': 0.7392086505646498, 'test_f1': 0.7090344799443224}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'NullTokenizer', 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9996787711424727, 'dev_f1': 0.8028450988970036, 'test_f1': 0.794500091625895}
running vectorizer: <class 'src.vectorizer.w2v_



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8556550478251883, 'dev_f1': 0.7224787681679822, 'test_f1': 0.7196707868115523}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9995717439233965, 'dev_f1': 0.8083743018020065, 'test_f1': 0.7945012605208798}
running ve



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8906124522614665, 'dev_f1': 0.7323284085991628, 'test_f1': 0.7136068016729568}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9996787711424727, 'dev_f1': 0.7852564145790485, 'test_f1': 0.788687



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8857906519837214, 'dev_f1': 0.7233663460076488, 'test_f1': 0.7200453411526077}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 5000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9996787711424727, 'dev_f1': 0.7865612173755974, 'test_f1': 0.798933



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.8790674389951394, 'dev_f1': 0.7354338486648496, 'test_f1': 0.7043876323050148}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'NullPostTokenizer', 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9995717439233965, 'dev_f1': 0.8023078953038039, 'test_f1': 0.7991772757884185}
running 



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9055523893457154, 'dev_f1': 0.7315535142965276, 'test_f1': 0.7261163922333532}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 1, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9996787711424727, 'dev_f1': 0.8059744513850794, 'test_f1': 0.7898



running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': True, 'model': 'AbstractModel'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model_result={'model_name': 'LR', 'train_f1': 0.9029985217310532, 'dev_f1': 0.7179738907539507, 'test_f1': 0.7179934642884639}
running vectorizer: <class 'src.vectorizer.sk_count_vectorizer.SkCountVectorizer'>: args={'ignore_preprocessing': False}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer'}
running model: <class 'src.models.abstract_model.AbstractModel'>: args={}, current_state={'pretokenizer': 'SimplePreprocessor', 'remove_citations': False, 'remove_duplicates': False, 'tokenizer': 'SentencePieceTokenizer', 'vocab_size': 10000, 'post_tokenizer': 'PhraserMerger', 'num_gram': 2, 'vectorizer': 'SkCountVectorizer', 'ignore_preprocessing': False, 'model': 'AbstractModel'}
model_result={'model_name': 'LR', 'train_f1': 0.9996787711424727, 'dev_f1': 0.7885875102349228, 'test_f1': 0.7930

Unnamed: 0,pretokenizer,remove_citations,remove_duplicates,tokenizer,replace_numbers,remove_stopwords,post_tokenizer,vectorizer,model,model_name,train_f1,dev_f1,test_f1,ignore_preprocessing,num_gram,vocab_size
0,SimplePreprocessor,True,True,SpacyTokenizer,True,True,NullPostTokenizer,TfidfVectorizer,AbstractModel,LR,0.870372,0.736147,0.723453,,,
1,SimplePreprocessor,True,True,SpacyTokenizer,True,True,NullPostTokenizer,SkCountVectorizer,AbstractModel,LR,0.806738,0.674092,0.628827,True,,
2,SimplePreprocessor,True,True,SpacyTokenizer,True,True,NullPostTokenizer,SkCountVectorizer,AbstractModel,LR,0.999801,0.753170,0.737456,False,,
3,SimplePreprocessor,True,True,SpacyTokenizer,True,True,NullPostTokenizer,W2vVectorizer,AbstractModel,LR,0.663951,0.631870,0.635406,,,
4,SimplePreprocessor,True,True,SpacyTokenizer,True,True,NullPostTokenizer,LsiVectorizer,AbstractModel,LR,0.728142,0.710348,0.690148,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,SimplePreprocessor,False,False,SentencePieceTokenizer,,,PhraserMerger,TfidfVectorizer,AbstractModel,LR,0.901645,0.781128,0.792285,,2.0,10000.0
416,SimplePreprocessor,False,False,SentencePieceTokenizer,,,PhraserMerger,SkCountVectorizer,AbstractModel,LR,0.902999,0.717974,0.717993,True,2.0,10000.0
417,SimplePreprocessor,False,False,SentencePieceTokenizer,,,PhraserMerger,SkCountVectorizer,AbstractModel,LR,0.999679,0.788588,0.793014,False,2.0,10000.0
418,SimplePreprocessor,False,False,SentencePieceTokenizer,,,PhraserMerger,W2vVectorizer,AbstractModel,LR,0.370393,0.366388,0.337872,,2.0,10000.0


In [5]:
from src.utils.path_getter import PathGetter

# Persist the collected experiment results as a parquet file named
# 'experiments.parquet' inside the directory returned by
# PathGetter.get_data_directory(), so they can be reloaded without
# re-running the experiment grid above.
results_df.to_parquet(
    PathGetter.get_data_directory() / 'experiments.parquet'
)

In [6]:
# Display every experiment ranked from highest to lowest test-set F1.
# NOTE(review): if this ranking is used to pick a pipeline configuration,
# consider ranking by 'dev_f1' instead so the test split stays held out
# -- confirm the intent.
results_df.sort_values('test_f1', ascending=False)

Unnamed: 0,pretokenizer,remove_citations,remove_duplicates,tokenizer,replace_numbers,remove_stopwords,post_tokenizer,vectorizer,model,model_name,train_f1,dev_f1,test_f1,ignore_preprocessing,num_gram,vocab_size
170,SimplePreprocessor,True,False,NullTokenizer,,,PhraserMerger,TfidfVectorizer,AbstractModel,LR,0.901645,0.785547,0.811778,,1.0,
65,SimplePreprocessor,True,True,NullTokenizer,,,PhraserMerger,TfidfVectorizer,AbstractModel,LR,0.900072,0.782403,0.809262,,1.0,
380,SimplePreprocessor,False,False,NullTokenizer,,,PhraserMerger,TfidfVectorizer,AbstractModel,LR,0.904953,0.788257,0.808687,,1.0,
62,SimplePreprocessor,True,True,NullTokenizer,,,NullPostTokenizer,SkCountVectorizer,AbstractModel,LR,0.999801,0.800148,0.808455,False,,
70,SimplePreprocessor,True,True,NullTokenizer,,,PhraserMerger,TfidfVectorizer,AbstractModel,LR,0.896358,0.779570,0.806851,,2.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
418,SimplePreprocessor,False,False,SentencePieceTokenizer,,,PhraserMerger,W2vVectorizer,AbstractModel,LR,0.370393,0.366388,0.337872,,2.0,10000.0
293,SimplePreprocessor,False,True,SentencePieceTokenizer,,,PhraserMerger,W2vVectorizer,AbstractModel,LR,0.358704,0.355332,0.334485,,1.0,5000.0
193,SimplePreprocessor,True,False,SentencePieceTokenizer,,,PhraserMerger,W2vVectorizer,AbstractModel,LR,0.347664,0.320911,0.319209,,2.0,5000.0
403,SimplePreprocessor,False,False,SentencePieceTokenizer,,,PhraserMerger,W2vVectorizer,AbstractModel,LR,0.299330,0.306093,0.274905,,2.0,5000.0
