In [None]:
# default_exp utils

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# utils

> Various utility functions used by the blurr package.

In [None]:
#hide
import pdb, sys, inspect
from enum import Enum

import pandas as pd
import torch

from transformers import *
from fastai2.text.all import *

In [None]:
#hide
from nbdev.showdoc import *
from fastcore.test import *

# Prefer GPU #1 (the card this notebook was developed on), but clamp the index to the devices
# that actually exist and skip the call entirely on CPU-only hosts, so that a fresh
# "Restart & Run All" does not crash on machines with fewer than two GPUs.
if torch.cuda.is_available():
    torch.cuda.set_device(min(1, torch.cuda.device_count() - 1))
    print(f'Using GPU #{torch.cuda.current_device()}: {torch.cuda.get_device_name()}')
else:
    print('No GPU available, running on CPU')

Using GPU #1: GeForce GTX 1080 Ti


In [None]:
#export
def str_to_class(classname):
    "Return the object bound to the name `classname` in this module's namespace"
    # everything imported into (or defined in) this module is an attribute of its module object
    this_module = sys.modules[__name__]
    return getattr(this_module, classname)

In [None]:
#export
class Singleton:
    """Class decorator that turns the decorated class into a singleton.

    The decorated name is rebound to a `Singleton` wrapper object. The first call builds the one
    instance of the wrapped class from the arguments supplied; every later call returns that same
    instance and ignores its arguments.
    """
    def __init__(self,cls):
        # `_cls` is the wrapped class; `_instance` stays None until the first call creates it
        self._cls, self._instance = cls, None

    def __call__(self, *args, **kwargs):
        "Return the single instance of the wrapped class, creating it on the first call"
        # Identity check rather than `== None`: `==` would dispatch to the wrapped class's own
        # `__eq__`, which may answer True (re-creating the instance on every call) or return a
        # non-boolean such as an array (raising when used in `if`).
        if self._instance is None: self._instance = self._cls(*args, **kwargs)
        return self._instance

`Singleton` functions as a Python decorator. Put it above any class to turn that class into a singleton (see [here](https://python-3-patterns-idioms-test.readthedocs.io/en/latest/Singleton.html) for more info on the singleton pattern).

In [None]:
@Singleton
class TestSingleton: pass

a = TestSingleton()
b = TestSingleton()
test_eq(a,b)
# equality alone could hold for two distinct objects; a singleton must hand back the very same one
assert a is b

In [None]:
#export
@Singleton
class ModelHelper():
    """Catalogue of the Hugging Face `transformers` classes visible in this module.

    The catalogue is stored as the DataFrame `self._df`, one row per class, with the columns:

    * `class_name`: name of the class
    * `module`: dotted name of the module the class is defined in
    * `module_part_{i}`: the i-th dot-separated piece of `module`
    * `functional_area` / `arch`: `module_part_1` split on its first underscore
      (e.g. `modeling_albert` -> `modeling`, `albert`)
    * `model_task`: for `modeling` classes, the tail of `class_name` starting at its first `With`
      or, failing that, at its first `For` (e.g. `ForMaskedLM`); missing for every other row

    Only rows whose `functional_area` is `modeling`, `configuration` or `tokenization` are kept.
    """

    def __init__(self):
        # get hf classes (tokenizers, configs, models, etc...)
        transformer_classes = inspect.getmembers(sys.modules[__name__],
                                                 lambda member: inspect.isclass(member)
                                                 and member.__module__.startswith('transformers.'))

        # build a df that we can query against to get various transformers objects/info
        df = pd.DataFrame(transformer_classes, columns=['class_name', 'class_location'])

        # add the module each class is included in, then drop the class object itself
        # (don't need it anymore)
        df['module'] = df.class_location.apply(lambda v: v.__module__)
        df = df.drop(labels=['class_location'], axis=1)

        # break up the module into separate cols
        module_parts_df = df.module.str.split(".", n=-1, expand=True)
        for i in range(len(module_parts_df.columns)):
            df[f'module_part_{i}'] = module_parts_df[i]

        # using module part 1, break up the functional area and arch into separate cols
        df[['functional_area', 'arch']] = df.module_part_1.str.split("_", n=1, expand=True)

        # if functional area = modeling, pull out the task it is built for.
        # Each pattern captures from the first occurrence of the keyword to the end of the name
        # and yields NaN when the keyword is absent, so this also works when no class name
        # contains 'With' (or 'For') at all.
        model_names = df.loc[df.functional_area == 'modeling', 'class_name']
        for_task = model_names.str.extract(r'(For.*)', expand=False)
        with_task = model_names.str.extract(r'(With.*)', expand=False)

        # a 'With...' suffix wins; otherwise fall back to the 'For...' suffix (rows that are
        # not modeling classes are absent from both series and therefore end up missing)
        df['model_task'] = with_task.fillna(for_task)

        # only need these 3 functional areas for our querying purposes; to review what gets
        # removed, inspect df[~df['functional_area'].isin([...])] before this line
        self._df = df[df['functional_area'].isin(['modeling', 'configuration', 'tokenization'])]

`ModelHelper` is a `Singleton`: only one instance ever exists, and that same instance is returned by every subsequent instantiation request.

In [None]:
mh = ModelHelper()
mh2 = ModelHelper()
test_eq(mh, mh2)
# the singleton contract is identity, not just equality
assert mh is mh2

In [None]:
#hide
# peek at the first rows of the catalogue
display_df(mh._df.head())

# one list of distinct values per column, with a blank line between consecutive lists
for pos, col in enumerate(['model_task', 'functional_area', 'module_part_2', 'module_part_3']):
    if pos: print('')
    print(list(mh._df[col].unique()))

Unnamed: 0,class_name,module,module_part_0,module_part_1,module_part_2,module_part_3,functional_area,arch,model_task
1,AdaptiveEmbedding,transformers.modeling_transfo_xl,transformers,modeling_transfo_xl,,,modeling,transfo_xl,
2,AlbertConfig,transformers.configuration_albert,transformers,configuration_albert,,,configuration,albert,
3,AlbertForMaskedLM,transformers.modeling_albert,transformers,modeling_albert,,,modeling,albert,WithForMaskedLM
4,AlbertForQuestionAnswering,transformers.modeling_albert,transformers,modeling_albert,,,modeling,albert,WithForQuestionAnswering
5,AlbertForSequenceClassification,transformers.modeling_albert,transformers,modeling_albert,,,modeling,albert,WithForSequenceClassification


[None, nan, 'WithForMaskedLM', 'WithForQuestionAnswering', 'WithForSequenceClassification', 'WithForTokenClassification', 'WithForPreTraining', 'WithForConditionalGeneration', 'WithForMultipleChoice', 'WithForNextSentencePrediction', 'WithForQuestionAnsweringSimple', 'WithForClassification']

['modeling', 'configuration', 'tokenization']

[None]

[None]


In [None]:
#hide
# Run nbdev's notebook-to-script conversion; the cell output lists each notebook it converted.
# NOTE(review): presumably this is what writes the `#export` cells into the library module
# named by `default_exp` -- confirm against the nbdev version in use.
from nbdev.export import notebook2script
notebook2script()

Converted 00_utils.ipynb.
Converted index.ipynb.
