This simple notebook shows how the *detoxify* package can be used in offline mode.

**Changelog:**
* V3 - specified a device to use
* V2 - max_len can now be set to a desired value

Original notebook: https://www.kaggle.com/atamazian/using-detoxify-in-offline-mode

In [None]:
# Install detoxify without network access, using the source tree shipped in the input dataset.
# Step 1: copy the source tree into the working directory
# (presumably because the input directory is read-only -- TODO confirm).
!cp -r ../input/detoxify/detoxify-master detoxify
# Step 2: install the package from that local copy (-q keeps pip's output quiet).
!pip install -q ./detoxify
# Step 3: remove the copied source tree once the install has finished.
!rm -rf ./detoxify

In [None]:
from transformers import AutoTokenizer
from detoxify import Detoxify
import torch 

In [None]:
# Offline configuration: every path below points at a local input dataset.
max_len = 500
huggingface_config_path = '../input/bert-base-uncased'

# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

detox = Detoxify(
    model_type='original',
    checkpoint='../input/detoxify-models/toxic_original-c1212f89.ckpt',
    device=device,
    huggingface_config_path=huggingface_config_path,
)

# Override detox.tokenizer with one loaded from the same local directory so
# that its model_max_length can be set to max_len.
detox.tokenizer = AutoTokenizer.from_pretrained(
    huggingface_config_path,
    local_files_only=True,
    model_max_length=max_len,
)

# Quick smoke test on a single sentence.
results = detox.predict('I am not toxic, sorry!')
print(results)

In [None]:
import pandas as pd
import numpy as np

# Per-category multipliers applied to the Detoxify outputs before they are
# summed into one severity score; severe_toxicity, threat and identity_attack
# carry the largest weights. Keys are listed in the order the terms are summed.
cat_mtpl = {'toxicity': 0.32, 'severe_toxicity': 1.5, 'obscene': 0.16,
            'threat': 1.5, 'insult': 0.64, 'identity_attack': 1.5}


def weighted_toxicity(score, weights):
    """Return the weighted sum of the category scores in `score`.

    Args:
        score: mapping from category name to the value predicted for it.
        weights: mapping from category name to its multiplier; only the
            categories listed here contribute to the sum.

    Returns:
        The sum over `weights` of ``score[category] * weight``.
        `score` itself is left unmodified.

    Raises:
        KeyError: if `score` lacks a category that is present in `weights`.
    """
    weighted = [score[category] * weight for category, weight in weights.items()]
    return np.array(weighted).sum(axis=0)


df_sub = pd.read_csv('../input/jigsaw-toxic-severity-rating/comments_to_score.csv')

# Score the comments one at a time, collect the results in a list and assign
# the column once, instead of enlarging the frame row by row through .loc.
severity = [weighted_toxicity(detox.predict(text), cat_mtpl) for text in df_sub['text']]
df_sub['score'] = np.asarray(severity, dtype=np.float64)

In [None]:
# Min-max scale the raw severity scores so the smallest maps to 0 and the largest to 1.
score_min = df_sub['score'].min()
score_range = df_sub['score'].max() - score_min

if score_range == 0:
    # Every comment received the same score: dividing would yield 0/0 = NaN
    # for the whole column, so use a constant 0.0 instead.
    df_sub['score'] = 0.0
else:
    df_sub['score'] = (df_sub['score'] - score_min) / score_range

df_sub.head()

In [None]:
# Write only the id and score columns to the submission file, without the index.
df_sub.to_csv('submission.csv', columns=['comment_id', 'score'], index=False)

In [None]:
# Display the number of rows in df_sub.
df_sub.shape[0]