# Installing packages

In [None]:
!pip install --upgrade pip
!pip install deeppavlov
!python -m deeppavlov install squad_bert
!pip uninstall tensorflow
!pip install tensorflow-gpu==1.15.2
!pip install transformers==2.8.0

Make sure that TensorFlow can see the GPU.

In [1]:
import tensorflow as tf
from tensorflow.python.client import device_lib

# Visual divider printed after each section of the report.
separator = "=" * 100

# Count the GPUs reported by the tf.config API.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
print(separator)

# Dump every local device description returned by device_lib.
print(device_lib.list_local_devices())
print(separator)
def get_available_gpus():
    """Return the names of all local devices whose type is 'GPU'.

    Returns:
        list: the ``name`` of every entry of ``device_lib.list_local_devices()``
        whose ``device_type`` equals ``'GPU'``; an empty list when there is none.
    """
    # `device_lib` is already imported earlier in this cell, so the duplicate
    # import that used to precede this function has been dropped.
    return [
        device.name
        for device in device_lib.list_local_devices()
        if device.device_type == 'GPU'
    ]

# Last expression of the cell, so the notebook displays the returned list.
get_available_gpus()

Num GPUs Available:  1
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 11169264157387939262
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14674281152
locality {
  bus_id: 1
  links {
  }
}
incarnation: 18235363450799666721
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
]


['/device:GPU:0']

# Imports

In [1]:
from deeppavlov.core.data.utils import download
from deeppavlov.dataset_readers.basic_classification_reader import BasicClassificationDatasetReader
from deeppavlov.dataset_iterators.basic_classification_iterator import BasicClassificationDatasetIterator

from deeppavlov.models.preprocessors.bert_preprocessor import BertPreprocessor
from deeppavlov.models.preprocessors.torch_transformers_preprocessor import TorchTransformersPreprocessor

from deeppavlov.core.data.simple_vocab import SimpleVocabulary
from deeppavlov.models.preprocessors.one_hotter import OneHotter
from deeppavlov.models.classifiers.proba2labels import Proba2Labels
from deeppavlov.models.bert.bert_classifier import BertClassifierModel
from deeppavlov.metrics.accuracy import sets_accuracy
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from tqdm import tqdm
# Notebook-level Proba2Labels instance built with max_proba=True.
# NOTE(review): no cell shown here references `prob2labels` — confirm it is still needed.
prob2labels = Proba2Labels(max_proba=True)

import pandas as pd
from sklearn.model_selection import train_test_split
import os

import numpy as np
import matplotlib.pyplot as plt

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package perluniprops to /root/nltk_data...
[nltk_data]   Package perluniprops is already up-to-date!
[nltk_data] Downloading package nonbreaking_prefixes to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package nonbreaking_prefixes is already up-to-date!







# Prepare models

The model is stored [here](https://drive.google.com/file/d/1u_g7CSJPYUHuRc6CaoLn1tKhJ6t8kQxF/view?usp=sharing)

In [None]:
wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1u_g7CSJPYUHuRc6CaoLn1tKhJ6t8kQxF' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1u_g7CSJPYUHuRc6CaoLn1tKhJ6t8kQxF" -O sensitive_model && rm -rf /tmp/cookies.txt

In [None]:
# Unpack the classifier archive into the working directory.
# The classifier cell further down loads its checkpoint from sensitive_model_v1/model,
# so the archive presumably extracts to a sensitive_model_v1/ directory — TODO confirm.
!unzip sensitive_model.zip

In [6]:
!wget http://files.deeppavlov.ai/deeppavlov_data/bert/ru_conversational_cased_L-12_H-768_A-12.tar.gz
!tar -xf ru_conversational_cased_L-12_H-768_A-12.tar.gz

--2021-03-05 08:25:01--  http://files.deeppavlov.ai/deeppavlov_data/bert/ru_conversational_cased_L-12_H-768_A-12.tar.gz
Resolving files.deeppavlov.ai (files.deeppavlov.ai)... 93.175.29.74
Connecting to files.deeppavlov.ai (files.deeppavlov.ai)|93.175.29.74|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://files.deeppavlov.ai/deeppavlov_data/bert/ru_conversational_cased_L-12_H-768_A-12.tar.gz [following]
--2021-03-05 08:25:02--  https://files.deeppavlov.ai/deeppavlov_data/bert/ru_conversational_cased_L-12_H-768_A-12.tar.gz
Connecting to files.deeppavlov.ai (files.deeppavlov.ai)|93.175.29.74|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 660061308 (629M) [application/octet-stream]
Saving to: ‘ru_conversational_cased_L-12_H-768_A-12.tar.gz’


2021-03-05 08:27:52 (3.72 MB/s) - ‘ru_conversational_cased_L-12_H-768_A-12.tar.gz’ saved [660061308/660061308]



In [6]:
# Directory of the pretrained BERT checkpoint. Later cells join "vocab.txt",
# "bert_config.json" and "bert_model.ckpt" onto this path.
# Presumably created by extracting the tarball downloaded above — TODO confirm.
PRETR_BERT_PATH = "./ru_conversational_cased_L-12_H-768_A-12"

In [9]:
# Preprocessor that turns raw texts into BERT inputs, using the vocabulary
# shipped in the pretrained checkpoint directory.
bert_preprocessor = BertPreprocessor(
    max_seq_length=256,
    do_lower_case=False,  # keep the original casing of the input text
    vocab_file=os.path.join(PRETR_BERT_PATH, "vocab.txt"),
)

In [4]:
import json

# Load the class-index -> topic-name mapping used to label predictions.
# Lookups elsewhere in the notebook use str(index), so the keys are expected
# to be stringified class indices.
# The encoding is given explicitly so the file is decoded the same way
# regardless of the platform's default locale.
with open("target_vaiables_id2topic_dict.json", "r", encoding="utf-8") as f:
    target_vaiables_id2topic_dict = json.load(f)

In [7]:
# Instantiate the BERT classifier on top of the pretrained checkpoint and
# point both load and save at the sensitive_model_v1/model prefix.
bert_classifier = BertClassifierModel(
    # Pretrained BERT configuration and weights.
    bert_config_file=os.path.join(PRETR_BERT_PATH, "bert_config.json"),
    pretrained_bert=os.path.join(PRETR_BERT_PATH, "bert_model.ckpt"),
    # Classifier checkpoint location (same prefix for loading and saving).
    load_path="sensitive_model_v1/model",
    save_path="sensitive_model_v1/model",
    # Output configuration: 391 classes, multilabel, probabilities returned.
    n_classes=391,
    multilabel=True,
    one_hot_labels=True,
    return_probas=True,
    # Dropout and learning-rate settings.
    keep_prob=0.5,
    learning_rate=1e-05,
    learning_rate_drop_patience=5,
    learning_rate_drop_div=2.0,
)








The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




Instructions for updating:
Use standard file APIs to check for files with this prefix.


2021-03-05 08:39:12.622 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 51: [loading model from /content/sensitive_model_v1/model]



INFO:tensorflow:Restoring parameters from /content/sensitive_model_v1/model


# Perform predictions

In [11]:
def adjust_multilabel_predictions(y):
    """Translate each prediction row into the topic name of its top class.

    Args:
        y: iterable of per-sample score vectors (anything ``np.argmax`` accepts).

    Returns:
        list: one entry per row of ``y`` — the value stored in the notebook-level
        ``target_vaiables_id2topic_dict`` under the stringified argmax index.
    """
    return [
        target_vaiables_id2topic_dict[str(np.argmax(row_scores))]
        for row_scores in y
    ]

# Two example utterances to classify.
text = ["люблю качать фильмы с торрентов", "это какая-то порнография"]

# Pipeline: raw text -> BERT inputs -> classifier output -> topic names.
bert_features = bert_preprocessor(text)
topic_probas = bert_classifier(bert_features)
y_valid_pred = adjust_multilabel_predictions(topic_probas)
y_valid_pred

['online_crime', 'pornography']