# Installing packages

In [None]:
# Environment setup. Run this cell once, then restart the kernel so the
# freshly installed TensorFlow build is the one that gets imported.
!pip install --upgrade pip
# NOTE(review): deeppavlov is not pinned, so the resolved version may drift
# over time; pin it once a known-good version is confirmed.
!pip install deeppavlov
!python -m deeppavlov install squad_bert
# Swap the CPU TensorFlow build for the pinned GPU build.
# `-y` is required: without it pip waits for an interactive confirmation,
# which a non-interactive notebook run cannot answer.
!pip uninstall -y tensorflow
!pip install tensorflow-gpu==1.15.2
!pip install transformers==2.8.0

Make sure TensorFlow can see the GPU.

In [None]:
# Sanity check: confirm that the installed TensorFlow build detects a GPU.
import tensorflow as tf
from tensorflow.python.client import device_lib  # imported once for the whole cell

# Number of physical GPU devices TensorFlow reports.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
print("="*100)
# Full list of local devices as reported by TensorFlow.
print(device_lib.list_local_devices())
print("="*100)
def get_available_gpus():
    """Return the names of all local devices whose device type is 'GPU'."""
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names


# Last expression of the cell: the list of GPU device names is shown as output.
get_available_gpus()

Num GPUs Available:  1
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 4132640700858181781
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 16158257127326140703
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 6890870243285408058
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14949928141
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7640123047094104233
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
]


['/device:GPU:0']

# Imports

In [None]:
from deeppavlov.core.data.utils import download
from deeppavlov.dataset_readers.basic_classification_reader import BasicClassificationDatasetReader
from deeppavlov.dataset_iterators.basic_classification_iterator import BasicClassificationDatasetIterator

from deeppavlov.models.preprocessors.bert_preprocessor import BertPreprocessor
from deeppavlov.models.preprocessors.torch_transformers_preprocessor import TorchTransformersPreprocessor

from deeppavlov.core.data.simple_vocab import SimpleVocabulary
from deeppavlov.models.preprocessors.one_hotter import OneHotter
from deeppavlov.models.classifiers.proba2labels import Proba2Labels
from deeppavlov.models.bert.bert_classifier import BertClassifierModel
from deeppavlov.metrics.accuracy import sets_accuracy
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from tqdm import tqdm
# Shared Proba2Labels instance, created once alongside the imports.
# NOTE(review): with max_proba=True this presumably maps each probability
# vector to its highest-probability class — confirm against the DeepPavlov docs.
prob2labels = Proba2Labels(max_proba=True)

import pandas as pd
from sklearn.model_selection import train_test_split
import os

import numpy as np
import matplotlib.pyplot as plt

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package perluniprops to /root/nltk_data...
[nltk_data]   Unzipping misc/perluniprops.zip.
[nltk_data] Downloading package nonbreaking_prefixes to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping corpora/nonbreaking_prefixes.zip.







# Initialize model

The pretrained model is stored [here](https://drive.google.com/file/d/1MRY9dIFllHH_aB0wgc2NLg2e3XCJUITm/view?usp=sharing). You can download it using the command below.

In [None]:
# Download the model archive from Google Drive in two steps:
#   1. the inner `wget` requests the download page, stores the session cookies
#      in /tmp/cookies.txt, and `sed` extracts the `confirm=` token from the response;
#   2. the outer `wget` reuses those cookies plus the token to fetch the file
#      and saves it as appropriateness_model.zip.
# The cookie file is removed afterwards (only if the download succeeded, because of `&&`).
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1MRY9dIFllHH_aB0wgc2NLg2e3XCJUITm' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1MRY9dIFllHH_aB0wgc2NLg2e3XCJUITm" -O appropriateness_model.zip && rm -rf /tmp/cookies.txt

--2021-03-05 07:46:30--  https://docs.google.com/uc?export=download&confirm=WmMS&id=1MRY9dIFllHH_aB0wgc2NLg2e3XCJUITm
Resolving docs.google.com (docs.google.com)... 172.217.8.14, 2607:f8b0:4004:803::200e
Connecting to docs.google.com (docs.google.com)|172.217.8.14|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-00-2k-docs.googleusercontent.com/docs/securesc/72usggbjmvt3058dqq9qj8hlu6i29hra/drhvhnad9lh45orrrlmtsit1ggu7u4vp/1614930375000/09930121825324625069/11494186554280750811Z/1MRY9dIFllHH_aB0wgc2NLg2e3XCJUITm?e=download [following]
--2021-03-05 07:46:30--  https://doc-00-2k-docs.googleusercontent.com/docs/securesc/72usggbjmvt3058dqq9qj8hlu6i29hra/drhvhnad9lh45orrrlmtsit1ggu7u4vp/1614930375000/09930121825324625069/11494186554280750811Z/1MRY9dIFllHH_aB0wgc2NLg2e3XCJUITm?e=download
Resolving doc-00-2k-docs.googleusercontent.com (doc-00-2k-docs.googleusercontent.com)... 142.250.73.193, 2607:f8b0:4004:829::2001
Connecting to doc-00-2k

In [None]:
# Extract the fine-tuned checkpoint into ./appropriateness_model/.
# `-n` never overwrites existing files, so re-running this cell neither blocks
# on unzip's interactive "replace?" prompt nor clobbers an already extracted
# (or locally re-saved) checkpoint.
!unzip -n appropriateness_model.zip

Archive:  appropriateness_model.zip
  inflating: appropriateness_model/checkpoint  
  inflating: appropriateness_model/model.index  
  inflating: appropriateness_model/model.meta  
  inflating: appropriateness_model/model.data-00000-of-00001  


In [None]:
# Fetch and unpack the pretrained conversational BERT archive (~629 MB).
# - https is used directly: the http URL only answers with a 301 redirect to it.
# - `-c` resumes a partial download and does nothing if the file is already
#   complete, so re-running the cell does not download a second copy
#   (plain wget would save it again as `.tar.gz.1`).
!wget -c https://files.deeppavlov.ai/deeppavlov_data/bert/ru_conversational_cased_L-12_H-768_A-12.tar.gz
!tar -xf ru_conversational_cased_L-12_H-768_A-12.tar.gz

--2021-03-05 07:59:27--  http://files.deeppavlov.ai/deeppavlov_data/bert/ru_conversational_cased_L-12_H-768_A-12.tar.gz
Resolving files.deeppavlov.ai (files.deeppavlov.ai)... 93.175.29.74
Connecting to files.deeppavlov.ai (files.deeppavlov.ai)|93.175.29.74|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://files.deeppavlov.ai/deeppavlov_data/bert/ru_conversational_cased_L-12_H-768_A-12.tar.gz [following]
--2021-03-05 07:59:27--  https://files.deeppavlov.ai/deeppavlov_data/bert/ru_conversational_cased_L-12_H-768_A-12.tar.gz
Connecting to files.deeppavlov.ai (files.deeppavlov.ai)|93.175.29.74|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 660061308 (629M) [application/octet-stream]
Saving to: ‘ru_conversational_cased_L-12_H-768_A-12.tar.gz’


2021-03-05 08:00:54 (7.28 MB/s) - ‘ru_conversational_cased_L-12_H-768_A-12.tar.gz’ saved [660061308/660061308]



In [None]:
# Directory with the pretrained BERT files; vocab.txt, bert_config.json and
# bert_model.ckpt are read from it further down in this notebook.
# NOTE(review): presumably this is the folder created by extracting the
# ru_conversational_cased tarball — verify the extracted directory name.
PRETR_BERT_PATH = "./ru_conversational_cased_L-12_H-768_A-12"

In [None]:
# Preprocessor that turns raw strings into BERT input features, using the
# vocabulary shipped with the pretrained model. Case is preserved
# (do_lower_case=False) and max_seq_length is set to 256.
bert_preprocessor = BertPreprocessor(
    vocab_file=os.path.join(PRETR_BERT_PATH, "vocab.txt"),
    do_lower_case=False,
    max_seq_length=256,
)




In [None]:
# Demo: run the preprocessor on one sample string and print each feature
# field of the first (and only) example, in the same order as before.
input_features = bert_preprocessor(["превет медвед"])
for field_name in ("tokens", "input_ids", "input_mask", "input_type_ids"):
    print(getattr(input_features[0], field_name))

['[CLS]', 'пре', '##вет', 'медвед', '[SEP]']
[101, 932, 977, 24772, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [None]:
# Two-class BERT classifier. Construction restores the fine-tuned weights
# from `load_path` (see the "loading model from ..." log line in the output).
bert_classifier = BertClassifierModel(
    # Underlying pretrained BERT: config and checkpoint.
    bert_config_file=os.path.join(PRETR_BERT_PATH, "bert_config.json"),
    pretrained_bert=os.path.join(PRETR_BERT_PATH, "bert_model.ckpt"),
    # Fine-tuned checkpoint prefix, used for both loading and saving.
    load_path="appropriateness_model/model",
    save_path="appropriateness_model/model",
    # Output configuration: two classes, one-hot labels, probabilities returned.
    n_classes=2,
    one_hot_labels=True,
    return_probas=True,
    # Training hyper-parameters (not exercised by the inference cells in this notebook).
    keep_prob=0.5,
    learning_rate=1e-05,
    learning_rate_drop_patience=5,
    learning_rate_drop_div=2.0,
)







The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.





Instructions for updating:
Use standard file APIs to check for files with this prefix.


2021-03-05 08:01:39.3 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 51: [loading model from /content/appropriateness_model/model]



INFO:tensorflow:Restoring parameters from /content/appropriateness_model/model


# Predicting

In [None]:
# Score a few sample utterances. The cell output is one row of class
# probabilities per input string, because the classifier was built with
# return_probas=True and n_classes=2.
text = [
    "привет как дела",
    "заем вообще нужны инвалиды",
    "люблю качать фильмы на торренте",
    "торрент это оч плохо, лучше платить авторам за творчество и соблюдать их права",
]
y_valid_pred = bert_classifier(bert_preprocessor(text))
y_valid_pred

array([[0.9968267 , 0.00317322],
       [0.2709852 , 0.7290148 ],
       [0.0294681 , 0.9705319 ],
       [0.4898167 , 0.51018333]], dtype=float32)