In [1]:
from engine import Engine
from pipelines import NLPPipeline, Pipeline
from db import FirestoreDB
from db.models import News

from firebase_admin import initialize_app
from firebase_admin.credentials import Certificate
from security import Credentials

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from nlp.models import NLPModel
from nlp.processing import (
    NLPPostProcessor,
    NLPTokenizer,
)

from concurrent.futures import ThreadPoolExecutor

from services import NewsService, NLPService, service_provider

ModuleNotFoundError: No module named 'apscheduler'

In [2]:
news_service = service_provider(NewsService)
print(news_service)

<services.news_service.NewsService object at 0x00000204AEFAD390>


In [3]:
service_provider.add(
    NLPService(
        model = NLPModel(
            model=AutoModelForSequenceClassification.from_pretrained(
                "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
            ),
            is_auto_model=True,
        ),
        tokenizer = NLPTokenizer(
            tokenizer=AutoTokenizer.from_pretrained("mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis")
        ),
        postprocessor = NLPPostProcessor(
        chain=[
            "set",
        ]
    ))
)

Service added: [<services.nlp_service.NLPService object at 0x00000204AEFC00D0>]


In [4]:
nlp_service = service_provider(NLPService)
print(nlp_service)

<services.nlp_service.NLPService object at 0x00000204AEFC00D0>


In [5]:
app = initialize_app(Certificate(Credentials("firebase").load()))
db = FirestoreDB(app)


Trying to open credentials: [c:\Users\tomsr\Documents\School\aidi\student projects\crypto_bot\server/keys.json]


In [6]:
nlp_pipeline_news_fetch = NLPPipeline(
    news_service.get,
    name = "nlp_pipeline_news_fetch"
)

In [7]:
nlp_pipelines_news_tokenize = NLPPipeline(
    nlp_service.tokenize,
    name = "nlp_pipeline_news_tokenize"
)

In [8]:
nlp_pipelines_news_predict = NLPPipeline(
    nlp_service.predict,
    name = "nlp_pipeline_news_predict"
)

In [9]:
nlp_pipelines_news_postprocess = NLPPipeline(
    nlp_service.postprocess,
    name = "nlp_pipeline_news_postprocess"
)

In [11]:
context = {"db": db, "pool": ThreadPoolExecutor(4, "ctb")}

In [12]:
engine = Engine(context, [
    nlp_pipeline_news_fetch, 
    nlp_pipelines_news_tokenize,
    nlp_pipelines_news_predict,
    nlp_pipelines_news_postprocess,
])

In [13]:
result = engine.run_sequential()
result

Fetching articles


[NLPDataFrame] Loading data from API Inputs
nlp_pipeline_news_fetch executed
[NLPTokenizer(device=cuda:0, tokenizer=RobertaTokenizerFast(name_or_path='mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis', vocab_size=50265, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': '<mask>'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	3: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	50264: AddedToken("<mask>", rstrip=Fals

Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 1386.98it/s]


nlp_pipeline_news_tokenize executed
[CryptoBotNLPModel(device=cuda:0, model=RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-5): 6 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (

Predicting: 100%|██████████| 100/100 [00:02<00:00, 49.23it/s]


nlp_pipeline_news_predict executed
KWARGS {'db': <db.firestore.FirestoreDB object at 0x00000204B05D3910>, 'pool': <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000204B05D26D0>, 'nlp_df': <nlp.processing.nlp_dataframe.NLPDataFrame object at 0x00000204AEB5B710>, 'tensors': [{'input_ids': tensor([[    0,   133, 24296,  2306,  8518,    17,    27,    29,  7447, 36168,
          1534,    11, 19374,  1721,    83,   987,   755,   837,    40,  6307,
            10,   251,    12, 12364,  2625,    81,   549,  5369,  5825,   269,
            16, 43084, 13806, 27439,     6, 31569,     9,  8518,     4,   302,
            17,    27,    29,  1273,  7576,  4976,     5, 27615,    13,   258,
          2380,     4,     2]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1]], device='cuda:0')}, {'input_ids': tensor([[    0,

{'db': <db.firestore.FirestoreDB at 0x204b05d3910>,
 'pool': <concurrent.futures.thread.ThreadPoolExecutor at 0x204b05d26d0>,
 'nlp_df': <nlp.processing.nlp_dataframe.NLPDataFrame at 0x204aeb5b710>,
 'tensors': [{'input_ids': tensor([[    0,   133, 24296,  2306,  8518,    17,    27,    29,  7447, 36168,
             1534,    11, 19374,  1721,    83,   987,   755,   837,    40,  6307,
               10,   251,    12, 12364,  2625,    81,   549,  5369,  5825,   269,
               16, 43084, 13806, 27439,     6, 31569,     9,  8518,     4,   302,
               17,    27,    29,  1273,  7576,  4976,     5, 27615,    13,   258,
             2380,     4,     2]], device='cuda:0'),
   'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1]], device='cuda:0')},
  {'input_ids': tensor([[    0,  7608,  8518, 20311,    17,    27,    90, 16054,

In [14]:
context.update(result)
context

{'db': <db.firestore.FirestoreDB at 0x204b05d3910>,
 'pool': <concurrent.futures.thread.ThreadPoolExecutor at 0x204b05d26d0>,
 'nlp_df': <nlp.processing.nlp_dataframe.NLPDataFrame at 0x204aeb5b710>,
 'tensors': [{'input_ids': tensor([[    0,   133, 24296,  2306,  8518,    17,    27,    29,  7447, 36168,
             1534,    11, 19374,  1721,    83,   987,   755,   837,    40,  6307,
               10,   251,    12, 12364,  2625,    81,   549,  5369,  5825,   269,
               16, 43084, 13806, 27439,     6, 31569,     9,  8518,     4,   302,
               17,    27,    29,  1273,  7576,  4976,     5, 27615,    13,   258,
             2380,     4,     2]], device='cuda:0'),
   'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1]], device='cuda:0')},
  {'input_ids': tensor([[    0,  7608,  8518, 20311,    17,    27,    90, 16054,