In [None]:
!pip -q install transformers sentence_transformers
!pip -q install huggingface_hub langchain

In [3]:
import os
from getpass import getpass

# Never store the token in the notebook. Reuse a token that is already exported in the
# environment; otherwise prompt for it without echoing the input.
if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = getpass('Hugging Face API token: ')

## Tasks & their models

1) Conversational : facebook/blenderbot-400M-distill

2) Fill-Mask: distilroberta-base


3) Question Answering:
deepset/roberta-base-squad2

4) Sentence Similarity:sentence-transformers/all-MiniLM-L6-v2

5) Summarization:sshleifer/distilbart-cnn-12-6

6) Table Question Answering:
google/tapas-large-finetuned-wtq

7) Text Classification: roberta-large-mnli

8) Text Generation: gpt2


9) Token Classification: dslim/bert-base-NER


10) Translation :Helsinki-NLP/opus-mt-en-fr

11) Zero-Shot Classification:facebook/bart-large-mnli

## We will see

The LangChain LLM wrapper supports only text-generation tasks.

For the rest of the tasks, we will use the Transformers pipeline method.

In [None]:
# Task 1 (conversational) through the Transformers pipeline API

from transformers import Conversation, pipeline

converse = pipeline(task="conversational", model="facebook/blenderbot-400M-distill")

In [5]:
# Start a conversation with a single opening user message.
conversation_1 = Conversation("Going to Mars - any suggestions?")

In [None]:
# Run the conversational pipeline on the conversation; the cell displays the returned value.
converse(conversations=[conversation_1])

In [7]:
#HF API inference 
import json
import requests

In [8]:
# Hosted Inference API endpoint, built from the id of the model to query.
MODEL_ID = "distilroberta-base"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"

In [9]:
# Read the token from the environment so that no credential is stored in the notebook;
# falls back to an empty string when the variable is not set.
API_TOKEN = os.environ.get('HUGGINGFACEHUB_API_TOKEN', '')

In [10]:
# Bearer-token authorization header sent with every Inference API request.
headers = dict(Authorization=f"Bearer {API_TOKEN}")

In [11]:
#Helper function
def query(payload, timeout=30):
    """POST ``payload`` to ``API_URL`` and return the JSON-decoded response body.

    Args:
        payload: Any JSON-serialisable object. It is serialised with
            ``json.dumps`` and sent as the request body, together with the
            module-level ``headers``.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises its timeout exception. Without a timeout a stalled
            connection would block the kernel indefinitely.

    Returns:
        The response body decoded as UTF-8 and parsed as JSON. The HTTP status
        code is deliberately not checked, so a JSON error body is returned to
        the caller unchanged.

    Raises:
        ValueError: If the response body is not valid JSON
            (``json.JSONDecodeError`` is a subclass of ``ValueError``).
    """
    response = requests.post(
        API_URL,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    return json.loads(response.content.decode("utf-8"))

In [16]:
# "<mask>" marks the token the fill-mask model is asked to predict.
data = query("Amsterdam is a <mask> in Netherlands")

In [None]:
# Display the decoded API response.
data

In [18]:
## Using langchain HuggingFace Hub for text-generation integrations
### Model is gpt2
from langchain import PromptTemplate, HuggingFaceHub, LLMChain

In [22]:
# Wrap the hosted gpt2 model as a LangChain LLM.
# NOTE(review): presumably authenticates with the HUGGINGFACEHUB_API_TOKEN set earlier — confirm.
llm=HuggingFaceHub(repo_id="gpt2")

In [None]:
# Send a single prompt to the LLM; the cell displays the returned value.
llm("Where are you now?")

In [None]:
# Question answering
model_repo = "deepset/roberta-base-squad2"
qa_model = pipeline(task="question-answering", model=model_repo)

In [29]:
# Context passage and the question to be answered from it.
context = "My name is Martian Martin and I live in Mohave Desert."
question = "Where do I live?"

In [None]:
# Ask the question against the context passage.
qa_model(
    question=question,
    context=context,
)

In [None]:
# Summarisation
model_repo = 'sshleifer/distilbart-cnn-12-6'
summary_pipe = pipeline(task='summarization', model=model_repo)

In [33]:
# Summarise a short passage; no max_length is passed, so the pipeline's own default applies.
summary_pipe("""
Research papers can be summarized to allow researchers to spend less time selecting which articles to read. There are several approaches you can take for a task like this:

Use an existing extractive summarization model on the Hub to do inference.
Pick an existing language model trained for academic papers. This model can then be trained in a process called fine-tuning so it can solve the summarization task.
Use a sequence-to-sequence model like T5 for abstractive text summarization.
""")

Your max_length is set to 142, but you input_length is only 106. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=53)


[{'summary_text': ' Research papers can be summarized to allow researchers to spend less time selecting which articles to read . There are several approaches you can take for a task like this: Use an existing extractive summarization model on the Hub to do this . Pick an existing language model trained for academic papers .'}]

In [34]:
## Table QnA

from transformers import pipeline
import pandas as pd

# Build the table to query: one row per actor; the movie counts are kept as strings.
data = {
    "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
    "Number of movies": ["87", "53", "69"],
}

table = pd.DataFrame(data)

In [35]:
# Natural-language question to ask about the table.
question = "how many movies does Leonardo Di Caprio have?"

In [None]:
# Table question-answering pipeline.
tqa = pipeline(
    "table-question-answering",
    model="google/tapas-large-finetuned-wtq",
)

In [38]:
# Ask the question about `table` and print the raw result returned by the pipeline.
print(tqa(table=table, query=question))

{'answer': 'SUM > 53', 'coordinates': [(1, 1)], 'cells': ['53'], 'aggregator': 'SUM'}


In [None]:
# Text classification
text_classifier = pipeline(task="text-classification", model="roberta-large-mnli")

NLI model takes a premise and a hypothesis and returns a class that can either be:

entailment: which means the hypothesis is true.

contradiction: which means the hypothesis is false.

neutral: which means there's no relation between the hypothesis and the premise.

In [39]:
# Single input sentence for the text classifier.
test_query = "Tin Tin comics are the best written by Herge?"

In [None]:
# Classify the sentence; the cell displays the returned value.
text_classifier(test_query)

In [46]:
# Named-entity recognition (NER) pipeline
ner_classifier = pipeline(task="ner", model="dslim/bert-base-NER")

In [47]:
# Tag the entities in one sentence; the cell displays the list of detected entities.
ner_classifier("Hello I'm Omar and I live in Zürich.")

[{'entity': 'B-PER',
  'score': 0.99869895,
  'index': 5,
  'word': 'Omar',
  'start': 10,
  'end': 14},
 {'entity': 'B-LOC',
  'score': 0.99772304,
  'index': 10,
  'word': 'Zürich',
  'start': 29,
  'end': 35}]

In [48]:
# Translation
model_checkpoint = "Helsinki-NLP/opus-mt-en-fr"
translator = pipeline(task="translation", model=model_checkpoint)


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

Downloading (…)olve/main/source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

Downloading (…)olve/main/target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]



In [None]:
!pip install sacremoses

In [51]:
# Translate one sentence; the cell displays the returned value.
translator("Where are you living?")

[{'translation_text': 'Où habites-tu ?'}]

In [52]:
# Zero-shot classification
# Name the task explicitly, as every other pipeline in this notebook does,
# rather than leaving it to be inferred from the model.
pipe = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [53]:
# Score one support message against every candidate label.
candidate_labels = ["urgent", "not urgent", "phone", "tablet", "computer"]
pipe(
    "I have a problem with my iphone that needs to be resolved asap!",
    candidate_labels=candidate_labels,
)

{'sequence': 'I have a problem with my iphone that needs to be resolved asap!',
 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'],
 'scores': [0.5227572321891785,
  0.45814111828804016,
  0.014264623634517193,
  0.002685008803382516,
  0.002152056200429797]}