In [7]:
!pip install transformers



In [8]:
!pip install requests beautifulsoup4 pandas numpy



## Pipeline

In [20]:
from transformers import pipeline

import warnings 
warnings.filterwarnings("ignore")

In [4]:
!pip install 'ipywidgets~=7.7'





### 1. Sentiment Analysis

In [50]:
classifier = pipeline('sentiment-analysis')

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [51]:
classifier("I hate that movie and the cast was bad ")

[{'label': 'NEGATIVE', 'score': 0.9996799230575562}]

In [52]:
# NOTE: 'bert-base-uncased' has no trained sequence-classification head here: the load
# warning reports 'classifier.bias' and 'classifier.weight' as newly initialized, so the
# predictions of this pipeline (e.g. 'LABEL_0') are not meaningful sentiment labels.
classifier2 = pipeline('sentiment-analysis', model='bert-base-uncased')

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [53]:
classifier2("I loved that movie and the cast was good ")

[{'label': 'LABEL_0', 'score': 0.5929425954818726}]

In [54]:
# Score two sentences in a single pipeline call, then print "<label>, <score>" per prediction.
sample_texts = [
    "We are very happy to show you the 🤗 Transformers library.",
    "We hope you don't hate it.",
]
results = classifier(sample_texts)
for prediction in results:
    rounded_score = round(prediction['score'], 2)
    print(f"{prediction['label']}, {rounded_score}")

POSITIVE, 1.0
NEGATIVE, 0.53


### 2. Generator

In [14]:
generator = pipeline('text-generation')

No model was supplied, defaulted to openai-community/gpt2 and revision 6c0e608 (https://huggingface.co/openai-community/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [16]:
print(generator("As far as I am concerned, I will",))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'As far as I am concerned, I will have no problem getting in the process of creating a working draft, and making it public in the coming months, but I will be taking my time and am not sure of the exact timing.\n\nAs'}]


### 3. Fill Mask

In [17]:
unmasker = pipeline("fill-mask")

No model was supplied, defaulted to distilbert/distilroberta-base and revision ec58a5b (https://huggingface.co/distilbert/distilroberta-base).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at distilbert/distilroberta-base were not used when initializing RobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [20]:
from pprint import pprint
pprint(unmasker(f"HuggingFace is creating a {unmasker.tokenizer.mask_token} that the community uses to solve NLP tasks."))

[{'score': 0.1792750358581543,
  'sequence': 'HuggingFace is creating a tool that the community uses to solve '
              'NLP tasks.',
  'token': 3944,
  'token_str': ' tool'},
 {'score': 0.11349526792764664,
  'sequence': 'HuggingFace is creating a framework that the community uses to '
              'solve NLP tasks.',
  'token': 7208,
  'token_str': ' framework'},
 {'score': 0.05243587866425514,
  'sequence': 'HuggingFace is creating a library that the community uses to '
              'solve NLP tasks.',
  'token': 5560,
  'token_str': ' library'},
 {'score': 0.03493529185652733,
  'sequence': 'HuggingFace is creating a database that the community uses to '
              'solve NLP tasks.',
  'token': 8503,
  'token_str': ' database'},
 {'score': 0.028602492064237595,
  'sequence': 'HuggingFace is creating a prototype that the community uses to '
              'solve NLP tasks.',
  'token': 17715,
  'token_str': ' prototype'}]


### 4. NER

In [21]:
ner_pipe = pipeline("ner")

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [22]:
sequence = """Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO,
therefore very close to the Manhattan Bridge which is visible from the window."""

In [23]:
pprint(ner_pipe(sequence))

[{'end': 2,
  'entity': 'I-ORG',
  'index': 1,
  'score': 0.99957865,
  'start': 0,
  'word': 'Hu'},
 {'end': 7,
  'entity': 'I-ORG',
  'index': 2,
  'score': 0.9909764,
  'start': 2,
  'word': '##gging'},
 {'end': 12,
  'entity': 'I-ORG',
  'index': 3,
  'score': 0.9982224,
  'start': 8,
  'word': 'Face'},
 {'end': 16,
  'entity': 'I-ORG',
  'index': 4,
  'score': 0.9994879,
  'start': 13,
  'word': 'Inc'},
 {'end': 43,
  'entity': 'I-LOC',
  'index': 11,
  'score': 0.9994344,
  'start': 40,
  'word': 'New'},
 {'end': 48,
  'entity': 'I-LOC',
  'index': 12,
  'score': 0.99931955,
  'start': 44,
  'word': 'York'},
 {'end': 53,
  'entity': 'I-LOC',
  'index': 13,
  'score': 0.9993794,
  'start': 49,
  'word': 'City'},
 {'end': 80,
  'entity': 'I-LOC',
  'index': 19,
  'score': 0.98625827,
  'start': 79,
  'word': 'D'},
 {'end': 82,
  'entity': 'I-LOC',
  'index': 20,
  'score': 0.9514272,
  'start': 80,
  'word': '##UM'},
 {'end': 84,
  'entity': 'I-LOC',
  'index': 21,
  'score': 0.933

In [24]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# Load the tokenizer and token-classification model from one checkpoint name so the two
# cannot drift apart.
NER_MODEL_NAME = "Babelscape/wikineural-multilingual-ner"
tokenizer = AutoTokenizer.from_pretrained(NER_MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(NER_MODEL_NAME)

# `aggregation_strategy="simple"` is the replacement for the deprecated
# `grouped_entities=True`: word pieces are merged into whole entities, which is why the
# results carry an 'entity_group' key instead of per-token 'entity' tags.
nlp = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
example = """Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO,
therefore very close to the Manhattan Bridge which is visible from the window."""

ner_results = nlp(example)
pprint(ner_results)


tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/709M [00:00<?, ?B/s]

[{'end': 17,
  'entity_group': 'ORG',
  'score': 0.9916129,
  'start': 0,
  'word': 'Hugging Face Inc.'},
 {'end': 53,
  'entity_group': 'LOC',
  'score': 0.99963516,
  'start': 40,
  'word': 'New York City'},
 {'end': 84,
  'entity_group': 'LOC',
  'score': 0.7212909,
  'start': 79,
  'word': 'DUMBO'},
 {'end': 130,
  'entity_group': 'LOC',
  'score': 0.9992337,
  'start': 114,
  'word': 'Manhattan Bridge'}]


### 5. Question Answering

In [25]:
question_answerer = pipeline("question-answering")


No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [26]:
context = r"""
Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune
a model on a SQuAD task, you may leverage the examples/pytorch/question-answering/run_squad.py script.
"""

In [27]:
# Ask each question against the same `context` and report the extracted answer span.
for question in (
    "What is extractive question answering?",
    "What is a good example of a question answering dataset?",
):
    result = question_answerer(question=question, context=context)
    print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}")

Answer: 'the task of extracting an answer from a text given a question', score: 0.6177, start: 34, end: 95
Answer: 'SQuAD dataset', score: 0.5152, start: 147, end: 160


In [33]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

model_name = "deepset/roberta-base-squad2"

context = r"""
Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune
a model on a SQuAD task, you may leverage the examples/pytorch/question-answering/run_squad.py script.
"""

# a) Get predictions
# The pipeline is built from the checkpoint name, so it loads its own model and tokenizer.
QA_input = dict(
    question='What is a good example of a question answering dataset?',
    context=context,
)
nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)
res = nlp(QA_input)

# b) Load model & tokenizer
# These are loaded after `res` was computed, so they play no part in the printed prediction.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
print(res)

{'score': 0.4503432512283325, 'start': 147, 'end': 160, 'answer': 'SQuAD dataset'}


In [34]:
# Ask a second question about the same passage.
# The previous cell already defined `context` (identical text) and built the
# question-answering pipeline `nlp` for `model_name`, so both are reused here instead of
# re-creating the pipeline and re-loading a model/tokenizer pair that was never used.
QA_input = {
    'question': 'What is extractive question answering?',
    'context': context }
res = nlp(QA_input)
print(res)

{'score': 0.18873782455921173, 'start': 72, 'end': 95, 'answer': 'a text given a question'}


### 6. Summarization

In [55]:
summarizer = pipeline("summarization")

ARTICLE = """ New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York.
A year later, she got married again in Westchester County, but to a different man and without divorcing her first husband.
Only 18 days after that marriage, she got hitched yet again. Then, Barrientos declared "I do" five more times, sometimes only within two weeks of each other.
In 2010, she married once more, this time in the Bronx. In an application for a marriage license, she stated it was her "first and only" marriage.
Barrientos, now 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," referring to her false statements on the
2010 marriage license application, according to court documents.
Prosecutors said the marriages were part of an immigration scam.
On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to her attorney, Christopher Wright, who declined to comment further.
After leaving court, Barrientos was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New York subway through an emergency exit, said Detective
Annette Markowski, a police spokeswoman. In total, Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002.
All occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be married to four men, and at one time, she was married to eight men at once, prosecutors say.
Prosecutors said the immigration scam involved some of her husbands, who filed for permanent residence status shortly after the marriages.
Any divorces happened only after such filings were approved. It was unclear whether any of the men will be prosecuted.
The case was referred to the Bronx District Attorney\'s Office by Immigration and Customs Enforcement and the Department of Homeland Security\'s
Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt, Turkey, Georgia, Pakistan and Mali.
Her eighth husband, Rashid Rajput, was deported in 2006 to his native Pakistan after an investigation by the Joint Terrorism Task Force.
If convicted, Barrientos faces up to four years in prison.  Her next court appearance is scheduled for May 18.
"""

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [56]:
print(summarizer(ARTICLE))

[{'summary_text': ' Liana Barrientos, 39, is charged with two counts of "offering a false instrument for filing in the first degree" In total, she has been married 10 times, nine of them between 1999 and 2002 . She is believed to still be married to four men, and at one time, she was married to eight men at once .'}]


### 7. Translation

In [35]:
translator = pipeline("translation_en_to_de")

No model was supplied, defaulted to google-t5/t5-base and revision 686f1db (https://huggingface.co/google-t5/t5-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [36]:
print(translator("Hugging Face is a technology company based in New York and Paris", max_length=40))

[{'translation_text': 'Hugging Face ist ein Technologieunternehmen mit Sitz in New York und Paris.'}]


# 1. Do something on Real Data (Sentiment Analysis on Yelp - Social Brew Cafe)

In [37]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd

## Instantiate Model

In [38]:
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

## Encode and Calculate Sentiment

In [59]:
tokens = tokenizer.encode('It was good but couldve been better. Great', return_tensors='pt')

In [60]:
result = model(tokens)

In [61]:
result.logits

tensor([[-2.7768, -1.2353,  1.4419,  1.9804,  0.4584]],
       grad_fn=<AddmmBackward0>)

In [62]:
int(torch.argmax(result.logits))+1

4

## Collect Reviews

In [5]:
import requests
from bs4 import BeautifulSoup
import re

In [11]:
# Download the Yelp business page. The timeout keeps the cell from hanging forever, and
# raise_for_status() fails loudly on an HTTP error instead of silently parsing an error
# page into an empty review list.
r = requests.get('https://www.yelp.com/biz/social-brew-cafe-pyrmont', timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')

# Keep the text of every <p> tag whose class attribute matches "comment".
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class':regex})
reviews = [result.text for result in results]

In [14]:
reviews

['The food was delicious. The ricotta pancakes were light and tasty.  The cream brûlée French toast was a fruit lovers delight. There was fresh strawberries, blueberries, passion fruit and custard. No syrup required. And crispy kale as a side ordered with eggs. Worth the walk over the Pyrmont bridge.',
 "Very cute coffee shop and restaurant. They have a lovely outdoor seating area and several tables inside.  It was fairly busy on a Tuesday morning but we were to grab the last open table. The server was so enjoyable, she chatted and joked with us and provided fast service with our ordering, drinks and meals. The food was very good. We ordered a wide variety and every meal was good to delicious. The sweet potato fries on the Chicken Burger plate were absolutely delicious, some of the best I've ever had. I definitely enjoyed this cafe, the outdoor seating, the service and the food!!",
 "Six of us met here for breakfast before our walk to Manly. We were enjoying visiting with each other so

In [64]:
reviews

["Very cute coffee shop and restaurant. They have a lovely outdoor seating area and several tables inside.  It was fairly busy on a Tuesday morning but we were to grab the last open table. The server was so enjoyable, she chatted and joked with us and provided fast service with our ordering, drinks and meals. The food was very good. We ordered a wide variety and every meal was good to delicious. The sweet potato fries on the Chicken Burger plate were absolutely delicious, some of the best I've ever had. I definitely enjoyed this cafe, the outdoor seating, the service and the food!!",
 "Six of us met here for breakfast before our walk to Manly. We were enjoying visiting with each other so much that I apologize for not taking any photos. We all enjoyed our food, as well as our coffee and tea drinks.We were greeted immediately by a friendly server asking if we would like to sit inside or out. We said we would like inside, but weren't exactly sure how many were joining us yet- at least 4. 

## Load Reviews into DataFrame and Score

In [65]:
df = pd.DataFrame(np.array(reviews), columns=['review'])

In [66]:
df['review'].iloc[0]

"Very cute coffee shop and restaurant. They have a lovely outdoor seating area and several tables inside.  It was fairly busy on a Tuesday morning but we were to grab the last open table. The server was so enjoyable, she chatted and joked with us and provided fast service with our ordering, drinks and meals. The food was very good. We ordered a wide variety and every meal was good to delicious. The sweet potato fries on the Chicken Burger plate were absolutely delicious, some of the best I've ever had. I definitely enjoyed this cafe, the outdoor seating, the service and the food!!"

In [67]:
def sentiment_score(review):
    """Return the predicted sentiment class for one review, as an integer starting at 1.

    The review is encoded with the module-level `tokenizer`, run through the module-level
    `model`, and the index of the largest logit (plus one) is returned. With the five
    logits this model emits, the result is in 1..5 (presumably 1 = most negative and
    5 = most positive; confirm against the model card).

    Parameters
    ----------
    review : str
        Raw review text. Inputs longer than 512 tokens are truncated by the tokenizer
        rather than overflowing the model.

    Returns
    -------
    int
        1-based index of the highest-scoring class.
    """
    # Truncate at token level; slicing characters in the caller does not bound the token count.
    tokens = tokenizer.encode(review, return_tensors='pt', truncation=True, max_length=512)
    # Inference only: skip autograd bookkeeping (the logits otherwise carry a grad_fn).
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

In [68]:
sentiment_score(df['review'].iloc[1])

4

In [69]:
# Score every review and store the class in a new 'sentiment' column.
# The slice keeps the first 512 *characters* (not tokens) of each review before scoring;
# presumably a guard against over-long model inputs. TODO confirm.
df['sentiment'] = df['review'].apply(lambda x: sentiment_score(x[:512]))

In [70]:
df

Unnamed: 0,review,sentiment
0,Very cute coffee shop and restaurant. They hav...,4
1,Six of us met here for breakfast before our wa...,4
2,The food was delicious. The ricotta pancakes w...,4
3,Great place with delicious food and friendly s...,5
4,"Great service, lovely location, and really ama...",5
5,Great food amazing coffee and tea. Short walk ...,5
6,Ricotta hot cakes! These were so yummy. I ate ...,5
7,It was ok. Had coffee with my friends. I'm new...,3
8,We came for brunch twice in our week-long visi...,4
9,I came to Social brew cafe for brunch while ex...,5


In [71]:
df['review'].iloc[3]

'Great place with delicious food and friendly staff. It is small but has outdoor seating and a relaxed ambiance. Perfect place to enjoy a cup of coffee. I am visiting Sydney for the first time but this place seems like is a local favorite.'

# 2. Do something on Real Data (Translator + Gradio)

In [15]:
!pip install gradio==3.50



In [16]:
import gradio as gr   

In [17]:
gr.__version__

'3.50.0'

In [18]:
!pip install tf-keras



## Load Up Pipeline

In [21]:
translation_pipeline = pipeline('translation_en_to_de')

No model was supplied, defaulted to google-t5/t5-base and revision 686f1db (https://huggingface.co/google-t5/t5-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [22]:
results = translation_pipeline('I love Juventus')

In [23]:
results[0]['translation_text']

'Ich liebe Juventus'

## Create Gradio Function and Interface

In [24]:
def translate_transformers(from_text):
    """Translate `from_text` using the module-level `translation_pipeline`.

    Returns the 'translation_text' value of the first result the pipeline produces.
    """
    first_result = translation_pipeline(from_text)[0]
    return first_result['translation_text']

In [25]:
translate_transformers('My name is Niloofar')

'Mein Name ist Niloofar'

In [26]:
# Wrap `translate_transformers` in a text-in / text-out Gradio UI and start it.
interface = gr.Interface(
    fn=translate_transformers,
    inputs="text",
    outputs="text",
    title="Text Translation",
    # The pipeline behind `translate_transformers` is 'translation_en_to_de', which
    # defaulted to google-t5/t5-base, so the description names German and T5
    # (it previously said "another language using MarianMT").
    description="Translate text from English to German using T5.",
    theme="compact"
)
interface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860


KeyboardInterrupt: 

# 3. Do something on Real Data (Summarization Medium)

In [5]:
from transformers import pipeline
from bs4 import BeautifulSoup
import requests

In [6]:
summarizer = pipeline("summarization")

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


## Get Blog Post from Medium

In [7]:
!pip install pyopenssl ndg-httpsclient pyasn1

Collecting pyopenssl
  Downloading pyOpenSSL-24.1.0-py3-none-any.whl.metadata (12 kB)
Downloading pyOpenSSL-24.1.0-py3-none-any.whl (56 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.9/56.9 kB[0m [31m170.9 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pyopenssl
Successfully installed pyopenssl-24.1.0


In [10]:
requests.__version__

'2.31.0'

In [26]:
URL = "https://freedium.cfd/https://medium.com/analytics-vidhya/openai-gpt-3-language-models-are-few-shot-learners-82531b3d3122"

In [27]:
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

In [28]:
r = requests.get(URL, headers=headers)

In [29]:
r

<Response [200]>

In [30]:
# Parse the downloaded page and join the text of every <h1> and <p> element with spaces.
# Every matching tag is kept, so site boilerplate (such as the Freedium banner that shows
# up at the start of chunks[0]) ends up in ARTICLE as well.
soup = BeautifulSoup(r.text, 'html.parser')
results = soup.find_all(['h1', 'p'])
text = [tag.get_text() for tag in results]
ARTICLE = ' '.join(text)

In [2]:
ARTICLE

NameError: name 'ARTICLE' is not defined

## Chunk Text

In [32]:
max_chunk = 500

In [33]:
# Strip the sentence-ending marks '.', '?' and '!' from the article text.
# NOTE(review): with these marks removed, the later split on ' ' yields individual
# space-separated tokens rather than sentences.
ARTICLE = ARTICLE.replace('.', '').replace('?', '').replace('!', '')

In [1]:
# Greedily pack the article into chunks of at most `max_chunk` pieces each, then turn
# every chunk back into a single space-joined string for the summarizer.
# Despite the name, `sentences` holds the space-separated tokens of ARTICLE.
sentences = ARTICLE.split(' ')
current_chunk = 0  # index of the chunk currently being filled
chunks = []
for sentence in sentences:
    words = sentence.split(' ')
    if not chunks:
        # First token: open the first chunk.
        chunks.append(words)
    elif len(chunks[current_chunk]) + len(words) <= max_chunk:
        # Still room in the current chunk.
        chunks[current_chunk].extend(words)
    else:
        # Current chunk is full: start a new one.
        current_chunk += 1
        chunks.append(words)

chunks = [' '.join(chunk) for chunk in chunks]

NameError: name 'ARTICLE' is not defined

In [35]:
len(chunks)

23

In [36]:
chunks[0]

'Bad news \n                We regret to inform you that our account on BuyMeACoffee has been suspended due to a violation of their terms of service This was an unexpected development, and we are currently addressing the matter with utmost priority\n            \n\n        However, our mission at Freedium remains unchanged, and your support is more crucial than ever We are transitioning to Patreon, a platform that aligns with our values and offers us the freedom to share our work with you\n    \n\nPlease join us on Patreon and continue to support our endeavors Your contributions are invaluable to us, and we are committed to delivering the quality content you’ve come to expect from Freedium\n\n\nThank you for your understanding and unwavering support\n\n\nSupport Us on Patreon\n\n\nWarm regards, The Freedium Team\n \n< Go to the original\n OpenAI GPT-3: Language Models are Few-Shot Learners Analytics Vidhya OpenAI recently published a paper describing GPT-3, a deep-learning model for Na

## Summarize Text

In [37]:
res = summarizer(chunks, max_length=120, min_length=30, do_sample=False)

In [38]:
res[0]

{'summary_text': " Freedium's account on BuyMeACoffee has been suspended due to a violation of their terms of service . However, our mission at Freedium remains unchanged, and your support is more crucial than ever . We are transitioning to a platform that aligns with our values and offers us the freedom to share our work with you ."}

# Your Turn

Summarize this article:

URL-test = https://medium.com/analytics-vidhya/openai-gpt-3-language-models-are-few-shot-learners-82531b3d3122


# Persian

In [32]:
from transformers import AutoConfig, AutoTokenizer, AutoModel

# v3.0
model_name_or_path = "HooshvareLab/bert-fa-zwnj-base"
config = AutoConfig.from_pretrained(model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# model = TFAutoModel.from_pretrained(model_name_or_path)  For TF
model = AutoModel.from_pretrained(model_name_or_path)

text = "ما در هوش‌واره معتقدیم با انتقال صحیح دانش و آگاهی، همه افراد میتوانند از ابزارهای هوشمند استفاده کنند. شعار ما هوش مصنوعی برای همه است."
tokenizer.tokenize(text)

Some weights of BertModel were not initialized from the model checkpoint at HooshvareLab/bert-fa-zwnj-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['ما',
 'در',
 'هوش',
 '[ZWNJ]',
 'واره',
 'معتقدیم',
 'با',
 'انتقال',
 'صحیح',
 'دانش',
 'و',
 'آ',
 '##گاهی',
 '،',
 'همه',
 'افراد',
 'میتوانند',
 'از',
 'ابزارهای',
 'هوشمند',
 'استفاده',
 'کنند',
 '.',
 'شعار',
 'ما',
 'هوش',
 'مصنوعی',
 'برای',
 'همه',
 'است',
 '.']

In [28]:
from __future__ import print_function
import ipywidgets as widgets
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
from transformers import pipeline
tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/bert-fa-base-uncased-sentiment-digikala")
config = AutoConfig.from_pretrained("HooshvareLab/bert-fa-base-uncased-sentiment-digikala")
model = AutoModelForSequenceClassification.from_pretrained("HooshvareLab/bert-fa-base-uncased-sentiment-digikala")

In [29]:
nlp_sentence_classif = pipeline('sentiment-analysis', model="HooshvareLab/bert-fa-base-uncased-sentiment-digikala")


In [30]:
nlp_sentence_classif('خوب بود')

[{'label': 'recommended', 'score': 0.7696138620376587}]

In [31]:
nlp_sentence_classif('وصل نشدن تماس ورودی')

[{'label': 'not_recommended', 'score': 0.8665766716003418}]