# Let's try HuggingFace Transformers NLP Pipelines!


INSTALL LIBRARIES

In [2]:
!pip install transformers



ZERO SHOT CLASSIFICATION

In [7]:
from transformers import pipeline

# Build the zero-shot classification pipeline on the BART-large MNLI checkpoint.
classifier = pipeline(task="zero-shot-classification", model="facebook/bart-large-mnli")

# Sentence to classify.
text = "This is an educational course about using the Transformers library in data science."

# Categories the sentence is scored against.
candidate_labels = ["learning", "education", "business", "politics"]

# Run the classification; multi_label=True is forwarded to the pipeline unchanged.
result = classifier(text, candidate_labels=candidate_labels, multi_label=True)

# Assemble the report (input text, a header, then one "- label: score" line
# per label) and print it in a single call.
report_lines = [f"Text: {result['sequence']}", "Labels and Scores:"]
report_lines.extend(
    f"- {label}: {score:.4f}"
    for label, score in zip(result['labels'], result['scores'])
)
print("\n".join(report_lines))


Text: This is an educational course about using the Transformers library in data science.
Labels and Scores:
- learning: 0.9983
- education: 0.9968
- business: 0.0003
- politics: 0.0001


In [9]:
from transformers import pipeline

# Zero-shot classifier backed by the BART-large MNLI checkpoint.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Labels to score, followed by the passage they are scored against.
candidate_labels = ["finance", "technology", "investment", "economics", "risk management"]
text = "Cryptocurrency is a digital or virtual form of money that uses cryptography for security, making it hard to counterfeit or double-spend. Unlike traditional currencies, cryptocurrencies operate on a decentralized network based on blockchain technology."

# Classify the passage against every candidate label.
result = classifier(
    text,
    candidate_labels=candidate_labels,
    multi_label=True,
)

# Print the input text, then each label with its score to four decimals.
print(f"Text: {result['sequence']}")
print("Labels and Scores:")
label_score_pairs = list(zip(result['labels'], result['scores']))
for name, probability in label_score_pairs:
    print("- {}: {:.4f}".format(name, probability))


Text: Cryptocurrency is a digital or virtual form of money that uses cryptography for security, making it hard to counterfeit or double-spend. Unlike traditional currencies, cryptocurrencies operate on a decentralized network based on blockchain technology.
Labels and Scores:
- technology: 0.9643
- finance: 0.6042
- risk management: 0.5086
- investment: 0.0483
- economics: 0.0444


TEXT GENERATION

In [15]:
from transformers import pipeline, set_seed

# Text-generation pipeline backed by the base GPT-2 checkpoint.
generator = pipeline("text-generation", model="gpt2")

# Text generation can pick tokens at random; fixing the seed right before
# generating makes the cell reproducible under "Restart & Run All".
set_seed(42)

# Continue the prompt. No generation arguments are passed, so output length
# and decoding behaviour come from the pipeline/model defaults.
result = generator(
    "In this bootcamp, we will teach you how to"
)

# Each returned item is a dict; 'generated_text' holds the prompt followed
# by the generated continuation.
for output in result:
    print(output['generated_text'])


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In this bootcamp, we will teach you how to create software that will be used in your home to create and analyze data about users' use of devices and how we will analyze and manage your data for real-time, predictive analytics.




In [26]:
from transformers import pipeline, set_seed

# Text-generation pipeline backed by the smaller DistilGPT-2 checkpoint.
generator = pipeline("text-generation", model="distilgpt2")

# Sampling is random; fix the seed so the two variations are reproducible
# under "Restart & Run All".
set_seed(42)

# Generate short, length-capped continuations of the prompt.
result = generator(
    "In this place, we will guide you to",
    max_length=20,            # length cap in tokens
    num_return_sequences=2,   # ask for two alternative outputs
    do_sample=True,           # several distinct sequences need sampling; do not
                              # rely on the checkpoint's default config for this
    truncation=True,
)

# Print every generated variation on its own line.
for output in result:
    print(output['generated_text'])


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In this place, we will guide you to the biggest city and it will help you to understand if
In this place, we will guide you to the things we can do to bring you best.�


FILL-MASK

In [28]:
from transformers import pipeline

# Masked-language-model pipeline using the RoBERTa-large checkpoint.
unmasker = pipeline(task="fill-mask", model="roberta-large")

# Sentence containing a single <mask> slot for the model to fill in.
text = "Jobs in the field of <mask> and machine learning have a bright future."

# Ask for the three best candidates for the masked position.
results = unmasker(text, top_k=3)

# One line per candidate: predicted token, its score, and the completed sentence.
for candidate in results:
    word = candidate['token_str']
    probability = candidate['score']
    sentence = candidate['sequence']
    print(f"Predicted Word: {word}, Score: {probability:.4f}, Complete Sentence: {sentence}")


config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Predicted Word:  AI, Score: 0.8606, Complete Sentence: Jobs in the field of AI and machine learning have a bright future.
Predicted Word:  robotics, Score: 0.0414, Complete Sentence: Jobs in the field of robotics and machine learning have a bright future.
Predicted Word:  automation, Score: 0.0124, Complete Sentence: Jobs in the field of automation and machine learning have a bright future.


NER

In [40]:
from transformers import pipeline

# Named-entity-recognition pipeline using a BERT-large (cased) checkpoint
# fine-tuned on CoNLL-03 English.
# `aggregation_strategy="simple"` is the documented replacement for the
# deprecated `grouped_entities=True` flag and groups tokens the same way.
# NOTE(review): this strategy groups at token level, so a word can surface as
# a sub-word fragment (the recorded output shows '##eric' for "america").
# Word-level strategies ("first", "average", "max") never split a word, but
# may change which entities are reported -- evaluate before switching.
ner = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)

# Keep the call as the cell's last expression so the notebook displays the
# returned list of entity dicts.
ner("My name is roy and I work at Google in america.")

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[{'entity_group': 'PER',
  'score': 0.944896,
  'word': 'roy',
  'start': 11,
  'end': 14},
 {'entity_group': 'ORG',
  'score': 0.9987526,
  'word': 'Google',
  'start': 29,
  'end': 35},
 {'entity_group': 'LOC',
  'score': 0.9365749,
  'word': '##eric',
  'start': 41,
  'end': 45}]

QUESTION-ANSWERING

In [42]:
from transformers import pipeline

# Extractive question-answering pipeline using DistilBERT distilled on SQuAD.
qa_pipeline = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
)

# The question to answer and the passage the answer is taken from.
question = "Can AI and Machine Learning predict the future?"
context = "Artificial Intelligence and Machine Learning can analyze data patterns to make predictions about future events."

# Run the pipeline and show only the answer text from the returned dict.
result = qa_pipeline(question=question, context=context)
answer = result['answer']
print(f"Answer: {answer}")


Answer: can analyze data patterns to make predictions about future events.


SENTIMENT ANALYSIS

In [45]:
from transformers import pipeline

# Sentiment classifier using DistilBERT fine-tuned on SST-2 (English).
classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Sentence with mixed signals: praise followed by a reason to decline.
review = "KFC chicken looks really delicious, but I am already full."
result = classifier(review)

# Show the raw pipeline output (a list of label/score dicts).
print(result)


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

[{'label': 'POSITIVE', 'score': 0.9987543821334839}]


SUMMARIZATION

In [49]:
from transformers import pipeline

# Abstractive summarization pipeline using the BART-large CNN checkpoint.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

# Article to summarize (five paragraphs on AI/ML and the job market).
text = """
The job market is undergoing a significant transformation, largely driven by the advancements in Artificial Intelligence (AI) and Machine Learning (ML). These cutting-edge technologies are not only reshaping traditional industries but are also creating entirely new sectors and job roles that did not exist a decade ago. As organizations increasingly rely on data-driven decision-making, the demand for skilled professionals who can develop, implement, and manage AI and ML systems is surging.

Educational institutions worldwide are recognizing this trend and are rapidly adapting their curricula to meet the needs of the evolving job market. Universities are now offering specialized degrees and programs in AI and ML, equipping students with the theoretical knowledge and practical skills required to excel in these fields. In addition, many online platforms are providing accessible learning resources, enabling individuals to upskill or transition into AI-related careers.

Countries such as China and India are leading the charge in AI and ML education, graduating a significantly higher number of qualified engineers compared to the United States. This discrepancy highlights a pressing need for America to bolster its output of tech-savvy graduates. As companies across various sectors, including healthcare, finance, manufacturing, and logistics, begin to incorporate AI solutions to enhance efficiency and innovation, the talent gap in AI and ML is becoming increasingly critical.

The future job landscape is expected to be heavily influenced by AI and ML capabilities. Emerging roles such as AI ethicists, data scientists, machine learning engineers, and automation specialists are set to dominate the employment scene. Furthermore, established positions in traditional sectors will also evolve, requiring current professionals to adopt AI tools and methodologies in their work. The integration of AI into everyday business practices will not only enhance productivity but will also create a more competitive job market, necessitating ongoing education and skill development.

Overall, the opportunities in AI and ML are vast and varied, presenting a promising future for those willing to invest in their education and embrace the change. As we move forward, the continuous evolution of technology will undoubtedly shape the workforce, leading to exciting new possibilities for innovation and career advancement.
"""

# Summarize with explicit length limits and sampling turned off, then print
# the summary text of the first (only) result.
summary = summarizer(
    text,
    max_length=130,
    min_length=30,
    do_sample=False,
)
first_summary = summary[0]
print(first_summary['summary_text'])


The job market is undergoing a significant transformation, largely driven by the advancements in Artificial Intelligence (AI) and Machine Learning (ML) Universities are now offering specialized degrees and programs in AI and ML, equipping students with the theoretical knowledge and practical skills required to excel in these fields.


TRANSLATION

In [66]:
from transformers import pipeline

# Indonesian -> English translation pipeline (Helsinki-NLP OPUS-MT checkpoint).
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-id-en")

# Indonesian sentence to translate.
text_to_translate = "aku Lapar sekali, aku mau makan ramen"

# The pipeline returns a list of dicts; print the translated text of the first one.
result = translator(text_to_translate)
first_translation = result[0]
print(first_translation['translation_text'])

I'm so hungry, I want to eat ramen


# ANALISIS

## ZERO-SHOT CLASSIFICATION
Zero-Shot Classification menurut saya merupakan metode yang memungkinkan model mengklasifikasikan teks ke kategori baru tanpa pelatihan khusus. Metode ini efisien, fleksibel, dan dapat digunakan di berbagai bidang seperti pemasaran dan analisis media sosial. Secara keseluruhan, ini adalah cara inovatif untuk menangani klasifikasi teks.

## TEXT-GENERATION
Text-Generation menurut saya merupakan metode dalam kecerdasan buatan yang memungkinkan mesin untuk menghasilkan konten tertulis secara otomatis. Menggunakan model berbasis pembelajaran mendalam, seperti GPT (Generative Pre-trained Transformer), generasi teks mampu memproduksi kalimat atau paragraf yang relevan berdasarkan input yang diberikan.

## FILL-MASK
Fill-Mask menurut saya merupakan metode pemrosesan bahasa alami yang memprediksi kata yang hilang dalam sebuah kalimat. Metode ini menggunakan model bahasa yang telah dilatih sebelumnya untuk memahami konteks dan menyarankan kata yang paling mungkin sesuai dengan teks yang diberikan.

## NER
Named Entity Recognition (NER) menurut saya merupakan metode dalam pemrosesan bahasa alami (NLP) yang digunakan untuk mengidentifikasi dan mengklasifikasikan entitas dalam teks, seperti nama orang, organisasi, dan lokasi. Tujuan NER adalah mengekstrak informasi relevan untuk memudahkan analisis data dan pengorganisasian konten.

## QUESTION-ANSWERING
Question-Answering (QA) menurut saya merupakan metode dalam pemrosesan bahasa alami (NLP) yang bertujuan untuk memberikan jawaban atas pertanyaan yang diajukan oleh pengguna, berdasarkan konteks atau teks yang diberikan. Model QA dapat digunakan untuk menjawab berbagai jenis pertanyaan, dari yang sederhana hingga yang kompleks.

## SENTIMENT ANALYSIS
Sentiment Analysis menurut saya merupakan metode untuk menentukan dan mengkategorikan opini atau perasaan dari teks, apakah positif, negatif, atau netral. Ini sering digunakan dalam pemrosesan bahasa alami (NLP) untuk memahami bagaimana orang merasa tentang suatu produk, layanan, atau topik tertentu.

## SUMMARIZATION
Summarization menurut saya merupakan metode untuk meringkas teks panjang menjadi versi yang lebih singkat tanpa menghilangkan informasi utamanya. Metode ini sangat berguna dalam dunia informasi yang semakin kompleks, memberikan cara efisien untuk menyaring dan memahami konten. Dengan kemajuan dalam teknologi pembelajaran mesin dan NLP, proses ini akan semakin canggih, memungkinkan lebih banyak aplikasi dan integrasi dalam berbagai bidang.


## TRANSLATION
Translation menurut saya merupakan metode dalam proses mengalihkan makna dari satu bahasa ke bahasa lain. Tujuan utama penerjemahan adalah untuk membuat konten yang dapat diakses dan dipahami oleh pembaca yang berbicara bahasa target, sambil mempertahankan makna, nuansa, dan konteks asli dari teks sumber.