In [6]:
from transformers import pipeline

# Feature Extraction:
# pipeline("feature-extraction"): This function creates a pipeline for extracting vector representations (features) of input text using pre-trained models.


feature_extraction = pipeline("feature-extraction")
text = "Example text for feature extraction."
features = feature_extraction(text)
print(features)
print("")

# Fill-Mask:
# pipeline("fill-mask"): This function creates a pipeline for filling in masked tokens in a given text. It predicts the most likely token to fill in the mask.

fill_mask = pipeline("fill-mask")
text = "Hugging Face is a French company that is <mask> cutting-edge research in NLP."
result = fill_mask(text)
print(result)
print("")

# Named Entity Recognition (NER):
# pipeline("ner"): This function creates a pipeline for identifying named entities (such as persons, organizations, locations, etc.) in a given text.

ner = pipeline("ner")
text = "Hugging Face is located in New York City and was founded in 2016."
entities = ner(text)
print(entities)
print("")

# Question Answering:
# pipeline("question-answering"): This function creates a pipeline for answering questions based on a given context. It predicts the span of text within the context that answers the question.

question_answering = pipeline("question-answering")
context = "Hugging Face is a company that specializes in natural language processing."
question = "What does Hugging Face specialize in?"
answer = question_answering(question=question, context=context)
print(answer)
print("")

# Sentiment Analysis:
# pipeline("sentiment-analysis"): This function creates a pipeline for analyzing the sentiment of a given text. It predicts whether the sentiment expressed in the text is positive, negative, or neutral.

sentiment_analysis = pipeline("sentiment-analysis")
text = "I love using Hugging Face's Transformers library!"
result = sentiment_analysis(text)
print(result)
print("")

# Summarization:
# pipeline("summarization"): This function creates a pipeline for generating summaries of input text. It generates concise summaries that capture the key points of the input text.

summarization = pipeline("summarization")
text = "Hugging Face provides state-of-the-art natural language processing tools."
summary = summarization(text)
print(summary)
print("")

# Text Generation:
# pipeline("text-generation"): This function creates a pipeline for generating text based on a given prompt. It generates new text continuations based on the provided prompt.

text_generation = pipeline("text-generation")
prompt = "Once upon a time"
generated_text = text_generation(prompt, max_length=50, num_return_sequences=1)
print(generated_text)
print("")

# Translation:
# pipeline("translation"): This function creates a pipeline for translating text from one language to another. It utilizes pre-trained models to perform language translation.

translation = pipeline("translation", model="Helsinki-NLP/opus-mt-en-de")
text = "Hugging Face is an amazing platform for NLP."
translated_text = translation(text)
print(translated_text)
print("")

# Zero-Shot Classification:
# pipeline("zero-shot-classification"): This function creates a pipeline for performing zero-shot classification on input text. It predicts the likelihood of predefined labels being associated with the input text, even if the model has not been trained on specific labels.

zero_shot_classification = pipeline("zero-shot-classification")
sequence = "Hugging Face is a company that specializes in natural language processing."
candidate_labels = ["technology", "finance", "healthcare"]
result = zero_shot_classification(sequence, candidate_labels)
print(result)
print("")

No model was supplied, defaulted to distilbert/distilbert-base-cased and revision 935ac13 (https://huggingface.co/distilbert/distilbert-base-cased).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to distilbert/distilroberta-base and revision ec58a5b (https://huggingface.co/distilbert/distilroberta-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


[[[0.4770090579986572, 0.17599928379058838, 0.18270587921142578, -0.4303760826587677, -0.5309910178184509, -0.000764035212341696, 0.0831507071852684, 0.07937154918909073, 0.02161201275885105, -1.2887451648712158, -0.4631766378879547, 0.24813146889209747, -0.10897015035152435, 0.11042669415473938, -0.6965030431747437, 0.1450352519750595, 0.37073150277137756, 0.1516527682542801, -0.09352053701877594, -0.30417177081108093, 0.05507732182741165, -0.12694454193115234, 0.820496141910553, -0.21348203718662262, 0.21779288351535797, -0.12388961762189865, 0.4116840660572052, 0.04274265095591545, -0.20346425473690033, 0.5493922233581543, 0.026095395907759666, 0.29597315192222595, 0.1468891054391861, -0.014266920275986195, -0.4403071403503418, 0.06922291964292526, -0.1526719182729721, -0.36499089002609253, -0.09009414911270142, -0.09526676684617996, -0.5085986256599426, 0.10367541760206223, 0.6889033317565918, -0.26731258630752563, 0.22853805124759674, -0.5359692573547363, -0.008591839112341404, 0.

Some weights of the model checkpoint at distilbert/distilroberta-base were not used when initializing RobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'score': 0.2739757001399994, 'token': 7909, 'token_str': ' conducting', 'sequence': 'Hugging Face is a French company that is conducting cutting-edge research in NLP.'}, {'score': 0.24179378151893616, 'token': 22653, 'token_str': ' pioneering', 'sequence': 'Hugging Face is a French company that is pioneering cutting-edge research in NLP.'}, {'score': 0.09256937354803085, 'token': 17963, 'token_str': ' undertaking', 'sequence': 'Hugging Face is a French company that is undertaking cutting-edge research in NLP.'}, {'score': 0.049521319568157196, 'token': 11511, 'token_str': ' advancing', 'sequence': 'Hugging Face is a French company that is advancing cutting-edge research in NLP.'}, {'score': 0.04829561337828636, 'token': 8592, 'token_str': ' pursuing', 'sequence': 'Hugging Face is a French company that is pursuing cutting-edge research in NLP.'}]



Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'entity': 'I-ORG', 'score': 0.9821485, 'index': 1, 'word': 'Hu', 'start': 0, 'end': 2}, {'entity': 'I-ORG', 'score': 0.8223563, 'index': 2, 'word': '##gging', 'start': 2, 'end': 7}, {'entity': 'I-ORG', 'score': 0.9458477, 'index': 3, 'word': 'Face', 'start': 8, 'end': 12}, {'entity': 'I-LOC', 'score': 0.99934727, 'index': 7, 'word': 'New', 'start': 27, 'end': 30}, {'entity': 'I-LOC', 'score': 0.99916565, 'index': 8, 'word': 'York', 'start': 31, 'end': 35}, {'entity': 'I-LOC', 'score': 0.9994436, 'index': 9, 'word': 'City', 'start': 36, 'end': 40}]



No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


{'score': 0.9714844822883606, 'start': 46, 'end': 73, 'answer': 'natural language processing'}



No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'label': 'POSITIVE', 'score': 0.9978122711181641}]



Your max_length is set to 142, but your input_length is only 18. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
No model was supplied, defaulted to openai-community/gpt2 and revision 6c0e608 (https://huggingface.co/openai-community/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'summary_text': ' Hugging Face provides state-of-the-art natural language processing tools . Hugging face is the name of Hugging Facsimation, a tool that uses natural language language processing techniques . It is available in the U.S. and Canada for $99.99 .'}]



Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "Once upon a time, you might have become obsessed with this little thing known as an 'X'. Now that you've been exposed to this weird and wonderful technology, things can easily get rather confusing. You're often unable to connect to other things and"}]



No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'translation_text': 'Hugging Face ist eine erstaunliche Plattform für NLP.'}]

{'sequence': 'Hugging Face is a company that specializes in natural language processing.', 'labels': ['technology', 'healthcare', 'finance'], 'scores': [0.9871266484260559, 0.007305944804102182, 0.005567406304180622]}



In [2]:
!git clone https://github.com/tejaasreddy001/Beautiful_Soup.git

Cloning into 'Beautiful_Soup'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (3/3), done.


In [3]:
!git init

[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/.git/


In [5]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
!git add '/content/drive/MyDrive/Colab Notebooks/hugging_face.ipynb'

In [10]:
! git commit -m "first commit"

Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@94ba0efaa477.(none)')


In [11]:
!git config --global user.email "tejas.mukunda@gmail.com"
!git config --global user.name "tejaasreddy001"

In [12]:
! git commit -m "first commit"

[master (root-commit) 789846c] first commit
 1 file changed, 1 insertion(+)
 create mode 100644 drive/MyDrive/Colab Notebooks/hugging_face.ipynb


In [13]:
! git status

On branch master
Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   drive/MyDrive/Colab Notebooks/hugging_face.ipynb[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31m.config/[m
	[31mBeautiful_Soup/[m
	[31mdrive/MyDrive/10674168.pdf[m
	[31mdrive/MyDrive/Automatic_Number_Plate_Detection_Recognition_YOLOv8/[m
	[31mdrive/MyDrive/Colab Notebooks/ANNPro2.ipynb[m
	[31mdrive/MyDrive/Colab Notebooks/ANNPrp2NLP.ipynb[m
	[31mdrive/MyDrive/Colab Notebooks/BeautifulSoupWebScrape.ipynb[m
	[31mdrive/MyDrive/Colab Notebooks/Copy of Copy of Sentiment Analysis Group Project 2.ipynb[m
	[31mdrive/MyDrive/Colab Notebooks/Copy of dbgroup0 (1).ipynb[m
	[31mdrive/MyDrive/Colab Notebooks/Copy of dbgroup0.ipynb[m
	[31mdrive/MyDrive/Colab Notebooks/FinalSemResumeFilter.ipynb[m
	[31mdrive/MyDrive/Colab Notebooks/MNIST_&_