***11.Implement a text classification program using the Naive Bayes algorithm to classify text documents into categories.***

In [13]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

documents = [
    {'text': 'This is a positive document', 'label': 'positive'},
    {'text': 'Negative sentiment in this one', 'label': 'negative'},
    {'text': 'Spam! You won a prize!', 'label': 'spam'},
    {'text': 'Another positive example', 'label': 'positive'},
    {'text': 'More negative text for testing', 'label': 'negative'},
    {'text': 'Congratulations! You are selected as a winner!', 'label': 'spam'},
]

texts = [doc['text'] for doc in documents]
labels = [doc['label'] for doc in documents]

text_train, text_test, label_train, label_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

classifier = MultinomialNB()
classifier.fit(X_train, label_train)

predictions = classifier.predict(X_test)

print("Classification Report:")
print(metrics.classification_report(label_test, predictions))
print("\nConfusion Matrix:")
print(metrics.confusion_matrix(label_test, predictions))

Classification Report:
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00         1
    positive       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2


Confusion Matrix:
[[1 0]
 [0 1]]


***12.Write a Python program using the Spacy library to perform Namedd Entity Recognition forclassifying named entities in text,such as names of people,organizations,locations,dates and more on a given sentence"BarackObama was the 44th President of the United States,and he was born in Honolulu,Hawaii".***

In [1]:
import spacy

nlp = spacy.load("en_core_web_sm")

sentence = "Barack Obama was the 44th President of the United States, and he was born in Honolulu, Hawaii."

doc = nlp(sentence)

print("Named Entities:")
for ent in doc.ents:
    print(f"{ent.text}: {ent.label_}")

Named Entities:
Barack Obama: PERSON
44th: ORDINAL
the United States: GPE
Honolulu: GPE
Hawaii: GPE


***13.Develop a program for sentiment analysis using textblob library,to the sentences"I love this product!It's amazing.","The weather is terrible today."***

In [2]:
pip install textblob



In [3]:
from textblob import TextBlob

def analyze_sentiment(text):
    blob = TextBlob(text)

    polarity = blob.sentiment.polarity

    if polarity > 0:
        return "Positive"
    elif polarity < 0:
        return "Negative"
    else:
        return "Neutral"

sentence1 = "I love this product! It's amazing."
sentence2 = "The weather is terrible today."

result1 = analyze_sentiment(sentence1)
result2 = analyze_sentiment(sentence2)

print(f"Sentence 1: {result1}")
print(f"Sentence 2: {result2}")

Sentence 1: Positive
Sentence 2: Negative


***14.Write a Python program to perform extractive text summarization on a longer text document with libraries NLTK or Genism.***

In [12]:
import nltk
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.tokenize.treebank import TreebankWordDetokenizer

nltk.download('punkt')
nltk.download('stopwords')

def extractive_summarization(document, num_sentences=3):
    sentences = sent_tokenize(document)

    stop_words = set(stopwords.words('english'))
    words = [word.lower() for sentence in sentences for word in nltk.word_tokenize(sentence) if word.isalnum() and word.lower() not in stop_words]

    word_freq = FreqDist(words)
    sentence_scores = {sentence: sum(word_freq[word] for word in nltk.word_tokenize(sentence) if word.isalnum() and word.lower() not in stop_words) for sentence in sentences}

    top_sentences = sorted(sentences, key=lambda sentence: sentence_scores[sentence], reverse=True)[:num_sentences]
    summary = TreebankWordDetokenizer().detokenize(top_sentences)

    return summary

document = """
Natural language processing (NLP) is a subfield of artificial intelligence (AI) that focuses on the interaction between computers and humans through natural language. NLP technologies are used to process, analyze, and understand large amounts of natural language data.

One of the primary applications of NLP is sentiment analysis, which determines the sentiment or emotional tone of a piece of text. Sentiment analysis is widely used in social media monitoring, customer feedback analysis, and brand reputation management.

Text summarization is another important NLP task. Extractive summarization involves selecting a subset of sentences from a text to create a shorter version that retains the most critical information. Abstractive summarization, on the other hand, generates a summary by paraphrasing and rephrasing the original text. The extractive summarization method typically involves the following steps:

1. Sentence Tokenization: Divide the text into individual sentences.

2. Text Preprocessing: Remove stopwords and punctuation, and convert words to lowercase.

3. Calculate Sentence Scores: Assign scores to sentences based on their importance.

4. Select Top Sentences: Choose sentences with the highest scores to form the summary.
"""
num_sentences_in_summary = 3

result_summary = extractive_summarization(document, num_sentences=num_sentences_in_summary)
print(result_summary)

Extractive summarization involves selecting a subset of sentences from a text to create a shorter version that retains the most critical information. One of the primary applications of NLP is sentiment analysis, which determines the sentiment or emotional tone of a piece of text. 
Natural language processing (NLP) is a subfield of artificial intelligence (AI) that focuses on the interaction between computers and humans through natural language.


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


15.Create a Python program using Spacy library and English model"en_core_web_sm" to perform dependency parsing on a given sentence"The cat sat on the mat.","She quickly ran to catch the bus.".

In [5]:
import spacy

nlp = spacy.load("en_core_web_sm")

sentences = ["The cat sat on the mat.", "She quickly ran to catch the bus."]


for sentence in sentences:

    doc = nlp(sentence)


    print("\nOriginal Sentence:", sentence)
    for token in doc:
        print(f"{token.text} --({token.dep_})--> {token.head.text}")



Original Sentence: The cat sat on the mat.
The --(det)--> cat
cat --(nsubj)--> sat
sat --(ROOT)--> sat
on --(prep)--> sat
the --(det)--> mat
mat --(pobj)--> on
. --(punct)--> sat

Original Sentence: She quickly ran to catch the bus.
She --(nsubj)--> ran
quickly --(advmod)--> ran
ran --(ROOT)--> ran
to --(aux)--> catch
catch --(advcl)--> ran
the --(det)--> bus
bus --(dobj)--> catch
. --(punct)--> ran


***16.Implement a Python program using the Spacy library to perform NAmed Entity Recognition on a given sentences "Apple Inc. is headquartered in Cupertino,California, and its CEO,Tim Cook,often delivers keynote speeches.","The Eiffel Tower in Paris,France,is a popular tourist attraction.".***

In [6]:
import spacy


nlp = spacy.load("en_core_web_sm")


sentences = [
    "Apple Inc. is headquartered in Cupertino, California, and its CEO, Tim Cook, often delivers keynote speeches.",
    "The Eiffel Tower in Paris, France, is a popular tourist attraction."
]


for sentence in sentences:

    doc = nlp(sentence)


    print("\nOriginal Sentence:", sentence)
    for ent in doc.ents:
        print(f"Entity: {ent.text}, Label: {ent.label_}")


Original Sentence: Apple Inc. is headquartered in Cupertino, California, and its CEO, Tim Cook, often delivers keynote speeches.
Entity: Apple Inc., Label: ORG
Entity: Cupertino, Label: GPE
Entity: California, Label: GPE
Entity: Tim Cook, Label: PERSON

Original Sentence: The Eiffel Tower in Paris, France, is a popular tourist attraction.
Entity: The Eiffel Tower, Label: FAC
Entity: Paris, Label: GPE
Entity: France, Label: GPE


***17.Create a Python program for abstractive text summarization ,a more advanced technique that generates summaries by rewriting the content in a human-readable form.Compose using following sentence"The World Health Organization plays a vital role in global health.WHO is headquartered in Geneva Switzerland,and it is responsible for coordinating international efforts to control and prevent the spread of diseases?Its mission is to promote and protect the health of people worldwide."***

In [7]:
pip install transformers



In [8]:
from transformers import pipeline, GPT2Tokenizer, GPT2LMHeadModel

def
    model_name = "gpt2"
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)


    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(inputs["input_ids"], max_length=150, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summary


input_text = "The World Health Organization (WHO) plays a vital role in global health. WHO is headquartered in Geneva, Switzerland, and it is responsible for coordinating international efforts to control and prevent the spread of diseases? Its mission is to promote and protect the health of people worldwide."

summary = generate_abstractive_summary(input_text)
print(summary)


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The World Health Organization (WHO) plays a vital role in global health. WHO is headquartered in Geneva, Switzerland, and it is responsible for coordinating international efforts to control and prevent the spread of diseases? Its mission is to promote and protect the health of people worldwide.

The World Health Organization (WHO) plays a vital role in global health. WHO is headquartered in Geneva, Switzerland, and it is responsible for coordinating international efforts to control and prevent the spread of diseases? Its mission is to promote and protect the health of people worldwide. The World Health Organization (WHO) is responsible for coordinating international efforts to control and prevent the spread of diseases? Its mission is to promote and protect the health of people worldwide. The World Health Organization (WHO)


***18.Develop a Python program that performs Named Entity Recognition on a given text using popular libraries or models for the sentence"The capital of France is Paris,and its known for the Eiffel Tower."***

In [9]:
import spacy

def perform_ner(text):

    nlp = spacy.load("en_core_web_sm")

    doc = nlp(text)


    entities = [(entity.text, entity.label_) for entity in doc.ents]

    return entities


input_text = "The capital of France is Paris, and it's known for the Eiffel Tower."


ner_results = perform_ner(input_text)


for entity, label in ner_results:
    print(f"Entity: {entity}, Label: {label}")


Entity: France, Label: GPE
Entity: Paris, Label: GPE
Entity: the Eiffel Tower, Label: FAC


19.Create a Python program for sentiment analysis which can be positive,negative,or neutralon a set of text data" I love this product! It's amazing."

In [15]:
from transformers import pipeline

def analyze_sentiment(text):
    sentiment_pipeline = pipeline("sentiment-analysis")


    result = sentiment_pipeline(text)

    return result[0]

input_text = "I love this product! It's amazing."

sentiment_result = analyze_sentiment(input_text)

print(f"Sentiment: {sentiment_result['label']}, Confidence: {sentiment_result['score']}")


No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Sentiment: POSITIVE, Confidence: 0.9998866319656372


***20.Develop a Python progrsm for dependency parsing of a sentence using a dependency parser to the sentences"John and Mary went to the store.","The big brown dog chased the small black cat.".***

In [11]:
import spacy

def dependency_parsing(sentence):
    nlp = spacy.load("en_core_web_sm")

    doc = nlp(sentence)

    dependencies = [(token.text, token.dep_, token.head.text) for token in doc]

    return dependencies

sentence1 = "John and Mary went to the store."
sentence2 = "The big brown dog chased the small black cat."

dependencies1 = dependency_parsing(sentence1)
dependencies2 = dependency_parsing(sentence2)

print("Dependencies for Sentence 1:")
for dep in dependencies1:
    print(f"{dep[0]} - {dep[1]} - {dep[2]}")

print("\nDependencies for Sentence 2:")
for dep in dependencies2:
    print(f"{dep[0]} - {dep[1]} - {dep[2]}")


Dependencies for Sentence 1:
John - nsubj - went
and - cc - John
Mary - conj - John
went - ROOT - went
to - prep - went
the - det - store
store - pobj - to
. - punct - went

Dependencies for Sentence 2:
The - det - dog
big - amod - dog
brown - amod - dog
dog - nsubj - chased
chased - ROOT - chased
the - det - cat
small - amod - cat
black - amod - cat
cat - dobj - chased
. - punct - chased
