***21.Extract_Keywords__using_TF-IDF***

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer

sentences = [
    "Artificial intelligence (AI) is a field of computer science.",
    "Machine learning is a subset of AI that focuses on training models to make predictions.",
    "Deep learning is a type of machine learning that uses neural networks with multiple layers.",
    "Neural networks are composed of interconnected nodes called neurons.",
    "Recurrent neural networks (RNNs) are commonly used in natural language processing tasks.",
]

tfidf_vectorizer = TfidfVectorizer(stop_words='english')

tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)

feature_names = tfidf_vectorizer.get_feature_names_out()

key_phrases = []
for i, sentence in enumerate(sentences):
    scores = tfidf_matrix[i].toarray().flatten()
    top_indices = scores.argsort()[-3:][::-1]
    top_words = [feature_names[idx] for idx in top_indices]
    key_phrases.append(top_words)

for i, key_phrase in enumerate(key_phrases):
    print(f"Sentence {i + 1}: Key Phrases - {', '.join(key_phrase)}")


Sentence 1: Key Phrases - intelligence, artificial, science
Sentence 2: Key Phrases - models, predictions, focuses
Sentence 3: Key Phrases - learning, deep, layers
Sentence 4: Key Phrases - nodes, called, composed
Sentence 5: Key Phrases - language, tasks, commonly


***22.Entity_mention_using_wikipediaapi***

In [2]:
!pip install wikipedia-api
!pip install requests

Collecting wikipedia-api
  Downloading Wikipedia_API-0.6.0-py3-none-any.whl (14 kB)
Installing collected packages: wikipedia-api
Successfully installed wikipedia-api-0.6.0


In [37]:
import wikipediaapi
import requests

sentences = [
    "Apple is a leading tech company.",
    "I love apples as a fruit.",
    "Python is a popular programming language.",
    "The python is a non-venomous snake."
]

headers = {
    'User-Agent': 'My_User_Agent/1.0 (Your_Name_or_Application_Name)'
}

wiki_wiki = wikipediaapi.Wikipedia('en', headers=headers)

def disambiguate_entities(sentences):
    for sentence in sentences:
        words = sentence.split()
        for word in words:
            page = wiki_wiki.page(word)
            if page.exists():
                print(f"Entity Mention: {word}")
                print(f"Corresponding Wikipedia Entity: {page.fullurl}")
                print(f"Summary: {page.summary}")
                print("")

disambiguate_entities(sentences)

Entity Mention: Apple
Corresponding Wikipedia Entity: https://en.wikipedia.org/wiki/Apple
Summary: An apple is a round, edible fruit produced by an apple tree (Malus spp., among them the domestic or orchard apple; Malus domestica). Apple trees are cultivated worldwide and are the most widely grown species in the genus Malus. The tree originated in Central Asia, where its wild ancestor, Malus sieversii, is still found. Apples have been grown for thousands of years in Asia and Europe and were introduced to North America by European colonists. Apples have religious and mythological significance in many cultures, including Norse, Greek, and European Christian tradition.
Apples grown from seed tend to be very different from those of their parents, and the resultant fruit frequently lacks desired characteristics. For commercial purposes, including botanical evaluation, apple cultivars are propagated by clonal grafting onto rootstocks. Apple trees grown without rootstocks tend to be larger an

***23.Access_Wordnet***

In [4]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [5]:
from nltk.corpus import wordnet

def explore_word_meanings(sentence):
    words = sentence.split()
    for word in words:
        synsets = wordnet.synsets(word)
        if synsets:
            print(f"\nWord: {word}")
            print("Definitions:")
            for synset in synsets:
                print(f"- {synset.definition()}")
            print("Synonyms:")
            synonyms = set()
            for synset in synsets:
                synonyms.update(synset.lemma_names())
            print(f"- {', '.join(synonyms)}")

sentences = [
    "The cat sat on the mat.",
    "The dog barked loudly.",
    "She played the piano beautifully.",
]

for i, sentence in enumerate(sentences):
    print(f"\nSentence {i + 1}: {sentence}")
    explore_word_meanings(sentence)


Sentence 1: The cat sat on the mat.

Word: cat
Definitions:
- feline mammal usually having thick soft fur and no ability to roar: domestic cats; wildcats
- an informal term for a youth or man
- a spiteful woman gossip
- the leaves of the shrub Catha edulis which are chewed like tobacco or used to make tea; has the effect of a euphoric stimulant
- a whip with nine knotted cords
- a large tracked vehicle that is propelled by two endless metal belts; frequently used for moving earth in construction and farm work
- any of several large cats typically able to roar and living in the wild
- a method of examining body organs by scanning them with X rays and using a computer to construct a series of cross-sectional scans along a single axis
- beat with a cat-o'-nine-tails
- eject the contents of the stomach through the mouth
Synonyms:
- cast, Arabian_tea, computed_axial_tomography, spue, spew, quat, regurgitate, African_tea, barf, CAT, hombre, guy, bozo, CT, Caterpillar, vomit, vomit_up, chuck

***24.First-Order-Predicate_calculus***

In [15]:
from pyparsing import Word, Literal, Keyword, infixNotation, opAssoc


and_ = Keyword("and")
or_ = Keyword("or")
not_ = Keyword("not")

variable = Word("xy", exact=1)

expr = infixNotation(variable, [
    (not_, 1, opAssoc.RIGHT),
    (and_, 2, opAssoc.LEFT),
    (or_, 2, opAssoc.LEFT),
])

expressions = [
    "x and y",
    "x or (not y)",
    "x and (y or (not x))",
]

for i, expression in enumerate(expressions):
    print(f"\nExpression {i + 1}: {expression}")
    try:
        result = expr.parseString(expression)[0]
        print(f"Result: {result}")
    except Exception as e:
        print(f"Error: {e}")


Expression 1: x and y
Result: ['x', 'and', 'y']

Expression 2: x or (not y)
Result: ['x', 'or', ['not', 'y']]

Expression 3: x and (y or (not x))
Result: ['x', 'and', ['y', 'or', ['not', 'x']]]


***25.Regex***

In [7]:
import re

def perform_regex_operations(text, patterns):
    print("Original Text:")
    print(text)

    for pattern in patterns:
        print("\nRegex Pattern:", pattern)
        matches = re.findall(pattern, text)
        if matches:
            print("Matches:", matches)
        else:
            print("No matches found.")


text = "Regular expressions are a powerful tool for pattern matching in text. They provide a flexible way to search and manipulate strings."


regex_patterns = [
    r"regex",
    r"4G\*",
    r"powerful|flexible|manipulate",
]

perform_regex_operations(text, regex_patterns)

Original Text:
Regular expressions are a powerful tool for pattern matching in text. They provide a flexible way to search and manipulate strings.

Regex Pattern: regex
No matches found.

Regex Pattern: 4G\*
No matches found.

Regex Pattern: powerful|flexible|manipulate
Matches: ['powerful', 'flexible', 'manipulate']


***26.Basic information retrival using TF-IDF***

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

sentences = [
    "Climate change is a pressing global issue that requires immediate action.",
    "Renewable energy sources, such as solar and wind power, are essential for reducing carbon emissions.",
    "Greenhouse gases, like carbon dioxide and methane, contribute to global warming.",
    "The Paris Agreement is an international treaty aimed at addressing climate change.",
    "Sustainability and environmental conservation are crucial for the future of our planet."
]

query = "Climate change action"

sentences.append(query)

vectorizer = TfidfVectorizer()

tfidf_matrix = vectorizer.fit_transform(sentences)

query_tfidf = tfidf_matrix[-1]

similarity_scores = (tfidf_matrix[:-1] @ query_tfidf.T).toarray().flatten()

sorted_indices = similarity_scores.argsort()[::-1]

print("Retrieved documents (in order of relevance):")
for idx in sorted_indices:
    print(f"Document {idx + 1}: {sentences[idx]}")

Retrieved documents (in order of relevance):
Document 1: Climate change is a pressing global issue that requires immediate action.
Document 4: The Paris Agreement is an international treaty aimed at addressing climate change.
Document 5: Sustainability and environmental conservation are crucial for the future of our planet.
Document 3: Greenhouse gases, like carbon dioxide and methane, contribute to global warming.
Document 2: Renewable energy sources, such as solar and wind power, are essential for reducing carbon emissions.


***27.syntax_driven_semantic_analysis_using_nounphrases***

In [38]:
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "She is an excellent chef and loves to cook delicious meals.",
    "The Eiffel Tower in Paris is a famous landmark."
]

def extract_noun_phrases_meanings(sentences):
    for sentence in sentences:
        words = word_tokenize(sentence)

        pos_tags = pos_tag(words)

        chunks = ne_chunk(pos_tags)
        noun_phrases = []
        meanings = []
        for chunk in chunks:
            if hasattr(chunk, 'label') and chunk.label() == 'NP':
                noun_phrase = ' '.join([token for token, pos in chunk.leaves()])
                noun_phrases.append(noun_phrase)

                if 'Eiffel Tower' in noun_phrase:
                    meanings.append("A famous landmark in Paris, France.")
                elif 'chef' in noun_phrase:
                    meanings.append("Someone skilled in cooking.")
                else:
                    meanings.append("Meaning not defined.")

        print(f"Sentence: {sentence}")
        print("Extracted Noun Phrases:")
        for i, noun_phrase in enumerate(noun_phrases):
            print(f"{i + 1}. {noun_phrase}")
            print(f"   Meaning: {meanings[i]}")
        print("")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


***28.PCFG_using_PCNF_grammar***

In [10]:
import random

grammar = {
    "S": ["NP VP", 1.0],
    "NP": ["Det N", 0.5, "NP PP", 0.4, "'the'", 0.1],
    "VP": ["V NP", 0.7, "VP PP", 0.3],
    "PP": ["P NP", 1.0],
    "Det": ["'the'", 0.7, "'a'", 0.3],
    "N": ["'fox'", 0.4, "'dog'", 0.3, "'cat'", 0.2, "'bird'", 0.1],
    "V": ["'jumps'", 0.5, "'runs'", 0.3, "'sits'", 0.2],
    "P": ["'over'", 0.6, "'on'", 0.4]
}

def parse_sentence(sentence):
    parse_tree = []
    tokens = sentence.split()

    def expand_symbol(symbol):
        if symbol in grammar:
            options = grammar[symbol]
            choice = random.choices(options)[0]

            if type(choice) is list:
                prob = choice[1]
                if random.random() < prob:
                    parse_tree.append((symbol, choice[0]))
                    for sub_symbol in choice[0]:
                        expand_symbol(sub_symbol)
                else:
                    expand_symbol(symbol)
            else:
                parse_tree.append((symbol, choice))
        else:
            parse_tree.append((symbol, tokens.pop(0)))

    expand_symbol("S")

    return parse_tree

sentence = "The quick brown fox jumps over the lazy dog."
parsed_tree = parse_sentence(sentence)
print("Parse tree:", parsed_tree)

Parse tree: [('S', 1.0)]


***29.Recognize_dialogacts***

In [11]:
import spacy

def recognize_dialog_acts(dialog):
    nlp = spacy.load("en_core_web_sm")
    dialog_acts = []

    for utterance in dialog:
        doc = nlp(utterance)
        if any(token.text.lower() in ["hello", "hi", "hey"] for token in doc):
            dialog_acts.append(("Greeting", utterance))
        elif any(token.text.lower() in ["how", "doing"] for token in doc) and "you" in [token.text.lower() for token in doc]:
            dialog_acts.append(("Inquiry - Well-being", utterance))
        elif any(token.text.lower() in ["thank", "thanks", "thank you"] for token in doc):
            dialog_acts.append(("Expression of Thanks", utterance))
        elif any(token.text.lower() in ["please", "could", "can"] for token in doc) and "you" in [token.text.lower() for token in doc]:
            dialog_acts.append(("Request", utterance))
        elif any(token.text.lower() in ["sure", "here", "go"] for token in doc):
            dialog_acts.append(("Acknowledgment/Confirmation", utterance))
        elif any(token.text.lower() in ["time", "meeting", "tomorrow"] for token in doc):
            dialog_acts.append(("Inquiry - Meeting Time", utterance))
        else:
            dialog_acts.append(("Other", utterance))

    return dialog_acts

dialog = [
    "Hello! How are you today?",
    "I'm doing well, thank you. How about you?",
    "Can you please pass the salt?",
    "Sure, here you go.",
    "What time is the meeting tomorrow?",
    "The meeting is at 2:00 PM.",
]

recognized_acts = recognize_dialog_acts(dialog)

for act, utterance in recognized_acts:
    print(f"{act}: {utterance}")


Greeting: Hello! How are you today?
Inquiry - Well-being: I'm doing well, thank you. How about you?
Request: Can you please pass the salt?
Acknowledgment/Confirmation: Sure, here you go.
Inquiry - Meeting Time: What time is the meeting tomorrow?
Inquiry - Meeting Time: The meeting is at 2:00 PM.


***30.Hugging_face_transformers***

In [26]:
!pip install transformers==4.2.1
!pip install sentencepiece==0.1.95

Collecting transformers==4.2.1
  Downloading transformers-4.2.1-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Collecting sacremoses (from transformers==4.2.1)
  Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers==0.9.4 (from transformers==4.2.1)
  Downloading tokenizers-0.9.4.tar.gz (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.2/184.2 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: tokenizers
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for tokeniz

In [25]:
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
translator = pipeline("translation_en_to_de")
text = "Hello world! Hugging Face is the best NLP tool."
translation = translator(text)

print(translation)
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-nl")
model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-nl")
text = "Hello my friends! How are you doing today?"
tokenized_text = tokenizer(text, return_tensors="pt")
print(tokenized_text)

{'input_ids': tensor([[ 147, 2105,  121, 2108,   54,  457,   56,   23,  728, 1042,   17,    0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
translation = model.generate(**tokenized_text)
translated_text = tokenizer.batch_decode(translation, skip_special_tokens=True)[0]
print(translated_text)


No model was supplied, defaulted to t5-base and revision 686f1db (https://huggingface.co/t5-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'translation_text': 'Hello world, Hugging Face ist das beste NLP-Tool.'}]


ValueError: ignored

***31.Extract_Entities***

In [27]:
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

def extract_named_entities(text):
    words = word_tokenize(text)
    pos_tags = pos_tag(words)
    named_entities = ne_chunk(pos_tags)
    return named_entities

def resolve_references(named_entities):
    resolved_text = ""
    for chunk in named_entities:
        if isinstance(chunk, nltk.Tree):
            entity_type = chunk.label()
            entity_text = ' '.join(c[0] for c in chunk.leaves())

            if entity_type == 'GPE':
                resolved_text += f"[Location: {entity_text}] "
            elif entity_type == 'PERSON':
                resolved_text += f"[Person: {entity_text}] "
            else:
                resolved_text += f"[{entity_type}: {entity_text}] "
        else:
            resolved_text += chunk[0] + ' '

    return resolved_text.strip()


input_text = "Harvard University, located in Cambridge, Massachusetts, is a prestigious institution."

named_entities_result = extract_named_entities(input_text)

resolved_text = resolve_references(named_entities_result)

print("Original Text:")
print(input_text)
print("\nResolved Text:")
print(resolved_text)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


Original Text:
Harvard University, located in Cambridge, Massachusetts, is a prestigious institution.

Resolved Text:
[ORGANIZATION: Harvard] [Location: University] , located in [Location: Cambridge] , [Location: Massachusetts] , is a prestigious institution .


***32.providing_coherence_for_sample***

In [28]:
import re

sample_text = "Once upon a time, there was a young boy named Peter."

def check_coherence(text):
    sentences = re.split(r'(?<=[.!?]) +', text)
    coherence_score = 0

    for sentence in sentences:
        if sentence[0].isupper() and sentence[-1] in ['.', '!', '?']:
            coherence_score += 1

    normalized_score = coherence_score / max(len(sentences), 1)

    return normalized_score

coherence = check_coherence(sample_text)
print(f"The coherence score of the text is: {coherence}")

The coherence score of the text is: 1.0


***33.Recognize_dialog_acts***

In [29]:
import re

def recognize_dialog_acts(dialog):
    dialog_acts = []

    for utterance in dialog:
        if re.search(r'\b(?:hi|hello|good morning|good afternoon|good evening)\b', utterance, re.IGNORECASE):
            dialog_acts.append({"utterance": utterance, "act": "greeting"})
        elif re.search(r'\b(?:how\'s the weather today)\b', utterance, re.IGNORECASE):
            dialog_acts.append({"utterance": utterance, "act": "weather inquiry"})
        elif re.search(r'\b(?:\w+ it\'s going to be \w+ and \w+)\b', utterance, re.IGNORECASE):
            dialog_acts.append({"utterance": utterance, "act": "weather report"})
        elif re.search(r'\b(?:please send me the \w+ by \d+\s?(?:am|pm)?)\b', utterance, re.IGNORECASE):
            dialog_acts.append({"utterance": utterance, "act": "request"})
        elif re.search(r'\b(?:of course|sure|yes|okay)\b', utterance, re.IGNORECASE):
            dialog_acts.append({"utterance": utterance, "act": "confirmation"})
        elif re.search(r'\b(?:do you know where(?: the)? nearest (?:\w+ )?post office is)\b', utterance, re.IGNORECASE):
            dialog_acts.append({"utterance": utterance, "act": "location inquiry"})
        elif re.search(r'\b(?:the post office is \w+ blocks down the street)\b', utterance, re.IGNORECASE):
            dialog_acts.append({"utterance": utterance, "act": "location information"})
        else:
            dialog_acts.append({"utterance": utterance, "act": "unclassified"})

    return dialog_acts

dialog = [
    "Good morning! How's the weather today?",
    "I heard it's going to be sunny and warm.",
    "Could you please send me the report by 3 PM?",
    "Of course, I'll send it over before the deadline.",
    "Do you know where the nearest post office is?",
    "The post office is two blocks down the street."
]


recognized_acts = recognize_dialog_acts(dialog)

for act in recognized_acts:
    print(f"Dialog Act: {act['act']}\nUtterance: {act['utterance']}\n")

Dialog Act: greeting
Utterance: Good morning! How's the weather today?

Dialog Act: weather report
Utterance: I heard it's going to be sunny and warm.

Dialog Act: request
Utterance: Could you please send me the report by 3 PM?

Dialog Act: confirmation
Utterance: Of course, I'll send it over before the deadline.

Dialog Act: location inquiry
Utterance: Do you know where the nearest post office is?

Dialog Act: location information
Utterance: The post office is two blocks down the street.



***34.GPT-3_model_text_based***

In [30]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer


model_name = "distilgpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)


prompt_text = "Once upon a time,"


input_ids = tokenizer.encode(prompt_text, return_tensors='pt')

output = model.generate(input_ids, max_length=100, num_return_sequences=1, temperature=0.7)

generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print("Generated Text:\n", generated_text)

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated Text:
 Once upon a time, the world was a little more like a place where you could go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go


***35.spaCY_or_NLTK_Nameed_entity_recognition***

In [31]:
!pip install spacy



In [32]:
import spacy

def perform_ner(text):
    nlp = spacy.load("en_core_web_sm")

    doc = nlp(text)


    named_entities = [(ent.text, ent.label_) for ent in doc.ents]

    return named_entities


input_text = "Microsoft Corporation is headquartered in Redmond, Washington."

ner_results = perform_ner(input_text)

print("Named Entities:")
for entity, label in ner_results:
    print(f"{entity} - {label}")


Named Entities:
Microsoft Corporation - ORG
Redmond - GPE
Washington - GPE


***36.Extract_regexp***

In [33]:
import re

def extract_words_with_pattern(text, pattern):
    matches = re.findall(pattern, text)
    return matches

input_sentence = "The quick brown fox jumps over the lazy dog. The cat is also agile."

pattern = r'\b\w{3}\b'

result = extract_words_with_pattern(input_sentence, pattern)

print("Input Sentence:", input_sentence)
print("Three-letter words:", result)

Input Sentence: The quick brown fox jumps over the lazy dog. The cat is also agile.
Three-letter words: ['The', 'fox', 'the', 'dog', 'The', 'cat']


***37.MORPHOLOGICAL_ANALYSIS***

In [34]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

nltk.download('punkt')
nltk.download('wordnet')

def perform_lemmatization(sentence):
    tokens = word_tokenize(sentence)
    lemmatizer = WordNetLemmatizer()
    lemmatized_words = [lemmatizer.lemmatize(word) for word in tokens]
    return ' '.join(lemmatized_words)

sentence1 = "The quick brown foxes jumped over the lazy dogs."
sentence2 = "I am running in the park with my friends."

lemmatized_sentence1 = perform_lemmatization(sentence1)
lemmatized_sentence2 = perform_lemmatization(sentence2)

print("Original Sentence 1:", sentence1)
print("Lemmatized Sentence 1:", lemmatized_sentence1)
print("\nOriginal Sentence 2:", sentence2)
print("Lemmatized Sentence 2:", lemmatized_sentence2)


Original Sentence 1: The quick brown foxes jumped over the lazy dogs.
Lemmatized Sentence 1: The quick brown fox jumped over the lazy dog .

Original Sentence 2: I am running in the park with my friends.
Lemmatized Sentence 2: I am running in the park with my friend .


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


***38.PORTER_STEMMER_ALGORITHM***

In [40]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

nltk.download('punkt')

def perform_stemming(sentence):
    tokens = word_tokenize(sentence)
    stemmer = PorterStemmer()
    stemmed_words = [stemmer.stem(word) for word in tokens]
    return ' '.join(stemmed_words)


sentence1 = "Coding with Python is very enjoyable."
sentence2 = "I had a delicious meal at the restaurant."

stemmed_sentence1 = perform_stemming(sentence1)
stemmed_sentence2 = perform_stemming(sentence2)

print("Original Sentence 1:", sentence1)
print("Stemmed Sentence 1:", stemmed_sentence1)
print("\nOriginal Sentence 2:", sentence2)
print("Stemmed Sentence 2:", stemmed_sentence2)


Original Sentence 1: Coding with Python is very enjoyable.
Stemmed Sentence 1: code with python is veri enjoy .

Original Sentence 2: I had a delicious meal at the restaurant.
Stemmed Sentence 2: i had a delici meal at the restaur .


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


***39.FINITE_STATE_AUTOMATION_DATE***

In [41]:
class DateFSM:
    def __init__(self):
        self.states = {'start', 'day', 'separator1', 'month', 'separator2', 'year', 'accept'}
        self.transitions = {
            'start': {'0': 'day', '1-3': 'day'},
            'day': {'0-9': 'day', '/': 'separator1'},
            'separator1': {'/': 'month'},
            'month': {'0': 'month', '1': 'month', '2': 'month', '/': 'separator2'},
            'separator2': {'/': 'year'},
            'year': {'0-9': 'year', 'accept': 'accept'},
        }

    def recognize_date(self, date_str):
        current_state = 'start'
        for char in date_str:
            if char.isdigit():
                char_type = '0-9'
            else:
                char_type = char

            if char_type in self.transitions[current_state]:
                current_state = self.transitions[current_state][char_type]
            else:
                return False

        return current_state == 'accept'

date_fsm = DateFSM()
date1 = "31/12/2022"
date2 = "15/05/1985"
date3 = "02/29/2021"

print(f"{date1} is valid: {date_fsm.recognize_date(date1)}")
print(f"{date2} is valid: {date_fsm.recognize_date(date2)}")
print(f"{date3} is valid: {date_fsm.recognize_date(date3)}")


31/12/2022 is valid: False
15/05/1985 is valid: False
02/29/2021 is valid: False


***40.FOPC_logical_expression***

In [36]:
class FOPCParser:
    def __init__(self, variables):
        self.variables = variables

    def parse_expression(self, expression):
        tokens = expression.split()
        stack = []

        for token in tokens:
            if token in self.variables:
                stack.append(self.variables[token])
            elif token == 'and':
                if len(stack) >= 2:
                    operand2 = stack.pop()
                    operand1 = stack.pop()
                    stack.append(operand1 and operand2)
                else:
                    return None
            elif token == 'or':
                if len(stack) >= 2:
                    operand2 = stack.pop()
                    operand1 = stack.pop()
                    stack.append(operand1 or operand2)
                else:
                    return None
            elif token == 'not':
                if stack:
                    operand = stack.pop()
                    stack.append(not operand)
                else:
                    return None
            elif token.startswith('('):
                stack.append(token[1:])
            elif token.endswith(')'):
                subexpression = [stack.pop()]
                while stack and not subexpression[-1].startswith('('):
                    subexpression.append(stack.pop())
                if not stack:
                    return None
                subexpression.reverse()
                subexpression = ' '.join(subexpression)
                stack.append(self.parse_expression(subexpression))
            else:
                stack.append(token.lower() == 'true')

        return stack[0] if stack else None


variables = {'p': True, 'q': True, 'r': False}


expressions = ["p and q", "p or r", "not p", "q and (r or p)", "((p and q) or r", "p and (q or r))"]

parser = FOPCParser(variables)
for expression in expressions:
    result = parser.parse_expression(expression)
    if result is not None:
        print(f"{expression}: {result}")
    else:
        print(f"{expression}: Invalid expression")

p and q: Invalid expression
p or r: Invalid expression
not p: Invalid expression
q and (r or p): Invalid expression
((p and q) or r: Invalid expression
p and (q or r)): Invalid expression
