<a href="https://colab.research.google.com/github/kamini8888/DSA0206-Computer-Vision/blob/main/NLP_programs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import re
# Text to search for patterns
text = "Hello, this is a sample text with some patterns like abc and abcd."
# Define a regular expression pattern
pattern = r'\b\w*ab\w*\b'
# Search for the pattern in the text
matches = re.findall(pattern, text)
# Print the matches
print("Matches:", matches)

Matches: ['abc', 'abcd']


In [None]:
class FSA:
    def __init__(self):
        self.state = 0

    def transition(self, char):
        if self.state == 0 and char == 'a':
            self.state = 1
        elif self.state == 1 and char == 'b':
            self.state = 2
        else:
            self.state = 0

    def is_accepted(self):
        return self.state == 2

# Test the FSA
def test_fsa(input_str):
    automaton = FSA()
    for char in input_str:
        automaton.transition(char)
    return automaton.is_accepted()

# Example usage
test_string = "some textab"
result = test_fsa(test_string)
print(f"Does '{test_string}' end with 'ab'? {result}")

Does 'some textab' end with 'ab'? True


In [None]:
class PluralizationFSM:
    def __init__(self):
        self.state = 0  # Initial state

    def transition(self, word):
        if self.state == 0 and word.endswith('s'):
            self.state = 1  # Noun ends with 's', no change
        elif self.state == 0 and word.endswith('y'):
            self.state = 2  # Noun ends with 'y,' switch to 'ies'
        elif self.state == 0:
            self.state = 3  # Noun ends with another letter, add 's'

    def pluralize(self, word):
        self.state = 0  # Reset the state
        self.transition(word)  # Transition based on rules
        if self.state == 1:
            return word  # No change
        elif self.state == 2:
            return word[:-1] + 'ies'  # Change 'y' to 'ies'
        elif self.state == 3:
            return word + 's'  # Add 's'

# Example usage
fsm = PluralizationFSM()
nouns = ['cat', 'dog', 'bus', 'dress', 'class']
pluralized_nouns = [fsm.pluralize(noun) for noun in nouns]

for singular, plural in zip(nouns, pluralized_nouns):
    print(f"{singular} (singular) -> {plural} (plural)")

cat (singular) -> cats (plural)
dog (singular) -> dogs (plural)
bus (singular) -> bus (plural)
dress (singular) -> dress (plural)
class (singular) -> class (plural)


In [None]:
class MorphologicalFSM:
    def __init__(self):
        self.state = 0

    def parse(self, word):
        if word.endswith('s'):
            self.state = 1
        else:
            self.state = 0

    def generate_plural(self, word):
        if self.state == 0:
            return word + 's'
        else:
            return word  # Already plural

# Test the MorphologicalFSM
def test_morphological_fsm(word):
    fsm = MorphologicalFSM()
    fsm.parse(word)
    plural_form = fsm.generate_plural(word)
    return plural_form

# Example usage
word_to_pluralize = "cat"
plural_result = test_morphological_fsm(word_to_pluralize)
print(f"The plural form of '{word_to_pluralize}' is '{plural_result}'.")


The plural form of 'cat' is 'cats'.


In [None]:
from nltk.stem import PorterStemmer

# Sample list of words for stemming
words_to_stem = ["running", "jumps", "happily", "cats", "easily"]

# Create a Porter Stemmer
porter_stemmer = PorterStemmer()

# Perform stemming on the list of words
stemmed_words = [porter_stemmer.stem(word) for word in words_to_stem]

# Print the results
print("Original words:", words_to_stem)
print("Stemmed words:", stemmed_words)

Original words: ['running', 'jumps', 'happily', 'cats', 'easily']
Stemmed words: ['run', 'jump', 'happili', 'cat', 'easili']


In [None]:
import random

def build_bigram_model(text):
    bigram_model = {}
    words = text.split()

    for i in range(len(words) - 1):
        current_word = words[i]
        next_word = words[i + 1]

        if current_word in bigram_model:
            bigram_model[current_word].append(next_word)
        else:
            bigram_model[current_word] = [next_word]

    return bigram_model

def generate_text(bigram_model, seed_word, length=10):
    current_word = seed_word
    generated_text = [current_word]

    for _ in range(length - 1):
        if current_word in bigram_model:
            next_word = random.choice(bigram_model[current_word])
            generated_text.append(next_word)
            current_word = next_word
        else:
            break

    return ' '.join(generated_text)

# Example usage
sample_text = "This is a sample text for demonstrating a bigram model in Python. This model generates text based on the probability of the next word given the current word."
bigram_model = build_bigram_model(sample_text)
seed_word = "This"
generated_text = generate_text(bigram_model, seed_word, length=15)

print("Original Text:")
print(sample_text)
print("\nGenerated Text:")
print(generated_text)

Original Text:
This is a sample text for demonstrating a bigram model in Python. This model generates text based on the probability of the next word given the current word.

Generated Text:
This model in Python. This is a bigram model in Python. This model generates text


In [None]:
import nltk
from nltk.tokenize import word_tokenize

# Download NLTK data for part-of-speech tagging
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

def pos_tagging(text):
    # Tokenize the text
    words = word_tokenize(text)

    # Perform part-of-speech tagging
    pos_tags = nltk.pos_tag(words)

    return pos_tags

# Example text
sample_text = "NLTK is a powerful library for natural language processing."

# Perform part-of-speech tagging
pos_tags_result = pos_tagging(sample_text)

# Print the result
print("Original Text:")
print(sample_text)
print("\nPart-of-Speech Tags:")
print(pos_tags_result)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


Original Text:
NLTK is a powerful library for natural language processing.

Part-of-Speech Tags:
[('NLTK', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('powerful', 'JJ'), ('library', 'NN'), ('for', 'IN'), ('natural', 'JJ'), ('language', 'NN'), ('processing', 'NN'), ('.', '.')]


In [None]:
import nltk
import random

# Download NLTK data for part-of-speech tagging
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

def train_stochastic_pos_tagger(corpus):
    # Tokenize and tag the provided corpus
    tagged_corpus = [nltk.pos_tag(nltk.word_tokenize(sentence)) for sentence in corpus]

    # Build a basic probabilistic model
    pos_probabilities = {}
    for tagged_sentence in tagged_corpus:
        for word, pos_tag in tagged_sentence:
            if pos_tag not in pos_probabilities:
                pos_probabilities[pos_tag] = {}

            if word not in pos_probabilities[pos_tag]:
                pos_probabilities[pos_tag][word] = 1
            else:
                pos_probabilities[pos_tag][word] += 1

    # Normalize probabilities
    for pos_tag in pos_probabilities:
        total_count = sum(pos_probabilities[pos_tag].values())
        for word in pos_probabilities[pos_tag]:
            pos_probabilities[pos_tag][word] /= total_count

    return pos_probabilities

def stochastic_pos_tagging(sentence, pos_probabilities):
    words = nltk.word_tokenize(sentence)
    pos_tags = []

    for word in words:
        if word in pos_probabilities:
            pos_tag_probabilities = pos_probabilities[word]
            chosen_pos_tag = random.choices(list(pos_tag_probabilities.keys()), weights=list(pos_tag_probabilities.values()))[0]
            pos_tags.append((word, chosen_pos_tag))
        else:
            # If the word is not in the model, assign a default POS tag
            pos_tags.append((word, 'NN'))  # Defaulting to 'NN' (Noun)

    return pos_tags

# Example training corpus
training_corpus = [
    "This is a sample sentence.",
    "POS tagging helps analyze text.",
    "NLTK provides useful tools for NLP.",
]

# Train the stochastic POS tagger
pos_probabilities_model = train_stochastic_pos_tagger(training_corpus)

# Example sentence for tagging
sample_sentence = "This is a test sentence for the stochastic POS tagger."

# Perform stochastic POS tagging
stochastic_pos_tags = stochastic_pos_tagging(sample_sentence, pos_probabilities_model)

# Print the results
print("Original Sentence:")
print(sample_sentence)
print("\nStochastic POS Tags:")
print(stochastic_pos_tags)


Original Sentence:
This is a test sentence for the stochastic POS tagger.

Stochastic POS Tags:
[('This', 'NN'), ('is', 'NN'), ('a', 'NN'), ('test', 'NN'), ('sentence', 'NN'), ('for', 'NN'), ('the', 'NN'), ('stochastic', 'NN'), ('POS', 'NN'), ('tagger', 'NN'), ('.', '.')]


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [None]:
import re

def rule_based_pos_tagging(sentence):
    pos_tags = []

    for word in sentence.split():
        if re.match(r'\b(?:is|am|are|was|were)\b', word, re.I):
            pos_tags.append((word, 'VB'))  # Verb (to be)
        elif re.match(r'\b(?:the|a|an)\b', word, re.I):
            pos_tags.append((word, 'DT'))  # Determiner
        elif re.match(r'\b(?:cat|dog|bird)\b', word, re.I):
            pos_tags.append((word, 'NN'))  # Noun
        elif re.match(r'\b(?:quick|brown|lazy)\b', word, re.I):
            pos_tags.append((word, 'JJ'))  # Adjective
        else:
            pos_tags.append((word, 'NN'))  # Default to Noun

    return pos_tags

# Example sentence for tagging
sample_sentence = "The quick brown fox is a lazy dog."

# Perform rule-based POS tagging
rule_based_pos_tags = rule_based_pos_tagging(sample_sentence)

# Print the results
print("Original Sentence:")
print(sample_sentence)
print("\nRule-Based POS Tags:")
print(rule_based_pos_tags)


Original Sentence:
The quick brown fox is a lazy dog.

Rule-Based POS Tags:
[('The', 'DT'), ('quick', 'JJ'), ('brown', 'JJ'), ('fox', 'NN'), ('is', 'VB'), ('a', 'DT'), ('lazy', 'JJ'), ('dog.', 'NN')]


In [None]:
# Define a simple transformation rule
def transform_rule(word, tag):
    if word.endswith('ed'):
        return ('VBD', tag)
    else:
        return (word, tag)

# Apply the transformation rule to a tagged sentence
tagged_sentence = [('The', 'DT'), ('cat', 'NN'), ('sat', 'VBD'), ('on', 'IN'), ('the', 'DT'), ('mat', 'NN')]
transformed_sentence = [(word, transform_rule(word, tag)[0]) for word, tag in tagged_sentence]

# Print the transformed sentence
print(transformed_sentence)


[('The', 'The'), ('cat', 'cat'), ('sat', 'sat'), ('on', 'on'), ('the', 'the'), ('mat', 'mat')]


In [None]:
class SimpleParser:
    def __init__ (self, grammar):
        self.grammar = grammar
    def parse (self, input_string):
        self.input = input_string
        self.index = 0
        self.result = True
        if self.expression ():
            if self.index == len (self.input):
                print (f'Parsing successful for input: {input_string}')
                return
        print (f'Parsing failed for input: {input_string}')
    def expression (self):
        return self.term () and self.expression_tail ()
    def expression_tail (self):
        current_index = self.index
        if self.match ('+'):
            return self.term () and self.expression_tail ()
        self.index = current_index
        return True
    def term (self):
        return self.factor () and self.term_tail ()
    def term_tail (self):
        current_index = self.index
        if self.match ('*'):
            return self.factor () and self.term_tail ()
        self.index = current_index
        return True
    def factor (self):
        if self.match ('('):
            if self.expression () and self.match (')'):
                return True
            return False
        return self.match ('number')
    def match (self, expected):
        if self.index < len (self.input) and (expected == self.input [self.index] or expected == 'number' and self.input [self.index].isdigit ()):
            self.index += 1
            return True
        return False

grammar = {
    'start': 'Expression',
}

parser = SimpleParser (grammar)

parser.parse ('3* (2+1)')
parser.parse ('2+1*3')
parser.parse ('2+ (1*1)')

Parsing failed for input: 3* (2+1)
Parsing successful for input: 2+1*3
Parsing failed for input: 2+ (1*1)


In [None]:
class EarleyParser:
    def __init__(self, grammar):
        self.grammar = grammar
    def parse(self, input_string):
        self.chart = [[] for _ in range(len(input_string) + 1)]
        self.chart[0].append(('start', '', 0))
        for i in range(len(input_string) + 1):
            for state in self.chart[i]:
                self.predictor(state, i)
                if i < len(input_string):
                    self.scanner(state, input_string[i], i)
                else:
                    self.completer(state, i)
        if ('start', self.grammar['start'], 0) in self.chart[len(input_string)]:
            print(f'Parsing failed for input: {input_string}')
        else:
            print(f'Parsing successfull for input: {input_string}')
    def predictor(self, state, index):
        if state[1] in self.grammar:
            for production in self.grammar[state[1]]:
                self.chart[index].append((state[1], production, index))
    def scanner(self, state, token, index):
        if state[1] == '' or state[1][0] != token:
            return
        self.chart[index + 1].append((state[0], state[1][1:], state[2]))
    def completer(self, state, index):
        for st in self.chart[state[2]]:
            if st[1] == '' or st[1][0] != state[0]:
                continue
            self.chart[index].append((st[0], st[1][1:], st[2]))
# Example usage
grammar = {
    'start': 'Expression',
    'Expression': ['Term + Expression', 'Term'],
    'Term': ['Factor * Term', 'Factor'],
    'Factor': ['( Expression )', 'number']
}

parser = EarleyParser(grammar)

# Test the parser
parser.parse('3* (2+1)') # Parsing successful for input: 3* (2+1)
parser.parse('2+1*3') # Parsing successful for input: 2+1*3
parser.parse('2+ (1*3)') # Parsing successful for input: 2+ (1*3)

Parsing successfull for input: 3* (2+1)
Parsing successfull for input: 2+1*3
Parsing successfull for input: 2+ (1*3)


In [None]:
import nltk

def generate_parse_tree(grammar, sentence):
    parser = nltk.ChartParser(grammar)
    trees = parser.parse(sentence.split())
    for tree in trees:
        tree.pretty_print()

# Example usage
cfg = nltk.CFG.fromstring("""
    S -> NP VP
    NP -> Det N
    VP -> V NP
    Det -> 'the' | 'a'
    N -> 'dog' | 'cat'
    V -> 'chased' | 'caught'
""")

generate_parse_tree(cfg, "the dog chased a cat")

              S               
      ________|_____           
     |              VP        
     |         _____|___       
     NP       |         NP    
  ___|___     |      ___|___   
Det      N    V    Det      N 
 |       |    |     |       |  
the     dog chased  a      cat



In [None]:
import nltk
def check_agreement(sentence):
    tagged_words = nltk.pos_tag(nltk.word_tokenize(sentence))
    subjects = [word for word, tag in tagged_words if tag.startswith('N')]
    verbs = [word for word, tag in tagged_words if tag.startswith('V')]
    if subjects and verbs:
        subject_number = 'singular' if tagged_words[0][1].startswith('NNS') else 'singular'
        verb_number = 'singular' if tagged_words[-1][1].startswith('VB') else 'plural'
        if subject_number != verb_number:
            print("Subject-verb agreement error:")
            print(f"Subjects: {subjects} ({subject_number})")
            print(f"Verbs: {verbs} ({verb_number})")
        else:
            print("Subject-verb agreement is correct.")
    else:
        print("Unable to find subjects and verbs in the sentence.")
example_sentence = "The cat catches a dog"
check_agreement(example_sentence)

Subject-verb agreement error:
Subjects: ['cat', 'dog'] (singular)
Verbs: ['catches'] (plural)


In [None]:
import nltk
from nltk import PCFG
from nltk.parse import pchart

# Updated PCFG
pcfg_grammar = PCFG.fromstring("""
    S -> NP VP [1.0]
    VP -> V NP [0.7] | VP PP [0.3]
    PP -> P NP [1.0]
    NP -> Det N [0.2] | NP PP [0.3] | N [0.5]
    Det -> 'the' [0.8] | 'a' [0.2]
    N -> 'dog' [0.4] | 'cat' [0.3] | 'stick' [0.3]
    V -> 'chased' [0.9] | 'saw' [0.1]
    P -> 'with' [0.6] | 'in' [0.4]
""")

# PCFG Parsing
parser = pchart.InsideChartParser(pcfg_grammar)
sentence = "the dog chased the cat with a stick"
words = sentence.split()

trees = parser.parse(words)
for tree in trees:
    tree.pretty_print()

                    S                            
      ______________|_______                      
     |                      VP                   
     |         _____________|___                  
     |        |                 NP               
     |        |          _______|____             
     |        |         |            PP          
     |        |         |        ____|___         
     NP       |         NP      |        NP      
  ___|___     |      ___|___    |     ___|____    
Det      N    V    Det      N   P   Det       N  
 |       |    |     |       |   |    |        |   
the     dog chased the     cat with  a      stick

                    S                            
      ______________|_______                      
     |                      VP                   
     |               _______|________             
     |              VP               PP          
     |         _____|___         ____|___         
     NP       |         NP      |       

In [None]:
import spacy

# Load the SpaCy English model
nlp = spacy.load("en_core_web_sm")

# Example sentence for Named Entity Recognition
sample_text = "Apple Inc. was founded by Steve Jobs in Cupertino. Google is headquartered in Mountain View."

# Perform NER using SpaCy
doc = nlp(sample_text)

# Print named entities
for ent in doc.ents:
    print(f"Entity: {ent.text}, Type: {ent.label_}")

Entity: Apple Inc., Type: ORG
Entity: Steve Jobs, Type: PERSON
Entity: Cupertino, Type: GPE
Entity: Google, Type: ORG
Entity: Mountain View, Type: GPE


In [None]:
import nltk
nltk.download('wordnet')

from nltk.corpus import wordnet

# Example: Retrieve synsets for the word "example"
synsets = wordnet.synsets("example")

# Check if there are any synsets
if not synsets:
    print("No synsets found for the word 'example'.")
else:
    # Print information about each synset
    for synset in synsets:
        print(f"Synset: {synset.name()}")
        print(f"Definition: {synset.definition()}")
        print(f"Examples: {synset.examples()}\n")

[nltk_data] Downloading package wordnet to /root/nltk_data...


Synset: example.n.01
Definition: an item of information that is typical of a class or group
Examples: ['this patient provides a typical example of the syndrome', 'there is an example on page 10']

Synset: model.n.07
Definition: a representative form or pattern
Examples: ['I profited from his example']

Synset: exemplar.n.01
Definition: something to be imitated
Examples: ['an exemplar of success', 'a model of clarity', 'he is the very model of a modern major general']

Synset: example.n.04
Examples: ['they decided to make an example of him']

Synset: case.n.01
Definition: an occurrence of something
Examples: ['it was a case of bad judgment', 'another instance occurred yesterday', 'but there is always the famous example of the Smiths']

Synset: exercise.n.04
Definition: a task performed or problem solved in order to develop skill or understanding
Examples: ['you must work the examples at the end of each chapter in the textbook']



In [None]:
from pyparsing import Word, alphas, OneOrMore, ZeroOrMore, Forward, Group, Suppress

# Define FOPC grammar
identifier = Word(alphas, alphas + "_", asKeyword=True)
variable = Word(alphas.lower(), alphas.lower() + "_", asKeyword=True)
term = variable | identifier
expr = Forward()
arg_list = Suppress("(") + Group(OneOrMore(term + ZeroOrMore(Suppress(",") + term))) + Suppress(")")
expr << (identifier + arg_list) | variable

# Example FOPC expressions
expr1 = "isHuman(john)"
expr2 = "parentOf(john, alice)"
expr3 = "hasColor(apple, red)"

# Parse FOPC expressions
parsed_expr1 = expr.parseString(expr1, parseAll=True)
parsed_expr2 = expr.parseString(expr2, parseAll=True)
parsed_expr3 = expr.parseString(expr3, parseAll=True)

# Print parsed expressions
print(parsed_expr1.asList())
print(parsed_expr2.asList())
print(parsed_expr3.asList())

['isHuman', ['john']]
['parentOf', ['john', 'alice']]
['hasColor', ['apple', 'red']]


In [None]:
from nltk.wsd import lesk
from nltk.tokenize import word_tokenize

# Example sentence
sentence = "I went to the bank to deposit some money."

# Word Sense Disambiguation using Lesk Algorithm
word_to_disambiguate = "bank"
sense = lesk(word_tokenize(sentence), word_to_disambiguate)

# Print the result
print(f"Word: {word_to_disambiguate}, Sense: {sense}")

# Print the definition of the selected sense (if available)
if sense:
    print(f"Sense Definition: {sense.definition()}")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Word: bank, Sense: Synset('savings_bank.n.02')
Sense Definition: a container (usually with a slot in the top) for keeping money at home


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Sample documents
documents = [
    "TF-IDF is a text analysis technique widely used in information retrieval.",
    "It evaluates the importance of each word in a document relative to a collection of documents (corpus).",
    "The higher the TF-IDF score, the more important the word is to the document.",
    "Information retrieval systems use TF-IDF to rank documents based on their relevance to a query.",
    "Python is a popular programming language for implementing information retrieval algorithms.",
]

# User query
query = "TF-IDF in information retrieval"

# TF-IDF Vectorizer
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(documents)

# Transform the query using the same vectorizer
query_vector = vectorizer.transform([query])

# Compute cosine similarity between the query and documents
cosine_similarities = linear_kernel(query_vector, tfidf_matrix).flatten()

# Rank documents based on similarity scores
document_ranking = sorted(list(enumerate(cosine_similarities)), key=lambda x: x[1], reverse=True)

# Display the ranked documents
print("Ranked Documents:")
for rank, (doc_index, similarity) in enumerate(document_ranking, 1):
    print(f"Rank {rank}: Document {doc_index + 1} (Similarity: {similarity:.4f})")
    print(documents[doc_index])
    print("\n")

Ranked Documents:
Rank 1: Document 1 (Similarity: 0.5565)
TF-IDF is a text analysis technique widely used in information retrieval.


Rank 2: Document 4 (Similarity: 0.3280)
Information retrieval systems use TF-IDF to rank documents based on their relevance to a query.


Rank 3: Document 5 (Similarity: 0.1986)
Python is a popular programming language for implementing information retrieval algorithms.


Rank 4: Document 3 (Similarity: 0.1187)
The higher the TF-IDF score, the more important the word is to the document.


Rank 5: Document 2 (Similarity: 0.1086)
It evaluates the importance of each word in a document relative to a collection of documents (corpus).




In [None]:
import nltk
nltk.download('punkt')
import spacy
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize  # Add this line
nltk.download('wordnet')
# Load the SpaCy English model
nlp = spacy.load("en_core_web_sm")

# Example sentence
sentence = "The quick brown fox jumps over the lazy dog."

# Perform syntax-driven semantic analysis
doc = nlp(sentence)
for np in doc.noun_chunks:
    meanings = []
    for word in word_tokenize(np.text):
        synsets = wordnet.synsets(word)
        meanings.extend([synset.definition() for synset in synsets])
    print(f"Noun Phrase: {np.text}, Meanings: {meanings}")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...


Noun Phrase: The quick brown fox, Meanings: ['any area of the body that is highly sensitive to pain (as the flesh underneath the skin or a fingernail or toenail)', 'accomplished rapidly and without delay', 'hurried and brief', 'moving quickly and lightly', 'apprehending and responding with speed and sensitivity', 'performed with little or no delay', 'easily aroused or excited', 'with little or no delay', 'an orange of low brightness and saturation', 'Scottish botanist who first observed the movement of small particles in fluids now known a Brownian motion (1773-1858)', "abolitionist who was hanged after leading an unsuccessful raid at Harper's Ferry, Virginia (1800-1859)", 'a university in Rhode Island', 'fry in a pan until it changes color', 'make brown in color', 'of a color similar to that of wood or earth', '(of skin) deeply suntanned', 'alert carnivorous mammal with pointed muzzle and ears and a bushy tail; most are predators that do not hunt in packs', 'a shifty deceptive person'

In [None]:
import spacy

# Load the SpaCy English model
nlp = spacy.load("en_core_web_sm")

# Example sentence
sentence = "John has a dog. He loves it."

# Perform reference resolution
doc = nlp(sentence)
for token in doc:
    if token.pos_ == "PRON" and token.text.lower() in ["he", "she", "it"]:
        referent = [t.text for t in doc if t.i == token.i + 1][0]
        print(f"Pronoun: {token.text}, Referent: {referent}")

Pronoun: He, Referent: loves
Pronoun: it, Referent: .


In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Example text
text = ["The quick brown fox.", "Jumps over the lazy dog.", "This is another sentence."]

# Coherence evaluation using cosine similarity
vectorizer = CountVectorizer()
vectorized_text = vectorizer.fit_transform(text)
similarity_matrix = cosine_similarity(vectorized_text)

print("Coherence Matrix:")
print(similarity_matrix)

Coherence Matrix:
[[1.        0.2236068 0.       ]
 [0.2236068 1.        0.       ]
 [0.        0.        1.       ]]


In [None]:
import spacy

# Load the SpaCy conversational AI model
nlp = spacy.load("en_core_web_sm")

# Example dialog
dialog = "User: How's the weather today? Assistant: The weather is sunny and warm."

# Perform dialog act recognition
doc = nlp(dialog)
for sent in doc.sents:
    dialog_act_type = sent._.dialog_act_type if hasattr(sent._, 'dialog_act_type') else "Not recognized"
    print(f"Dialog Act: {dialog_act_type}, Sentence: {sent.text}")

Dialog Act: Not recognized, Sentence: User: How's the weather today?
Dialog Act: Not recognized, Sentence: Assistant:
Dialog Act: Not recognized, Sentence: The weather is sunny and warm.


In [None]:
from transformers import MarianMTModel, MarianTokenizer

# Load English to French translation model and tokenizer
model_name = "Helsinki-NLP/opus-mt-en-fr"
model = MarianMTModel.from_pretrained(model_name)
tokenizer = MarianTokenizer.from_pretrained(model_name)

# Translate English text to French
english_text = "Hello, how are you?"
input_ids = tokenizer.encode(english_text, return_tensors="pt")
output_ids = model.generate(input_ids)
french_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(f"English: {english_text}")
print(f"French: {french_text}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]



English: Hello, how are you?
French: Bonjour, comment allez-vous?


In [None]:
import openai

# Set up your OpenAI API key
openai.api_key = 'your-api-key'

ModuleNotFoundError: No module named 'openai'

In [None]:
!pip install openai==0.28

import openai

# Set your OpenAI API key
api_key = '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'
openai.api_key = api_key

# Define your prompt
prompt = "Once upon a time in a far-off kingdom, there was a brave knight named Sir Lancelot. He embarked on a quest to..."

# Generate text based on the prompt
response = openai.Completion.create(
    prompt=prompt,
    max_tokens=100
)

# Print the generated text
print(response.choices[0].text.strip())



InvalidRequestError: Must provide an 'engine' or 'model' parameter to create a <class 'openai.api_resources.completion.Completion'>

In [None]:
openai.api_key = api_key

# Define your prompt
prompt = "Once upon a time in a far-off kingdom, there was a brave knight named Sir Lancelot. He embarked on a quest to..."

# Generate text based on the prompt
response = openai.Completion.create(
    engine="davinci",
    prompt=prompt,
    max_tokens=100
)

# Print the generated text
print(response.choices[0].text.strip())
print(response.choices[0].text.strip())

InvalidRequestError: The model `davinci` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations

In [None]:
import os
import openai

# Set your OpenAI API key as an environment variable
os.environ["OPENAI_API_KEY"] = "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"

def generate_text(prompt):
    # Create an OpenAI client using your API key
    openai.api_key = os.environ['OPENAI_API_KEY']

    # Generate text using the GPT-3 model
    response = openai.Completion.create(
        engine="text-davinci-002",  # Choose the appropriate engine
        prompt=prompt,
        max_tokens=1000,
        temperature=0.7,
    )

    # Extract the generated text from the response
    generated_text = response["choices"][0]["text"]

    return generated_text

prompt = "Write a haiku about a sunset."
generated_text = generate_text(prompt)
print(generated_text)

InvalidRequestError: The model `text-davinci-002` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations

In [None]:
import openai
openai.api_key = "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"



In [None]:
prompt = "Once upon a time, in a faraway kingdom, there was a brave knight"


In [None]:
models = ["davinci", "curie", "babbage"]

# Generate text with each model
for model in models:
    print(f"Generating text with model: {model}")
    response = openai.Completion.create(
      engine=model,
      prompt=prompt,
      temperature=0.7,
      max_tokens=100
    )
    print(response.choices[0].text.strip())
    print("----------------------------------------")

Generating text with model: davinci


InvalidRequestError: The model `davinci` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations