# Advanced Rule-Based NLP

## Example 1 - Sentiment Analysis

In [1]:
import re

# Sample rules for sentiment analysis: each list holds lowercase keywords
# that count as one vote for the corresponding sentiment.
positive_words = ['good', 'great', 'awesome', 'happy', 'delightful', 'excellent']
negative_words = ['bad', 'terrible', 'awful', 'sad', 'disappointing', 'poor']


def _count_keyword_hits(text, keywords):
    """Return the total number of whole-word occurrences of *keywords* in *text*."""
    # re.escape keeps a keyword that contains regex metacharacters (e.g. '.')
    # from being interpreted as a pattern instead of literal text.
    return sum(
        len(re.findall(r'\b' + re.escape(word) + r'\b', text))
        for word in keywords
    )


def classify_sentiment(text):
    """
    Classify the sentiment of *text* by counting predefined keywords.

    The text is lowercased, then every whole-word occurrence of an entry in
    ``positive_words`` and ``negative_words`` is counted.

    Args:
        text: The string to classify.

    Returns:
        "Positive" if positive keywords outnumber negative ones, "Negative" if
        negative keywords outnumber positive ones, and "Neutral" when the two
        counts are equal (including when no keyword is found at all).
    """
    # Lowercase so matching is case-insensitive against the lowercase lists.
    text = text.lower()

    positive_count = _count_keyword_hits(text, positive_words)
    negative_count = _count_keyword_hits(text, negative_words)

    # Determine overall sentiment; a tie falls through to "Neutral".
    if positive_count > negative_count:
        return "Positive"
    if negative_count > positive_count:
        return "Negative"
    return "Neutral"

# Sample inputs: the first contains only positive keywords, the second only
# a negative keyword, and the third one positive and one negative keyword.
texts = [
    "This product is great! I had an awesome experience.",
    "What a terrible event, it was such a disappointment!",
    "I think this movie was just fine, neither good nor bad.",
]

# Label each sample and print it with its label. The trailing "\n" in the
# format string leaves a blank line between consecutive entries.
for text in texts:
    sentiment = classify_sentiment(text=text)
    print(f"Text: {text}\nSentiment: {sentiment}\n")


Text: This product is great! I had an awesome experience.
Sentiment: Positive

Text: What a terrible event, it was such a disappointment!
Sentiment: Negative

Text: I think this movie was just fine, neither good nor bad.
Sentiment: Neutral



## Example 2 - Simple Keyword Search

This example uses simple substring matching. It is straightforward but less flexible than the other approaches, because it only finds documents in which the keyword appears verbatim (ignoring letter case) somewhere in the text.


In [4]:
# Small in-memory corpus used by the keyword-search example; two of the four
# sentences contain the phrase "machine learning" (in different letter case).
documents = [
    "Python is a great programming language for data science.",
    "Machine learning can be applied in healthcare.",
    "Natural language processing is a subset of machine learning.",
    "Deep learning is used for image recognition.",
]

def search_documents(documents, keyword):
    """
    Return the documents that contain *keyword* as a substring.

    Both the keyword and each document are lowercased before comparison, so
    the match is case-insensitive. Matching documents are returned unchanged
    and in their original order.

    Args:
        documents: Iterable of document strings to scan.
        keyword: Text to look for inside each document.

    Returns:
        A new list with every document whose lowercased text contains the
        lowercased keyword.
    """
    # Normalise the query once instead of once per document.
    needle = keyword.lower()

    matches = []
    for candidate in documents:
        if needle in candidate.lower():
            matches.append(candidate)
    return matches

# Run a sample query against the corpus; the phrase has to appear verbatim
# (ignoring letter case) for a document to be returned.
keyword = 'machine learning'
found_documents = search_documents(documents=documents, keyword=keyword)

# Print a header followed by one matching document per line.
print("Documents containing the keyword:")
for doc in found_documents:
    print(doc)


Documents containing the keyword:
Machine learning can be applied in healthcare.
Natural language processing is a subset of machine learning.


## Example 3 - Robust Keyword Search using Regular Expression

This example enhances keyword search by using regular expressions, which allow more complex matching conditions such as word boundaries, variations of a word, or even simple synonyms and related terms.

In [5]:
import re

# Small in-memory corpus used by the regex-search example; the first and the
# last sentence contain the word "Python".
documents = [
    "The Python ecosystem is evolving.",
    "Machine learning and deep learning are parts of artificial intelligence.",
    "The growth of data science applications is phenomenal.",
    "There are many programming languages, but Python is very popular in data science.",
]

def regex_search_documents(documents, search_pattern):
    """
    Return the documents in which *search_pattern* matches somewhere.

    The pattern is compiled once with ``re.IGNORECASE`` and applied to each
    document with ``search``, so a match at any position counts. Matching
    documents are returned unchanged and in their original order.

    Args:
        documents: Iterable of document strings to scan.
        search_pattern: Regular expression to look for.

    Returns:
        A new list with every document that contains at least one match.
    """
    # Compile up front so the same pattern object is reused for every document.
    matcher = re.compile(search_pattern, flags=re.IGNORECASE)

    hits = []
    for candidate in documents:
        if matcher.search(candidate) is not None:
            hits.append(candidate)
    return hits

# Example regex search for the standalone word 'python' in any letter case.
# The \b anchors are word boundaries, so longer words such as 'pythonic'
# do not match this pattern.
search_pattern = r'\bpython\b'
found_documents = regex_search_documents(documents=documents, search_pattern=search_pattern)

# Print a header followed by one matching document per line.
print("Documents containing the pattern:")
for doc in found_documents:
    print(doc)


Documents containing the pattern:
The Python ecosystem is evolving.
There are many programming languages, but Python is very popular in data science.
