## <center>Blackboard Architecture Demonstration (Sentiment Analysis)</center>
![blackboard architecture](blackboard_architecture.png)

In [2]:
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import pandas as pd
import re

In [3]:
# SentimentIntensityAnalyzer needs the VADER lexicon to be installed locally.
# Keep this call active so the notebook works on a fresh environment; when the
# package is already present nltk just reports it as up-to-date.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/rupesh/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [4]:
class Blackboard:
    """Shared data store that knowledge sources read from and write to.

    The store is a plain dict (`self.data`). The predefined slots all start as
    empty lists and accumulate entries; any other key holds a single value.
    """

    def __init__(self):
        self.data = {
            'reviews': [],
            'cleaned_reviews': [],
            'rule_based_scores': [],
            'ml_based_scores': [],
            'final_sentiments': []
        }

    def update(self, key, value):
        """Write `value` under `key`.

        If the slot currently holds a list, `value` is added to it:
        iterables (lists, tuples, arrays, ...) are appended element by element,
        while strings, bytes and non-iterable scalars are appended as a single
        entry. For any other key the stored value is simply replaced.
        """
        current = self.data.get(key)
        if not isinstance(current, list):
            self.data[key] = value
            return

        # A bare string would otherwise be split into characters by extend(),
        # and a scalar would raise TypeError; treat both as one entry.
        is_single_item = isinstance(value, (str, bytes)) or not hasattr(value, '__iter__')
        if is_single_item:
            current.append(value)
        else:
            current.extend(value)

    def read(self, key):
        """Return the value stored under `key`, or None if the key is absent."""
        return self.data.get(key)


In [5]:
class KnowledgeSource:
    """Base class for every component that contributes to the blackboard.

    Subclasses override `act` to read what they need from the blackboard and
    write their results back to it.
    """

    def __init__(self, blackboard):
        # Every knowledge source works against the same shared blackboard.
        self.blackboard = blackboard

    def act(self):
        """Perform this source's contribution; must be overridden."""
        raise NotImplementedError()

In [6]:
# Basic Text Processing KS
class TextProcessor(KnowledgeSource):
    """Knowledge source that turns raw reviews into lowercase, letters-only text.

    Reads the 'reviews' slot and appends to the 'cleaned_reviews' slot.
    """

    def act(self):
        """Clean the reviews that do not yet have a cleaned counterpart.

        'cleaned_reviews' is treated as aligned with the front of 'reviews',
        so only the remaining tail is processed. This keeps repeated calls
        from appending duplicate entries.
        """
        reviews = self.blackboard.read('reviews') or []
        already_cleaned = len(self.blackboard.read('cleaned_reviews') or [])
        pending = reviews[already_cleaned:]
        if pending:
            cleaned_reviews = [self.clean_text(review) for review in pending]
            self.blackboard.update('cleaned_reviews', cleaned_reviews)

    def clean_text(self, text):
        """Return `text` with markup and non-letter characters blanked out.

        Runs of spaces left behind by the substitutions are not collapsed.
        """
        # Replace anything that looks like an HTML tag (e.g. <br />) with a space.
        text = re.sub(r'<.*?>', ' ', text)
        # Replace every character that is neither an ASCII letter nor whitespace.
        text = re.sub(r'[^a-zA-Z\s]', ' ', text)
        text = text.lower().strip()
        return text

# Rule-Based Sentiment Analysis KS
class RuleBasedSentimentAnalysis(KnowledgeSource):
    """Knowledge source that labels cleaned reviews with NLTK's VADER analyzer.

    Reads 'cleaned_reviews' and appends 'positive' / 'negative' strings to
    'rule_based_scores'.
    """

    def __init__(self, blackboard):
        super().__init__(blackboard)
        self.analyzer = SentimentIntensityAnalyzer()

    def act(self):
        """Label the cleaned reviews that do not have a rule-based label yet.

        'rule_based_scores' is treated as aligned with the front of
        'cleaned_reviews', so only the remaining tail is scored. This keeps
        repeated calls from appending duplicate labels.
        """
        cleaned_reviews = self.blackboard.read('cleaned_reviews') or []
        already_labelled = len(self.blackboard.read('rule_based_scores') or [])
        pending = cleaned_reviews[already_labelled:]
        if not pending:
            return

        scores = [self.analyzer.polarity_scores(review)['compound'] for review in pending]
        # Binary labelling: a compound score of exactly 0 counts as positive.
        sentiments = ['positive' if score >= 0 else 'negative' for score in scores]
        self.blackboard.update('rule_based_scores', sentiments)

# Machine Learning Sentiment Analysis KS
class MachineLearningSentimentAnalysis(KnowledgeSource):
    """Knowledge source that labels cleaned reviews with TF-IDF + logistic regression.

    `train` must be called before `act`. Predictions are appended to the
    'ml_based_scores' slot using whatever label values were passed to `train`.
    """

    def __init__(self, blackboard):
        super().__init__(blackboard)
        self.vectorizer = TfidfVectorizer(max_features=5000)
        self.model = LogisticRegression()

    def train(self, reviews, labels):
        """Fit the vectorizer and the classifier on `reviews` and their `labels`."""
        vectors = self.vectorizer.fit_transform(reviews)
        self.model.fit(vectors, labels)

    def act(self):
        """Predict labels for the cleaned reviews that have no ML label yet.

        'ml_based_scores' is treated as aligned with the front of
        'cleaned_reviews', so only the remaining tail is predicted. This keeps
        repeated calls from appending duplicate predictions.
        """
        cleaned_reviews = self.blackboard.read('cleaned_reviews') or []
        already_predicted = len(self.blackboard.read('ml_based_scores') or [])
        pending = cleaned_reviews[already_predicted:]
        if not pending:
            # Nothing new to score; predict() rejects an empty batch.
            return

        vectors = self.vectorizer.transform(pending)
        sentiments = self.model.predict(vectors)
        # Store native Python values rather than numpy scalars.
        self.blackboard.update('ml_based_scores', sentiments.tolist())

In [7]:
class ControlComponent:
    """Drives the blackboard system by triggering each knowledge source in turn."""

    def __init__(self, blackboard, knowledge_sources):
        self.blackboard, self.knowledge_sources = blackboard, knowledge_sources

    def run(self):
        """Call `act()` on every knowledge source, in the order they were given."""
        for source in self.knowledge_sources:
            source.act()

In [8]:
# Create the shared blackboard that the knowledge sources below read from and write to
blackboard = Blackboard()

# Load IMDb dataset
def load_imdb_dataset(data_dir='dataset/train'):
    """Load the review texts stored under `data_dir`.

    Expects two sub-directories, `pos` and `neg`, each holding one UTF-8 text
    file per review.

    Args:
        data_dir: Directory containing the `pos` and `neg` folders. Defaults
            to 'dataset/train'.

    Returns:
        A `(pos_reviews, neg_reviews)` tuple of lists of strings, each ordered
        by file name.
    """
    def read_reviews(label):
        # Read every file in one label folder.
        folder = os.path.join(data_dir, label)
        texts = []
        # os.listdir gives no ordering guarantee; sort so the result is stable.
        for filename in sorted(os.listdir(folder)):
            with open(os.path.join(folder, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
        return texts

    return read_reviews('pos'), read_reviews('neg')

# Preprocess data for simplicity
pos_reviews, neg_reviews = load_imdb_dataset()
reviews = pos_reviews + neg_reviews
labels = [1] * len(pos_reviews) + [0] * len(neg_reviews)
df = pd.DataFrame({'review': reviews, 'sentiment': labels})
# Use a smaller sample for demonstration. The seed makes re-runs over the same
# frame draw the same rows, and the size is capped so a small dataset does not
# make sample() raise.
df = df.sample(n=min(100, len(df)), random_state=42)

blackboard.update('reviews', df['review'].tolist())

# Create knowledge sources
text_processor = TextProcessor(blackboard)
rule_based_sa = RuleBasedSentimentAnalysis(blackboard)
ml_based_sa = MachineLearningSentimentAnalysis(blackboard)

# Train ML model on text cleaned exactly like the text it will later score, so
# the TF-IDF vocabulary matches what act() sees.
# NOTE: the model is trained and applied on the same sample (demonstration only).
train_texts = [text_processor.clean_text(review) for review in df['review'].tolist()]
ml_based_sa.train(train_texts, df['sentiment'].tolist())

knowledge_sources = [text_processor, rule_based_sa, ml_based_sa]

# Run the control component
control = ControlComponent(blackboard, knowledge_sources)
control.run()

# Output the final state of the blackboard
# print("\nFinal Blackboard State:")
# print(blackboard.data)


In [9]:
blackboard.data.keys()

dict_keys(['reviews', 'cleaned_reviews', 'rule_based_scores', 'ml_based_scores', 'final_sentiments'])

In [10]:
blackboard.data['reviews'][:5]

['LOC could have been a very well made movie on how the Kargil war was fought; it had the locations, the budget, and the skill to have been India\'s "Saving Private Ryan" or "Black Hawk Down". Instead it come across as a bloated, 4 hour bore of trying to meld the war move with the masala movie. Even the war scenes were terribly executed, using the same hill in all their battle scenes, and spending unnecessary time on casual talk. Instead of trying to appeal to the indian public, a better movie would have been a to-the-book account of what happened at Kargil (like "Black Hawk Down") or even spending time on the militant point of view (like "Tora, Tora, Tora"). Even better, it could have used a competent director like Ram Gopal Verma to write, direct and edit the film. Until then, I\'d like to see some one re-edit this film, with only the pertinent portions included; it would make the movie more watchable.',
 'This was a wonderful little American propaganda film that is both highly creat

In [11]:
blackboard.data['cleaned_reviews'][:5]

['loc could have been a very well made movie on how the kargil war was fought  it had the locations  the budget  and the skill to have been india s  saving private ryan  or  black hawk down   instead it come across as a bloated    hour bore of trying to meld the war move with the masala movie  even the war scenes were terribly executed  using the same hill in all their battle scenes  and spending unnecessary time on casual talk  instead of trying to appeal to the indian public  a better movie would have been a to the book account of what happened at kargil  like  black hawk down   or even spending time on the militant point of view  like  tora  tora  tora    even better  it could have used a competent director like ram gopal verma to write  direct and edit the film  until then  i d like to see some one re edit this film  with only the pertinent portions included  it would make the movie more watchable',
 'this was a wonderful little american propaganda film that is both highly creative

In [12]:
blackboard.data['rule_based_scores'][:5]

['negative', 'positive', 'positive', 'positive', 'positive']

In [13]:
blackboard.data['ml_based_scores'][:5]

[0, 1, 1, 1, 1]