## <center>Blackboard Architecture Demonstration (Sentiment Analysis)</center>
![blackboard architecture](blackboard_architecture.png)

In [1]:
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import pandas as pd
import re

In [2]:
# SentimentIntensityAnalyzer needs the VADER lexicon on disk. Fetch it only
# when it is missing, so the notebook runs on a fresh environment without
# re-downloading on every execution.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

**VADER** (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool that is specifically attuned to sentiments expressed in social media.

In [3]:
class Blackboard:
    """Shared key/value store through which the knowledge sources exchange data.

    Every slot created in ``__init__`` starts out as an empty list.
    """

    def __init__(self):
        slot_names = (
            'reviews',
            'cleaned_reviews',
            'rule_based_scores',
            'ml_based_scores',
            'final_sentiments',
        )
        # A comprehension gives every slot its own list object.
        self.data = {name: [] for name in slot_names}

    def update(self, key, value):
        """Write ``value`` under ``key``.

        If the slot already holds a list, the items of ``value`` are appended
        to it; otherwise (unknown key or non-list content) the slot is set to
        ``value`` itself.
        """
        existing = self.data.get(key)
        if isinstance(existing, list):
            existing.extend(value)
            return
        self.data[key] = value

    def read(self, key):
        """Return the content stored under ``key``, or ``None`` if the key is absent."""
        if key in self.data:
            return self.data[key]
        return None


In [4]:
class KnowledgeSource:
    """Base class for the components that read from and write to a blackboard."""

    def __init__(self, blackboard):
        # Keep a handle on the shared blackboard for use by subclasses.
        self.blackboard = blackboard

    def act(self):
        """Perform this source's contribution; subclasses must override."""
        raise NotImplementedError()

In [5]:
# Basic Text Processing KS
class TextProcessor(KnowledgeSource):
    """Knowledge source that normalises the raw reviews on the blackboard."""

    def act(self):
        """Clean every entry of 'reviews' and append the results to 'cleaned_reviews'."""
        raw_reviews = self.blackboard.read('reviews')
        self.blackboard.update('cleaned_reviews', list(map(self.clean_text, raw_reviews)))

    def clean_text(self, text):
        """Return ``text`` with markup and non-letters blanked out, lower-cased and trimmed.

        Anything between ``<`` and ``>`` is replaced by a space, then every
        character that is neither an ASCII letter nor whitespace is replaced
        by a space. Runs of spaces are not collapsed.
        """
        without_tags = re.sub(r'<.*?>', ' ', text)
        letters_only = re.sub(r'[^a-zA-Z\s]', ' ', without_tags)
        return letters_only.lower().strip()

# Rule-Based Sentiment Analysis KS
class RuleBasedSentimentAnalysis(KnowledgeSource):
    """Knowledge source that labels cleaned reviews from VADER's compound score."""

    def __init__(self, blackboard):
        super().__init__(blackboard)
        self.analyzer = SentimentIntensityAnalyzer()

    def act(self):
        """Label each entry of 'cleaned_reviews' and append to 'rule_based_scores'.

        A compound score of zero or above yields the string 'positive';
        anything below zero yields 'negative'.
        """
        labels = []
        for text in self.blackboard.read('cleaned_reviews'):
            compound = self.analyzer.polarity_scores(text)['compound']
            if compound >= 0:
                labels.append('positive')
            else:
                labels.append('negative')
        self.blackboard.update('rule_based_scores', labels)

# Machine Learning Sentiment Analysis KS
class MachineLearningSentimentAnalysis(KnowledgeSource):
    """Knowledge source that labels reviews with a TF-IDF + logistic-regression model.

    The vectorizer and the classifier are created unfitted, so ``train`` must
    be called before ``act``.
    """

    def __init__(self, blackboard):
        super().__init__(blackboard)
        # Keep at most 5000 vocabulary terms.
        self.vectorizer = TfidfVectorizer(max_features=5000)
        self.model = LogisticRegression()

    def train(self, reviews, labels):
        """Fit the vectorizer on ``reviews`` and the classifier on the resulting vectors.

        ``act`` predicts on the blackboard's 'cleaned_reviews', so the texts
        passed here should be preprocessed the same way.

        Args:
            reviews: iterable of review strings.
            labels: target label for each review, in the same order.
        """
        vectors = self.vectorizer.fit_transform(reviews)
        self.model.fit(vectors, labels)

    def act(self):
        """Predict a label for each entry of 'cleaned_reviews' and append to 'ml_based_scores'."""
        cleaned_reviews = self.blackboard.read('cleaned_reviews')
        # Nothing to classify: return quietly, as the other knowledge sources
        # do, instead of letting predict() reject a zero-sample input.
        if not cleaned_reviews:
            return
        vectors = self.vectorizer.transform(cleaned_reviews)
        sentiments = self.model.predict(vectors)
        # Store plain Python values rather than NumPy scalars on the blackboard.
        self.blackboard.update('ml_based_scores', sentiments.tolist())

In [6]:
class ControlComponent:
    """Drives the knowledge sources against a shared blackboard."""

    def __init__(self, blackboard, knowledge_sources):
        self.blackboard = blackboard
        # Kept as given; run() walks the sources in this order.
        self.knowledge_sources = knowledge_sources

    def run(self):
        """Invoke ``act()`` once on each knowledge source, in sequence."""
        for source in self.knowledge_sources:
            source.act()

In [7]:
# Instantiate the system components: start with the shared blackboard that
# the knowledge sources created below are all constructed with.
blackboard = Blackboard()

# Load IMDb dataset
def load_imdb_dataset(data_dir='dataset/train'):
    """Read the review files stored under ``data_dir``.

    Args:
        data_dir: directory containing a ``pos`` and a ``neg`` sub-directory,
            each holding one UTF-8 text file per review.

    Returns:
        A ``(pos_reviews, neg_reviews)`` tuple of lists of strings.

    Raises:
        FileNotFoundError: if ``data_dir/pos`` or ``data_dir/neg`` does not exist.
    """
    def read_reviews(label_dir):
        # Load every regular file of one label directory.
        folder = os.path.join(data_dir, label_dir)
        texts = []
        # os.listdir order is arbitrary; sorting keeps the result stable across runs.
        for filename in sorted(os.listdir(folder)):
            path = os.path.join(folder, filename)
            # Skip sub-directories, which open() cannot read.
            if not os.path.isfile(path):
                continue
            with open(path, 'r', encoding='utf-8') as file:
                texts.append(file.read())
        return texts

    return read_reviews('pos'), read_reviews('neg')

# Preprocess data for simplicity
SAMPLE_SIZE = 100  # number of reviews kept for the demonstration
RANDOM_SEED = 42   # fixed seed so repeated runs draw the same sample

pos_reviews, neg_reviews = load_imdb_dataset()
reviews = pos_reviews + neg_reviews
labels = [1] * len(pos_reviews) + [0] * len(neg_reviews)
df = pd.DataFrame({'review': reviews, 'sentiment': labels})
# Cap the sample at the number of rows available so a small dataset does not
# make sample() raise.
df = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=RANDOM_SEED)

blackboard.update('reviews', df['review'].tolist())

# Create knowledge sources
text_processor = TextProcessor(blackboard)
rule_based_sa = RuleBasedSentimentAnalysis(blackboard)
ml_based_sa = MachineLearningSentimentAnalysis(blackboard)

# Train ML model on text cleaned the same way as the 'cleaned_reviews' it will
# later predict on, so the training and prediction features match.
# NOTE: the model is trained on the same reviews it is then asked to label, so
# its predictions here are not a measure of generalisation.
train_texts = [text_processor.clean_text(review) for review in df['review']]
ml_based_sa.train(train_texts, df['sentiment'].tolist())

# Order matters: the text processor must fill 'cleaned_reviews' before the two
# analysers read it.
knowledge_sources = [text_processor, rule_based_sa, ml_based_sa]

# Run the control component
control = ControlComponent(blackboard, knowledge_sources)
control.run()

# Output the final state of the blackboard
# print("\nFinal Blackboard State:")
# print(blackboard.data)


In [8]:
# List the slots present on the blackboard after the run.
# NOTE(review): 'final_sentiments' is created by Blackboard.__init__, but none
# of the knowledge sources visible in this notebook write to it.
blackboard.data.keys()

dict_keys(['reviews', 'cleaned_reviews', 'rule_based_scores', 'ml_based_scores', 'final_sentiments'])

In [9]:
# Peek at the first five raw reviews on the blackboard.
blackboard.read('reviews')[:5]

["The Matador is a witty, dark humored and suspenseful melodrama that rises way above mediocrity thanks to two very engaging and earnest performances from Pierce Brosnan, who has never ever been better than here, and the always reliable Greg Kinnear who has his best role here since playing Jack Nicholson's gay neighbor in As Good As It Gets. A big plus goes to the writing as well. Clever and occasionally very nasty dialog is delivered with gusto by Brosnan and the slow building friendship between these two different individuals is completely convincing all the way. The story takes a few unexpected turns and keeps the viewer constantly guessing where it's gonna go next.<br /><br />Nice artistic touches from the director such as good use of music, clever editing and somewhat unorthodox cinematography at times set a nice tone for the film and for a long period of time you don't quite know how to label the film. But that's also thanks to a very well written script which keeps the viewer (m

In [10]:
# Peek at the first five reviews after text cleaning.
blackboard.read('cleaned_reviews')[:5]

['the matador is a witty  dark humored and suspenseful melodrama that rises way above mediocrity thanks to two very engaging and earnest performances from pierce brosnan  who has never ever been better than here  and the always reliable greg kinnear who has his best role here since playing jack nicholson s gay neighbor in as good as it gets  a big plus goes to the writing as well  clever and occasionally very nasty dialog is delivered with gusto by brosnan and the slow building friendship between these two different individuals is completely convincing all the way  the story takes a few unexpected turns and keeps the viewer constantly guessing where it s gonna go next   nice artistic touches from the director such as good use of music  clever editing and somewhat unorthodox cinematography at times set a nice tone for the film and for a long period of time you don t quite know how to label the film  but that s also thanks to a very well written script which keeps the viewer  me anyway  

In [11]:
# First five labels written by the rule-based analyser ('positive'/'negative' strings).
blackboard.read('rule_based_scores')[:5]

['positive', 'positive', 'negative', 'negative', 'positive']

In [12]:
# First five labels written by the machine-learning analyser.
blackboard.read('ml_based_scores')[:5]

[1, 0, 0, 0, 0]