In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np

class NeuralLDA(nn.Module):
    def __init__(self, num_topics, input_dim):
        super(NeuralLDA, self).__init__()
        self.num_topics = num_topics
        self.input_dim = input_dim
        self.topic_word_matrix = nn.Parameter(torch.randn(num_topics, input_dim))
        self.topic_prior = nn.Parameter(torch.randn(num_topics))
    
    def forward(self, X):
        topic_word_matrix = torch.softmax(self.topic_word_matrix, dim=1)
        topic_prior = torch.softmax(self.topic_prior, dim=0)
        reconstruction = torch.matmul(X, topic_word_matrix.T)
        return reconstruction

def train_model(X_tensor, model, optimizer, num_epochs=50):
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        reconstruction = model(X_tensor)
        loss = nn.MSELoss()(reconstruction, X_tensor)
        loss.backward()
        optimizer.step()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# Example documents
documents = [
    "This is a sample document about machine learning.",
    "Neural networks are a type of machine learning model.",
    "Topic modeling can be used to find topics in documents.",
    "Latent Dirichlet Allocation is a popular topic modeling method.",
    "We love tasty food.",
    "bangladeshi cuisine.",
]

# Vectorize the documents
vectorizer = CountVectorizer(stop_words='english')
X = vectorizer.fit_transform(documents).toarray()
X_tensor = torch.tensor(X, dtype=torch.float32)

# Define model and optimizer
num_topics = X_tensor.shape[1]  # Use the number of features as num_topics
model = NeuralLDA(num_topics=num_topics, input_dim=X_tensor.shape[1])
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Train the model
train_model(X_tensor, model, optimizer)

# Get the topic-word distributions
topic_word_distributions = model.topic_word_matrix.detach().numpy()

# Define a function to categorize topics based on top words
def categorize_topic(topic_dist, top_words, vectorizer):
    top_word_indices = np.argsort(topic_dist)[::-1][:10]
    top_words_list = [vectorizer.get_feature_names_out()[i] for i in top_word_indices]
    
    # Define your categories based on top words
    categories = {
        'Machine Learning': ['machine', 'learning', 'neural', 'networks'],
        'Topic Modeling': ['topic', 'modeling', 'documents', 'allocation'],
        'Food': ['food', 'eat', 'tasty', 'crispy','cusine'],
        
        'Other': []
    }
    
    for category, keywords in categories.items():
        if any(word in top_words_list for word in keywords):
            return category
    return 'Uncategorized'

# Print topic-word distributions and their categories
for topic_idx, topic_dist in enumerate(topic_word_distributions):
    print(f"Topic {topic_idx}:")
    top_words = np.argsort(topic_dist)[::-1][:10]
    for word_idx in top_words:
        print(f"  {vectorizer.get_feature_names_out()[word_idx]}")
    
    # Categorize the topic
    category = categorize_topic(topic_dist, top_words, vectorizer)
    print(f"Category: {category}")


Epoch 1/50, Loss: 0.170182004570961
Epoch 2/50, Loss: 0.169032484292984
Epoch 3/50, Loss: 0.16788168251514435
Epoch 4/50, Loss: 0.16672958433628082
Epoch 5/50, Loss: 0.16557620465755463
Epoch 6/50, Loss: 0.16442155838012695
Epoch 7/50, Loss: 0.1632656455039978
Epoch 8/50, Loss: 0.16210836172103882
Epoch 9/50, Loss: 0.16094955801963806
Epoch 10/50, Loss: 0.1597890406847
Epoch 11/50, Loss: 0.15862677991390228
Epoch 12/50, Loss: 0.1574627012014389
Epoch 13/50, Loss: 0.15629680454730988
Epoch 14/50, Loss: 0.15512916445732117
Epoch 15/50, Loss: 0.15395987033843994
Epoch 16/50, Loss: 0.15278904139995575
Epoch 17/50, Loss: 0.15161679685115814
Epoch 18/50, Loss: 0.15044328570365906
Epoch 19/50, Loss: 0.14926868677139282
Epoch 20/50, Loss: 0.1480931043624878
Epoch 21/50, Loss: 0.1469167321920395
Epoch 22/50, Loss: 0.14573973417282104
Epoch 23/50, Loss: 0.1445622742176056
Epoch 24/50, Loss: 0.14338447153568268
Epoch 25/50, Loss: 0.14220654964447021
Epoch 26/50, Loss: 0.14102859795093536
Epoch 27

In [5]:
from textblob import TextBlob

def analyze_sentiment(text):
    analysis = TextBlob(text)
    return analysis.sentiment.polarity  # Returns a sentiment score between -1 (negative) and 1 (positive)

# Extract topics and calculate sentiment
aspect_sentiments = {}
for i, document in enumerate(documents):
    for topic_idx, topic_dist in enumerate(topic_word_distributions):
        top_words = np.argsort(topic_dist)[::-1][:10]
        aspect_keywords = [vectorizer.get_feature_names_out()[word_idx] for word_idx in top_words]
        
        # Check if document contains any top words for the current topic
        if any(word in document for word in aspect_keywords):
            sentiment = analyze_sentiment(document)
            aspect_sentiments[f"Aspect {topic_idx}"] = sentiment

# Print aspect-based sentiments
for aspect, sentiment in aspect_sentiments.items():
    print(f"{aspect}: Sentiment Score = {sentiment}")


Aspect 0: Sentiment Score = 0.6
Aspect 1: Sentiment Score = 0.6
Aspect 2: Sentiment Score = 0.6
Aspect 3: Sentiment Score = 0.6
Aspect 4: Sentiment Score = 0.6
Aspect 5: Sentiment Score = 0.6
Aspect 6: Sentiment Score = 0.6
Aspect 7: Sentiment Score = 0.6
Aspect 8: Sentiment Score = 0.6
Aspect 9: Sentiment Score = 0.6
Aspect 10: Sentiment Score = 0.6
Aspect 11: Sentiment Score = 0.6
Aspect 12: Sentiment Score = 0.6
Aspect 13: Sentiment Score = 0.6
Aspect 14: Sentiment Score = 0.6
Aspect 15: Sentiment Score = 0.6
Aspect 16: Sentiment Score = 0.6
Aspect 17: Sentiment Score = 0.6
