# DMA - Scratch Implementation

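This notebook exercises a from-scratch implementation of the DMA model (`model.dma.DMA`): it generates synthetic documents, fits the model to them, and predicts topic distributions for newly generated documents.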

## Importing libraries

In [17]:
import numpy as np

In [18]:
from model.dma import DMA

In [19]:
# Synthetic corpus: every document is assigned exactly one topic, and its
# word counts are drawn from that topic's word distribution.
np.random.seed(42)  # fixed seed so the generated corpus is reproducible
N = 100  # number of documents
V = 50  # vocabulary size
K = 5  # number of topics

# Symmetric Dirichlet concentration vectors (every entry is 0.5)
topic_concentration = [0.5] * K
word_concentration = [0.5] * V

# Topic proportions, one row per document: shape (N, K)
theta = np.random.dirichlet(topic_concentration, N)

# Word proportions, one row per topic: shape (K, V)
phi = np.random.dirichlet(word_concentration, K)

# Document-term count matrix: row d holds the word counts of document d
X = np.zeros((N, V), dtype=int)
for doc_idx, topic_probs in enumerate(theta):
    # Pick a single topic for the whole document ...
    topic = np.random.choice(K, p=topic_probs)
    # ... then draw 100 word occurrences from that topic's word distribution
    X[doc_idx] = np.random.multinomial(100, phi[topic])

# Hyperparameters that the next cell passes to the DMA constructor
alpha = 0.5
beta = 0.5

In [20]:
# Build a DMA model from the topic count K and the alpha/beta values defined
# in the previous cell, then fit it to the synthetic document-term matrix X.
# NOTE(review): presumably alpha and beta are Dirichlet prior parameters --
# confirm against model.dma.DMA.
dma = DMA(K, alpha, beta)
dma.fit(X)

In [21]:
# Generate five new documents. Each one reuses the topic proportions of the
# corresponding training document (theta[0] .. theta[4]) and, like the
# training data, consists of 100 word draws from a single sampled topic.
X_new = np.zeros((5, V), dtype=int)
for row, topic_probs in enumerate(theta[:5]):
    topic = np.random.choice(K, p=topic_probs)
    X_new[row] = np.random.multinomial(100, phi[topic])

In [22]:
# Predict the topic distribution for each of the new documents in X_new.
# NOTE(review): the printed output further down shows one row of K values per
# document, each row looking like a probability vector -- confirm against
# DMA.predict.
predicted_topics = dma.predict(X_new)

In [23]:
# Print the predicted topic distribution for every new document.
# Iterating over predicted_topics itself (rather than a hard-coded range(5))
# keeps this cell correct if the number of new documents changes; document
# numbers are 1-based in the output.
for doc_number, topic_distribution in enumerate(predicted_topics, start=1):
    print(f"Document {doc_number} - Predicted Topic Distribution: {topic_distribution}")

Document 1 - Predicted Topic Distribution: [1.32269992e-175 1.56879766e-147 1.00000000e+000 2.65316137e-135
 1.48498789e-115]
Document 2 - Predicted Topic Distribution: [4.88275435e-078 1.83287752e-078 3.98570932e-086 4.70088113e-106
 1.00000000e+000]
Document 3 - Predicted Topic Distribution: [3.21519914e-169 2.17509076e-124 1.00000000e+000 9.11490836e-149
 1.18314102e-091]
Document 4 - Predicted Topic Distribution: [1.06264695e-085 3.47572747e-089 1.82573773e-085 1.16590233e-101
 1.00000000e+000]
Document 5 - Predicted Topic Distribution: [1.89812934e-151 1.09980934e-127 1.00000000e+000 8.97912522e-145
 1.35943884e-089]
