In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import plotly.express as px
import plotly.graph_objects as go

from sentence_transformers import SentenceTransformer
from sklearn.manifold import TSNE
import torch

In [None]:
# Load the article table from CSV; later cells read its
# 'translated_title' column to build the embeddings.
only_nagpur_data = pd.read_csv(
    filepath_or_buffer='path/to/only_nagpur_df.csv',
)

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

## Generate embeddings using MPNet model

In [None]:
# Load the pretrained MPNet sentence encoder, then move it to the chosen device.
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
model = model.to(device)

In [None]:
# Missing titles are replaced by empty strings so every row gets an embedding.
titles = only_nagpur_data['translated_title'].fillna("").tolist()

batch_size = 32

# `encode` already splits its input into batches and can display its own
# progress bar, so a manual slicing loop is unnecessary. Handing it the full
# list also lets the library group sentences of similar length together,
# which reduces padding work. Results come back in the original input order.
# `list(...)` keeps `embeddings` a list of 1-D arrays (one per title).
embeddings = list(
    model.encode(
        titles,
        batch_size=batch_size,
        show_progress_bar=True,
        device=device,
    )
)

# Store each vector as a plain Python list in a single DataFrame column.
only_nagpur_data['mpnet_embedding'] = [e.tolist() for e in embeddings]



## Do the dimensionality reduction using t-SNE

In [None]:
# Stack the per-row embedding lists into one NumPy array (one row per title).
embedding_matrix = np.array(list(only_nagpur_data["mpnet_embedding"]))

# Configure a 2-D t-SNE projection; the fixed random_state seeds the run.
tsne = TSNE(n_components=2, perplexity=50, learning_rate=200, max_iter=2000, random_state=42)

# Fit on the embeddings and keep the resulting 2-D coordinates.
tsne_result = tsne.fit_transform(embedding_matrix)


In [None]:
# Build a separate plotting frame: a copy of the article table with the two
# t-SNE coordinates attached as columns "x" and "y".
nagpur_tsne_df = only_nagpur_data.copy().assign(
    x=tsne_result[:, 0],
    y=tsne_result[:, 1],
)

# Semantic Representation Map

In [None]:
# Static scatter of the t-SNE coordinates, one point per row, colored by language.
fig1 = px.scatter(
    data_frame=nagpur_tsne_df,
    x="x", y="y",
    color="language",
    title="Semantic Map of Titles by Language",
    labels=dict(x="t-SNE Dimension 1", y="t-SNE Dimension 2"),
    hover_data=["translated_title", "language", "source_name"],
)
fig1.show()

# Temporal Shifts

In [None]:
# Step 1: Parse publication dates and sort rows chronologically so the
# date strings derived below appear in time order.
nagpur_tsne_df['pubDate'] = pd.to_datetime(nagpur_tsne_df['pubDate'])
nagpur_tsne_df = nagpur_tsne_df.sort_values('pubDate')

# Step 2: Format date into string for animation frames
nagpur_tsne_df['pubDateStr'] = nagpur_tsne_df['pubDate'].dt.strftime('%Y-%m-%d')


def _padded_range(values, pad_fraction=0.05):
    """Return [min, max] of `values`, widened on both sides by a fraction of the span."""
    low, high = float(values.min()), float(values.max())
    # Fall back to a unit pad when all values coincide (zero span).
    pad = (high - low) * pad_fraction or 1.0
    return [low - pad, high + pad]


# Step 3: Create the animation.
# The axis ranges are pinned to the extent of the whole dataset: without
# fixed ranges, points belonging to later frames can fall outside the view.
fig = px.scatter(
    nagpur_tsne_df,
    x='x',
    y='y',
    animation_frame='pubDateStr',
    animation_group='article_id',
    color='language',
    hover_name='title',
    hover_data={'pubDateStr': True, 'language': True, 'translated_title': True},
    title='t-SNE Cluster Animation Nagpur Violence News Articles Over Time',
    labels={'x': 't-SNE X', 'y': 't-SNE Y'},
    range_x=_padded_range(nagpur_tsne_df['x']),
    range_y=_padded_range(nagpur_tsne_df['y']),
    opacity=0.7,
    height=700
)

fig.update_layout(
    title_font_size=22,
    xaxis=dict(showgrid=False),
    yaxis=dict(showgrid=False),
    legend_title_text='Language',
    showlegend=True
)

# Slow down playback. The play button only exists when the figure has
# animation controls, so guard against an empty `updatemenus` (e.g. when
# every row shares a single date) instead of raising IndexError.
if fig.layout.updatemenus:
    fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 600  # ms
    fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 300  # ms

fig.show()