# Load, Sample and Add english translation of messaged

In [2]:
import pandas as pd
import numpy as np
import json
# from deep_translator import GoogleTranslator

SAMPLE_SIZE = 100

# def translate(x):
#     try:
#         tranlsation = GoogleTranslator(source='auto', target='en').translate(x)
#     except:
#         tranlsation = ""
#     return tranlsation

def total_interactions(x):
    reactions_total = 0
    for _, value in json.loads(x.replace("'", '"')).items():
        reactions_total += value
    return reactions_total

def weighted_popularity_score(row):
    return np.mean([row.views, row.forwards, row.reaction_nb])

def min_max_normalization(col, df):
    return (df[col]-df[col].min())/(df[col].max()-df[col].min())


df = pd.read_csv('../../data/telegram/telegram.csv')
df['reaction_nb'] = df.reactions.apply(lambda x: total_interactions(x))

df["views_norm"] = min_max_normalization("views", df)
df["forwards_norm"] = min_max_normalization("forwards", df)
df["reaction_nb_norm"] = min_max_normalization("reaction_nb", df)

df['popularity_score'] = df.apply(lambda x: weighted_popularity_score(x), axis=1)
df = df.sort_values('popularity_score', ascending=False)

sampled = df.iloc[:SAMPLE_SIZE]
# sampled['englishMessageText'] = sampled.messageText.apply(lambda x: translate(x))
# sampled.to_csv('../../data/telegram/sample_with_translations.csv', index=False)

In [None]:
df.messageDatetime.min()

In [None]:
sampled[sampled.messageText.str.contains('Какой')]

In [None]:
sampled.chat.value_counts()

In [None]:
sampled.shape

In [None]:
sampled[['chat', 'messageText']].to_dict()

# Request propaganda detection for sample messages

In [None]:
import asyncio
import websockets
import json
import time
from tqdm import tqdm

async def connect_to_websocket(dataframe, client_id, message, progress_bar, semaphore):
    # Use a semaphore to limit the number of parallel connections
    async with semaphore:
        try:
            async with websockets.connect(
                "ws://13.48.71.178:8000/ws/analyze_propaganda", 
                ping_interval=None  # No timeout on ping
            ) as websocket:
                # Define the request data
                request_data = {
                    "model_name": "gpt-4o",  # Example model
                    "text": message,
                    "contextualize": "False"
                }

                # Send the request data
                await websocket.send(json.dumps(request_data))
                print(f"Client {client_id}: Request sent.")

                # Keep receiving messages until the server closes the connection
                while True:
                    try:
                        response = await websocket.recv()  # Wait for the next message from the server
                        print(f"Client {client_id}: Response received:\n{response}")
                        dataframe.at[client_id, "response"] = response  # Save response in dataframe
                    except websockets.ConnectionClosedOK:
                        # Server closed the connection gracefully
                        print(f"Client {client_id}: Connection closed OK.")
                        break
                    except websockets.ConnectionClosedError:
                        # Server closed with an error
                        print(f"Client {client_id}: Connection closed with an error.")
                        break
                    except Exception as e:
                        print(f"Client {client_id}: An error occurred: {e}")
                        break

        except Exception as e:
            print(f"Client {client_id}: An error occurred: {e}")
        finally:
            # Update the progress bar when a client request is complete
            progress_bar.update(1)

async def simulate_multiple_clients(dataframe, parallel_connections):
    tasks = []
    
    # Create a semaphore to limit parallel connections to 'parallel_connections'
    semaphore = asyncio.Semaphore(parallel_connections)
    
    # Create a tqdm progress bar for tracking finished tasks
    with tqdm(total=len(dataframe), desc="Finished Requests") as progress_bar:
        for i, row in enumerate(dataframe.itertuples()):
            tasks.append(connect_to_websocket(dataframe, i, row.messageText, progress_bar, semaphore))
        
        # Wait for all tasks to complete (i.e., wait for WebSocket closure for each client)
        await asyncio.gather(*tasks)

# Example dataframe
# Replace 'sampled' with your actual dataframe
parallel_connections = 5  # You can change this value to control how many requests run in parallel
await simulate_multiple_clients(sampled, parallel_connections)

# Save the dataframe after processing
sampled.to_csv('../../data/telegram/best_100_detection.csv', index=False)


In [None]:
df

In [None]:
def unfold_results(x):
    obj = json.loads(x['response'])
    for key, value in obj['data'].items():
        x[key] = 1

    return x

new = sampled.apply(lambda x: unfold_results(x), axis=1)
new.fillna(0, inplace=True)
new['is_propaganda'] = new[['Appeal_to_Authority', 'Appeal_to_fear-prejudice',
       'Bandwagon, Reductio_ad_hitlerum', 'Black-and-White_Fallacy',
       'Causal_Oversimplification', 'Doubt', 'Exaggeration, Minimization',
       'Flag-Waving', 'Loaded_Language', 'Name_Calling, Labeling',
       'Repetition', 'Slogans', 'Whataboutism, Straw_Men, Red_Herring']].sum(axis=1)
new['is_propaganda'] = new['is_propaganda']>0
print(new['is_propaganda'].value_counts())
new.to_csv('../../data/telegram/unfolded.csv')

# Telethon

In [None]:
# Import libraries
import os
from dotenv import load_dotenv
from telethon import TelegramClient
from telethon.errors import SessionPasswordNeededError
from tqdm.notebook import tqdm
import getpass
from IPython.display import clear_output
import asyncio
from telethon.tl.types import Message
import inspect

# Option 1: Load environment variables from .env file
load_dotenv()
TELEGRAM_API_ID = os.getenv("TELEGRAM_API_ID")
TELEGRAM_API_HASH = os.getenv("TELEGRAM_API_HASH")

# Option 2: Directly set API credentials (Uncomment if not using .env)
# TELEGRAM_API_ID = 'your_api_id'
# TELEGRAM_API_HASH = 'your_api_hash'

# Initialize the Telegram client
client = TelegramClient('simple_session', TELEGRAM_API_ID, TELEGRAM_API_HASH)

# Authentication function
async def authenticate_client():
    await client.connect()
    
    if not await client.is_user_authorized():
        phone = input("Enter your phone number (with country code, e.g., +123456789): ")
        await client.send_code_request(phone)
        code = input("Enter the code you received: ")
        try:
            await client.sign_in(phone, code)
        except SessionPasswordNeededError:
            password = getpass.getpass("Two-step verification enabled. Enter your password: ")
            await client.sign_in(password=password)
        clear_output()
        print("Authentication successful!")
    else:
        print("Client is already authorized.")

# Run authentication
await authenticate_client()

# Function to print message attributes
def print_message_attributes(chat, limit=10):
    """
    Fetches and prints all attributes of the first `limit` messages from a specified chat.

    :param chat: The username or ID of the Telegram chat to scrape.
    :param limit: Number of messages to retrieve.
    """
    async def fetch_messages():
        print(f'\nScraping the first {limit} messages from chat: {chat}\n')
        try:
            # Fetch messages asynchronously
            messages = await client.iter_messages(chat, limit=limit).to_list()
            
            for idx, message in enumerate(messages, start=1):
                print(f"--- Message {idx} ---")
                # Get all attributes of the message
                attributes = {attr: getattr(message, attr) for attr in dir(message) 
                              if not attr.startswith('_') and not inspect.ismethod(getattr(message, attr))}
                for key, value in attributes.items():
                    print(f"{key}: {value}")
                print("\n")
        except Exception as e:
            print(f"An error occurred: {e}")

    # Run the asynchronous fetch
    asyncio.run(fetch_messages())

# Specify the chat to scrape
chat_to_scrape = 'https://t.me/opersvodki'  # Replace with your target chat https://t.me/medvedev_telegram

# Fetch and print message attributes
print_message_attributes(chat_to_scrape, limit=10)


In [None]:
from telethon.sync import TelegramClient
import os

# Your API ID and API Hash from my.telegram.org
TELEGRAM_API_ID = os.getenv("TELEGRAM_API_ID")
TELEGRAM_API_HASH = os.getenv("TELEGRAM_API_HASH")
# Peer ID of the Telegram channel
peer_id = '1315735637'

# Connect to the Telegram client
client = TelegramClient('session_name', TELEGRAM_API_ID, TELEGRAM_API_HASH)

entity = client.get_entity(peer_id)

    # Print the channel name
print('Channel Name:', entity.title)


In [None]:
import os
import asyncio
from telethon import TelegramClient
from telethon.tl.types import PeerChannel

# Fetch API credentials from environment variables and convert API_ID to integer
TELEGRAM_API_ID = int(os.getenv("TELEGRAM_API_ID"))
TELEGRAM_API_HASH = os.getenv("TELEGRAM_API_HASH")

# Peer ID of the Telegram channel (ensure it's an integer)
peer_id = 1315735637  # Remove quotes to make it an integer

async def main():
    # Initialize the Telegram client with a unique session name
    async with TelegramClient('unique_session_name', TELEGRAM_API_ID, TELEGRAM_API_HASH) as client:
        try:
            # Retrieve the entity using the peer ID
            entity = await client.get_entity(PeerChannel(peer_id))
            
            # Print the channel name
            print('Channel Name:', entity.title)
        except ValueError:
            print('Invalid peer ID or entity not found')
        except Exception as e:
            print(f'An error occurred: {e}')

# Run the asynchronous main function
if __name__ == "__main__":
    asyncio.run(main())


In [None]:
import os
import asyncio
import logging
from telethon import TelegramClient
from telethon.tl.types import PeerChannel

# Configure logging
logging.basicConfig(
    level=logging.INFO,  # Set to DEBUG for more detailed logs
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Fetch API credentials from environment variables and convert API_ID to integer
TELEGRAM_API_ID = os.getenv("TELEGRAM_API_ID")
TELEGRAM_API_HASH = os.getenv("TELEGRAM_API_HASH")

# Validate API credentials
if not TELEGRAM_API_ID or not TELEGRAM_API_HASH:
    logger.error("TELEGRAM_API_ID and TELEGRAM_API_HASH must be set as environment variables.")
    exit(1)

try:
    TELEGRAM_API_ID = int(TELEGRAM_API_ID)
except ValueError:
    logger.error("TELEGRAM_API_ID must be an integer.")
    exit(1)

# Peer ID of the Telegram channel (ensure it's an integer)
peer_id = 1315735637  # Ensure this is the correct integer ID

async def main():
    # Initialize the Telegram client with a unique session name
    try:
        async with TelegramClient('unique_session_name', TELEGRAM_API_ID, TELEGRAM_API_HASH) as client:
            logger.info("Client started successfully.")

            try:
                # Retrieve the entity using the peer ID
                entity = await client.get_entity(PeerChannel(peer_id))
                
                # Print the channel name
                print('Channel Name:', entity.title)
                logger.info(f'Channel Name: {entity.title}')
            except ValueError:
                logger.error('Invalid peer ID or entity not found.')
            except Exception as e:
                logger.exception(f'An unexpected error occurred: {e}')

    except Exception as e:
        logger.exception(f'Failed to initialize TelegramClient: {e}')

# Run the asynchronous main function
if __name__ == "__main__":
    asyncio.run(main())


In [None]:
async with TelegramClient('SessionName', TELEGRAM_API_ID, TELEGRAM_API_HASH) as client:
    # Fetch the entity using the peer ID
    entity = await client.get_entity(PeerChannel(1391419522))
    
    # Print the channel name
    print('Channel Name:', entity.title)

In [None]:
PeerChannel(1315735637)

# Analyse network between channels

In [57]:
import pandas as pd

df = pd.read_csv('../../data/telegram/messages_scraped.csv')
print("size df total", df.shape)
# Convert 'messageDate' column to datetime format
df['messageDate'] = pd.to_datetime(df['messageDate'])

# Filter rows with 'messageDate' after 2022
df = df[df['messageDate'] > '2022-01-01']
print("size df after filtering", df.shape)



  df = pd.read_csv('../../data/telegram/messages_scraped.csv')


size df total (1623145, 39)
size df after filtering (1275438, 39)


In [58]:
TOP_FWD_OBSERVED=1000
peer_id_to_chat = df[['peer_id', 'chat']].drop_duplicates().set_index('peer_id')['chat'].to_dict()
df['peer_id_name'] = df['peer_id'].map(peer_id_to_chat).fillna('unknown')
df['peer_id_name_fwd_from'] = df['fwd_from'].map(peer_id_to_chat).fillna('unknown')
df_fwd_known = df[df.peer_id_name_fwd_from != 'unknown']
df_fwd_known.sort_values('forwards', ascending=False, inplace=True)
print("size df with fwd_from known", df_fwd_known.shape)
df_fwd_known_top = df_fwd_known[:TOP_FWD_OBSERVED]

# df_fwd_known_top = df_fwd_known[df_fwd_known.messageText.isin(top_fwd_lst)]
print("size df with fwd_from known top", df_fwd_known_top.shape)

size df with fwd_from known (23503, 41)
size df with fwd_from known top (1000, 41)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fwd_known.sort_values('forwards', ascending=False, inplace=True)


In [59]:
import asyncio
import websockets
import json
import time
from tqdm import tqdm

async def connect_to_websocket(response_dict, client_id, message, progress_bar, semaphore):
    async with semaphore:
        try:
            async with websockets.connect(
                "ws://13.48.71.178:8000/ws/analyze_propaganda", 
                ping_interval=None
            ) as websocket:
                request_data = {
                    "model_name": "gpt-4o-mini", 
                    "text": message,
                    "contextualize": "False"
                }

                await websocket.send(json.dumps(request_data))
                
                while True:
                    try:
                        response = await websocket.recv()  # Receive message
                        response = json.loads(response)  # Convert to JSON
                        
                        # Store the response data in the shared dictionary
                        response_dict[client_id] = response.get('data', None)
                    except websockets.ConnectionClosedOK:
                        break
                    except websockets.ConnectionClosedError:
                        break
                    except Exception as e:
                        break
        except Exception as e:
            print(f"Client {client_id}: An error occurred: {e}")
        finally:
            progress_bar.update(1)

async def simulate_multiple_clients(dataframe, parallel_connections):
    tasks = []
    semaphore = asyncio.Semaphore(parallel_connections)
    
    # Dictionary to collect responses
    response_dict = {}
    
    with tqdm(total=len(dataframe), desc="Finished Requests") as progress_bar:
        for i, row in enumerate(dataframe.itertuples()):
            client_id = row.Index
            tasks.append(
                connect_to_websocket(response_dict, client_id, row.messageText, progress_bar, semaphore)
            )
        
        await asyncio.gather(*tasks)
    
    return response_dict



# Example use
parallel_connections = 50
response_dict = await simulate_multiple_clients(df_fwd_known_top, parallel_connections)

for key, value in response_dict.items():
    messageText = df_fwd_known_top.at[key, 'messageText']
    df.loc[df['messageText'] == messageText, 'response'] = [value] * len(df[df['messageText'] == messageText])

df_prop = df.dropna(subset=['response'])
df_prop = df_prop[df_prop['response'] != {}]


Finished Requests: 100%|██████████| 1000/1000 [22:51<00:00,  1.37s/it]


In [65]:
df.drop_duplicates(subset=['messageText'], inplace=True)
df = df.dropna(subset=['response'])
df = df[df['response'] != {}]
df.shape

(700, 42)

# BERTopic

In [133]:
import pandas as pd
df = pd.read_csv('../../data/telegram/messages_scraped_with_propaganda.csv')
df_prop = df.dropna(subset=['response'])
df_prop = df_prop[df_prop['response'] != {}]

  df = pd.read_csv('../../data/telegram/messages_scraped_with_propaganda.csv')


In [135]:
len(df_prop.messageText.unique())

870

In [123]:
import openai
from bertopic import BERTopic
from bertopic.backend import OpenAIBackend
from bertopic.representation import OpenAI
import os

client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
embedding_model = OpenAIBackend(client, "text-embedding-3-small")
representation_model = OpenAI(
    client,
    model="gpt-4o-mini", 
    chat=True,
)

# Initialize BERTopic with the representation model 
topic_model = BERTopic(embedding_model=embedding_model, representation_model=representation_model, nr_topics=25)

# # Prepare your documents
docs = df.messageText.unique()

# # Fit the model on your documents
topics, probs = topic_model.fit_transform(docs)

# # Optionally visualize or inspect topics
print(topic_model.get_topic_info())

    Topic  Count                                               Name  \
0      -1    410  -1_Военные события и проблемы командования на ...   
1       0     31  0_Проблемы и советы мобилизованным в условиях ...   
2       1     90  1_Херсонское и Курское направления: текущие бо...   
3       2    121  2_атаки ВСУ и противоракетная оборона в Крыму ...   
4       3     22      3_Фронтовые истории и экзистенциальные сказки   
5       4     31       4_Обмен пленными и боевые действия в Украине   
6       5     16  5_Задержания и военно-мобилизационные события ...   
7       6     13  6_Военные действия и заложники в Брянской и Бе...   
8       7     25  7_Неонацистский видеонекролог и пропагандистск...   
9       8     42  8_Влияние исторического опыта и социальных изм...   
10      9     32  9_Взаимоотношения между Кадыровым, Пригожиным ...   
11     10     26  10_Военные преступления украинских войск проти...   
12     11     11  11_Политическая судьба Зеленского и общественн...   

     

In [151]:
print(topic_model.get_topic_info())

    Topic  Count                                               Name  \
0      -1    410  -1_Военные события и проблемы командования на ...   
1       0     31  0_Проблемы и советы мобилизованным в условиях ...   
2       1     90  1_Херсонское и Курское направления: текущие бо...   
3       2    121  2_атаки ВСУ и противоракетная оборона в Крыму ...   
4       3     22      3_Фронтовые истории и экзистенциальные сказки   
5       4     31       4_Обмен пленными и боевые действия в Украине   
6       5     16  5_Задержания и военно-мобилизационные события ...   
7       6     13  6_Военные действия и заложники в Брянской и Бе...   
8       7     25  7_Неонацистский видеонекролог и пропагандистск...   
9       8     42  8_Влияние исторического опыта и социальных изм...   
10      9     32  9_Взаимоотношения между Кадыровым, Пригожиным ...   
11     10     26  10_Военные преступления украинских войск проти...   
12     11     11  11_Политическая судьба Зеленского и общественн...   

     

In [148]:
import tiktoken
df_sample = df.sample(1000)  
def count_tokens(text: str, model_name: str = "gpt-3.5-turbo") -> int:
    """
    Count the number of tokens in a given text for a specific model.

    Parameters:
        text (str): The input text to measure.
        model_name (str): The name of the model to use for tokenization.
                          Default is "gpt-3.5-turbo".

    Returns:
        int: The number of tokens in the input text.
    """
    # Initialize the tokenizer for the specified model
    encoding = tiktoken.encoding_for_model(model_name)
    
    # Tokenize the input text
    tokens = encoding.encode(text)
    
    # Return the count of tokens
    return len(tokens)

df_sample["token"] = df.messageText.apply(lambda x: count_tokens(x, "text-embedding-3-small"))
df_sample = df_sample[df_sample["token"]<8000]
print(len(df_sample))
preds, probs = topic_model.transform(df_sample.messageText.values)
df_sample['topic'] = preds

1000


In [164]:
import pandas as pd
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

def visualize_forwarded_messages_with_highlight(df, output_file='interactive_graph.html', time_unit='month'):
    """
    Visualize forwarded messages in a DataFrame using PyVis with a temporal slider at the top.
    Edges corresponding to messages sent in the selected time frame are colored based on their topics.
    Nodes are kept with default colors or can be colored differently if desired.

    Parameters:
    - df (pd.DataFrame): DataFrame containing messages with 'peer_id', 'fwd_from', 'id', 'messageText', 'messageDate', 'forwards', 'chat', and 'topic' columns.
    - output_file (str): File name for the output HTML file with the interactive graph.
    - time_unit (str): Time unit for slider filtering ('month' or 'week').
    """

    # Ensure messageDate is in datetime format
    df['messageDate'] = pd.to_datetime(df['messageDate'])

    # Ensure 'topic' column is present
    if 'topic' not in df.columns:
        raise ValueError("The DataFrame must contain a 'topic' column.")

    # Step 1: Filter to include only forwarded messages
    forwarded_messages = df[df['fwd_from'].notna()]

    # Define time periods with year included
    if time_unit == 'month':
        forwarded_messages['time_period'] = forwarded_messages['messageDate'].dt.to_period('M')  # Monthly periods (e.g., '2024-01')
    elif time_unit == 'week':
        forwarded_messages['time_period'] = forwarded_messages['messageDate'].dt.to_period('W')  # Weekly periods (e.g., '2024-W01')
    else:
        raise ValueError("time_unit must be 'month' or 'week'.")

    # Get unique time periods for the slider
    time_periods = sorted(forwarded_messages['time_period'].unique())

    # Convert time periods to strings for JavaScript
    time_periods_str = [str(tp) for tp in time_periods]

    # Step 2: Assign unique colors to each unique topic
    unique_topics = forwarded_messages['topic'].unique()

    # Choose a colormap
    cmap = plt.get_cmap('tab20')
    num_colors = len(unique_topics)

    # If number of unique topics exceeds the colormap range, switch to 'hsv'
    if num_colors > cmap.N:
        cmap = plt.get_cmap('hsv')
    colors = [mcolors.rgb2hex(cmap(i / num_colors)) for i in range(num_colors)]

    # Create a dictionary mapping topics to colors
    topic_to_color = {topic: colors[i] for i, topic in enumerate(unique_topics)}

    # Assign a default color for edges with unknown topics
    default_edge_color = '#C0C0C0'  # Silver

    # Step 3: Initialize a directed MultiDiGraph
    G = nx.MultiDiGraph()

    # Step 4: Iterate over each forwarded message to add edges to the graph
    peer_id_to_chat = df[['peer_id', 'chat']].drop_duplicates().set_index('peer_id')['chat'].to_dict()
    for index, row in forwarded_messages.iterrows():
        source_peer_id = row['fwd_from']
        target_peer_id = row['peer_id']
        message_id = row['id']
        message_text = row['messageText'] if pd.notna(row['messageText']) else ''
        message_date = row['messageDate'].strftime('%Y-%m-%d')  # Convert to string
        time_period = str(row['time_period'])
        topic = row['topic']

        # Get chat names or use 'unknown name'
        source_chat = peer_id_to_chat.get(source_peer_id, "unknown name")
        target_chat = peer_id_to_chat.get(target_peer_id, "unknown name")

        # Use peer_id as unique identifier (converted to string)
        source_node = str(source_peer_id)
        target_node = str(target_peer_id)

        # Add nodes with labels (you can color nodes if desired)
        if not G.has_node(source_node):
            G.add_node(
                source_node,
                label=source_chat,
                title=source_chat
            )
        if not G.has_node(target_node):
            G.add_node(
                target_node,
                label=target_chat,
                title=target_chat
            )

        # Add an edge from source to target with message details as edge attributes
        G.add_edge(
            source_node,
            target_node,
            message_id=message_id,
            text=message_text,
            date=message_date,
            time_period=time_period,  # Add time period for filtering
            topic=topic,              # Add topic for coloring
            color=default_edge_color, # Default color for edges
            width=1                   # Default width for edges
        )

    # Step 5: Create a PyVis Network
    net = Network(height='750px', width='100%', notebook=True, directed=True)

    # Customize the physics layout (optional for better visualization)
    net.force_atlas_2based()

    # Add nodes with labels
    for node, data in G.nodes(data=True):
        label = data.get('label', 'unknown name')
        title = data.get('title', '')
        net.add_node(
            node,
            label=label,
            title=title,  # Tooltip on hover
            # You can set node color here if desired
            size=15
        )

    # Add edges with tooltips, time period, topic, and default styles
    for source, target, data in G.edges(data=True):
        message_id = data.get('message_id', '')
        text = data.get('text', '')
        date = data.get('date', '')
        time_period = data.get('time_period', '')
        topic = data.get('topic', None)
        tooltip = f"<b>Message ID:</b> {message_id}<br><b>Date:</b> {date}<br><b>Topic:</b> {topic}<br><b>Text:</b> {text}"

        net.add_edge(
            source,
            target,
            title=tooltip,        # Tooltip on hover
            time_period=time_period,  # Add time period as edge attribute for filtering
            topic=topic,              # Add topic as edge attribute for coloring
            color=data.get('color', default_edge_color),  # Default edge color
            width=data.get('width', 1)        # Default edge width
        )

    # Optionally, enable additional features like showing physics controls
    net.show_buttons(filter_=['physics'])

    # Generate JavaScript for the slider and dynamic edge coloring
    # Create a JavaScript object mapping topics to colors
    topic_to_color_js = "{\n" + ",\n".join([f"'{str(k)}': '{v}'" for k, v in topic_to_color.items()]) + "\n}"

    time_periods_js_array = "[" + ", ".join([f"'{tp}'" for tp in time_periods_str]) + "]"
    slider_js = f"""
        <script type="text/javascript">
            let timePeriods = {time_periods_js_array};  // JavaScript array of time periods
            let topicToColor = {topic_to_color_js};     // Mapping of topics to colors

            let sliderContainer = document.createElement("div");
            sliderContainer.style.margin = "10px";
            sliderContainer.style.textAlign = "center";

            let timeLabel = document.createElement("div");
            timeLabel.style.display = "inline-block";
            timeLabel.style.marginRight = "10px";
            timeLabel.innerHTML = "Highlighting period: " + timePeriods[0];

            let slider = document.createElement("input");
            slider.type = "range";
            slider.min = 0;
            slider.max = timePeriods.length - 1;
            slider.value = 0;
            slider.id = "timeSlider";
            slider.style.width = "70%";
            slider.style.verticalAlign = "middle";

            slider.oninput = function() {{
                let selectedPeriod = timePeriods[this.value];
                timeLabel.innerHTML = "Highlighting period: " + selectedPeriod;

                network.body.data.edges.update(
                    network.body.data.edges.get().map(edge => {{
                        let isHighlighted = edge.time_period === selectedPeriod;
                        if (isHighlighted) {{
                            let topicColor = topicToColor[edge.topic] || '{default_edge_color}';
                            return {{
                                id: edge.id,
                                color: {{
                                    color: topicColor,  // Edge color based on topic
                                    highlight: topicColor,
                                    hover: topicColor
                                }},
                                width: 3  // Thicker width for highlighted edges
                            }};
                        }} else {{
                            return {{
                                id: edge.id,
                                color: {{
                                    color: '{default_edge_color}',  // Default color for edges not in selected period
                                    highlight: '{default_edge_color}',
                                    hover: '{default_edge_color}'
                                }},
                                width: 1  // Default width
                            }};
                        }}
                    }})
                );

                // Optionally, you can adjust node appearance if desired
            }};

            sliderContainer.appendChild(timeLabel);
            sliderContainer.appendChild(slider);

            // Insert the slider container before the network container
            let networkContainer = document.getElementById("mynetwork");
            networkContainer.parentNode.insertBefore(sliderContainer, networkContainer);
        </script>
    """

    # Generate and save the interactive graph to an HTML file
    net.save_graph('temp_graph.html')  # Save initial HTML to a temp file

    # Read the saved HTML and inject the slider JavaScript
    with open('temp_graph.html', 'r') as f:
        html = f.read()

    # Insert the slider JavaScript before the closing body tag
    html = html.replace('</body>', slider_js + '</body>')

    # Save the final HTML with the slider
    with open(output_file, 'w') as f:
        f.write(html)

    print(f"Interactive graph saved to {output_file}")


# Example usage
visualize_forwarded_messages_with_highlight(df_sample[df_sample.topic !=-1], output_file='../../visuals/interactive_graph_with_topics.html', time_unit='month')


Interactive graph saved to ../../visuals/interactive_graph_with_topics.html


In [154]:
import pandas as pd
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

def visualize_forwarded_messages_with_highlight(df, output_file='interactive_graph.html', time_unit='month'):
    """
    Visualize forwarded messages in a DataFrame using PyVis with a temporal slider at the top.
    Edges corresponding to messages sent in the selected time frame are highlighted.

    Parameters:
    - df (pd.DataFrame): DataFrame containing messages with 'peer_id', 'fwd_from', 'id', 'messageText', 'messageDate', 'forwards', and 'chat' columns.
    - output_file (str): File name for the output HTML file with the interactive graph.
    - time_unit (str): Time unit for slider filtering ('month' or 'week').
    """

    # Ensure messageDate is in datetime format
    df['messageDate'] = pd.to_datetime(df['messageDate'])

    # Step 1: Create the peer_id_to_chat dictionary
    peer_id_to_chat = df[['peer_id', 'chat']].drop_duplicates().set_index('peer_id')['chat'].to_dict()

    # Step 2: Assign unique colors to each unique chat
    unique_chats = set(peer_id_to_chat.values())

    # Choose a colormap
    cmap = plt.get_cmap('tab20')
    num_colors = len(unique_chats)

    # If number of unique chats exceeds the colormap range, switch to 'hsv'
    if num_colors > cmap.N:
        cmap = plt.get_cmap('hsv')
        colors = [mcolors.rgb2hex(cmap(i / num_colors)) for i in range(num_colors)]
    else:
        colors = [mcolors.rgb2hex(cmap(i)) for i in range(num_colors)]

    # Create a dictionary mapping chat names to colors
    chat_to_color = {chat: colors[i] for i, chat in enumerate(unique_chats)}

    # Assign a default color for nodes with unknown names
    default_node_color = '#C0C0C0'  # Silver

    # Step 3: Filter to include only forwarded messages
    forwarded_messages = df[df['fwd_from'].notna()]

    # Define time periods with year included
    if time_unit == 'month':
        forwarded_messages['time_period'] = forwarded_messages['messageDate'].dt.to_period('M')  # Monthly periods (e.g., '2024-01')
    elif time_unit == 'week':
        forwarded_messages['time_period'] = forwarded_messages['messageDate'].dt.to_period('W')  # Weekly periods (e.g., '2024-W01')

    # Get unique time periods for the slider
    time_periods = sorted(forwarded_messages['time_period'].unique())

    # Convert time periods to strings for JavaScript
    time_periods_str = [str(tp) for tp in time_periods]

    # Step 4: Initialize a directed MultiDiGraph
    G = nx.MultiDiGraph()

    # Step 5: Iterate over each forwarded message to add edges to the graph
    for index, row in forwarded_messages.iterrows():
        source_peer_id = row['fwd_from']
        target_peer_id = row['peer_id']
        message_id = row['id']
        message_text = row['messageText'] if pd.notna(row['messageText']) else ''
        message_date = row['messageDate'].strftime('%Y-%m-%d')  # Convert to string
        time_period = str(row['time_period'])
        
        # Get chat names or use 'unknown name'
        source_chat = peer_id_to_chat.get(source_peer_id, "unknown name")
        target_chat = peer_id_to_chat.get(target_peer_id, "unknown name")
        
        # Use peer_id as unique identifier (converted to string)
        source_node = str(source_peer_id)
        target_node = str(target_peer_id)
        
        # Determine node colors based on chat names
        source_color = chat_to_color.get(source_chat, default_node_color)
        target_color = chat_to_color.get(target_chat, default_node_color)
        
        # Add nodes with label as chat names and assigned colors
        if not G.has_node(source_node):
            G.add_node(source_node, label=source_chat, color=source_color)
        if not G.has_node(target_node):
            G.add_node(target_node, label=target_chat, color=target_color)
        
        # Add an edge from source to target with message details as edge attributes
        G.add_edge(
            source_node,
            target_node,
            message_id=message_id,
            text=message_text,
            date=message_date,
            time_period=time_period,  # Add time period for filtering
            color='gray',             # Default color for edges
            width=1                   # Default width for edges
        )

    # Step 6: Create a PyVis Network
    net = Network(height='750px', width='100%', notebook=True, directed=True)

    # Customize the physics layout (optional for better visualization)
    net.force_atlas_2based()

    # Add nodes with labels and colors
    for node, data in G.nodes(data=True):
        label = data.get('label', 'unknown name')
        color = data.get('color', default_node_color)
        net.add_node(
            node,
            label=label,
            title=label,  # Tooltip on hover
            color=color,  # Node color based on chat
            size=15
        )

    # Add edges with tooltips, time period, and default styles
    for source, target, data in G.edges(data=True):
        message_id = data.get('message_id', '')
        text = data.get('text', '')
        date = data.get('date', '')
        time_period = data.get('time_period', '')
        tooltip = f"<b>Message ID:</b> {message_id}<br><b>Date:</b> {date}<br><b>Text:</b> {text}"
        
        net.add_edge(
            source,
            target,
            title=tooltip,        # Tooltip on hover
            time_period=time_period,  # Add time period as edge attribute for filtering
            color=data.get('color', 'gray'),  # Default edge color
            width=data.get('width', 1)        # Default edge width
        )

    # Optionally, enable additional features like showing physics controls
    net.show_buttons(filter_=['physics'])

    # Generate JavaScript for multi-year slider and filtering, inserting slider at the top
    time_periods_js_array = "[" + ", ".join([f"'{tp}'" for tp in time_periods_str]) + "]"
    slider_js = f"""
        <script type="text/javascript">
            let timePeriods = {time_periods_js_array};  // JavaScript array of time periods
            let sliderContainer = document.createElement("div");
            sliderContainer.style.margin = "10px";
            sliderContainer.style.textAlign = "center";

            let timeLabel = document.createElement("div");
            timeLabel.style.display = "inline-block";
            timeLabel.style.marginRight = "10px";
            timeLabel.innerHTML = "Highlighting period: " + timePeriods[0];

            let slider = document.createElement("input");
            slider.type = "range";
            slider.min = 0;
            slider.max = timePeriods.length - 1;
            slider.value = 0;
            slider.id = "timeSlider";
            slider.style.width = "70%";
            slider.style.verticalAlign = "middle";

            slider.oninput = function() {{
                let selectedPeriod = timePeriods[this.value];
                timeLabel.innerHTML = "Highlighting period: " + selectedPeriod;
                
                network.body.data.edges.update(
                    network.body.data.edges.get().map(edge => {{
                        let isHighlighted = edge.time_period === selectedPeriod;
                        return {{
                            id: edge.id,
                            color: {{
                                color: isHighlighted ? 'red' : 'gray',  // Highlight color and default color
                                highlight: 'red',  // Color when selected
                                hover: isHighlighted ? 'red' : 'gray'  // Color when hovered
                            }},
                            width: isHighlighted ? 3 : 1  // Thicker width for highlighted edges
                        }};
                    }})
                );

                // Optionally, you can adjust node appearance if desired
            }};

            sliderContainer.appendChild(timeLabel);
            sliderContainer.appendChild(slider);

            // Insert the slider container before the network container
            let networkContainer = document.getElementById("mynetwork");
            networkContainer.parentNode.insertBefore(sliderContainer, networkContainer);
        </script>
    """

    # Generate and save the interactive graph to an HTML file
    net.save_graph('temp_graph.html')  # Save initial HTML to a temp file

    # Read the saved HTML and inject the slider JavaScript
    with open('temp_graph.html', 'r') as f:
        html = f.read()

    # Insert the slider JavaScript before the closing body tag
    html = html.replace('</body>', slider_js + '</body>')

    # Save the final HTML with the slider
    with open(output_file, 'w') as f:
        f.write(html)

    print(f"Interactive graph saved to {output_file}")

# Example usage
visualize_forwarded_messages_with_highlight(df_prop, output_file='../../visuals/interactive_graph_with_highlight.html', time_unit='month')

Interactive graph saved to ../../visuals/interactive_graph_with_highlight.html


In [165]:
import pandas as pd
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

def visualize_forwarded_messages(df, output_file='interactive_graph.html'):
    """
    Visualize forwarded messages in a DataFrame using PyVis.
    
    Parameters:
    - df (pd.DataFrame): DataFrame containing messages with 'peer_id', 'fwd_from', 'id', 'messageText', 'messageDate', and 'forwards' columns.
    - output_file (str): File name for the output HTML file with the interactive graph.
    """
    
    # Step 1: Create the peer_id_to_chat dictionary
    peer_id_to_chat = df[['peer_id', 'chat']].drop_duplicates().set_index('peer_id')['chat'].to_dict()

    # Step 2: Assign unique colors to each unique chat
    unique_chats = set(peer_id_to_chat.values())

    # Choose a colormap
    cmap = plt.get_cmap('tab20')
    num_colors = len(unique_chats)

    # If number of unique chats exceeds the colormap range, switch to 'hsv'
    if num_colors > cmap.N:
        cmap = plt.get_cmap('hsv')
        colors = [mcolors.rgb2hex(cmap(i / num_colors)) for i in range(num_colors)]
    else:
        colors = [mcolors.rgb2hex(cmap(i)) for i in range(num_colors)]

    # Create a dictionary mapping chat names to colors
    chat_to_color = {chat: colors[i] for i, chat in enumerate(unique_chats)}

    # Assign a default color for nodes with unknown names
    default_node_color = '#C0C0C0'  # Silver

    # Step 3: Filter to include only forwarded messages
    forwarded_messages = df[df['fwd_from'].notna()]

    # Step 4: Initialize a directed graph
    G = nx.MultiDiGraph()


    # Step 5: Iterate over each forwarded message to add edges to the graph
    for index, row in forwarded_messages.iterrows():
        source_peer_id = row['fwd_from']
        target_peer_id = row['peer_id']
        message_id = row['id']
        message_text = row['messageText'] if pd.notna(row['messageText']) else ''
        message_date = row['messageDate']
        
        # Get chat names or use 'unknown name'
        source_chat = peer_id_to_chat.get(source_peer_id, "unknown name")
        target_chat = peer_id_to_chat.get(target_peer_id, "unknown name")
        
        # Use peer_id as unique identifier (converted to string)
        source_node = str(source_peer_id)
        target_node = str(target_peer_id)
        
        # Determine node colors based on chat names
        source_color = chat_to_color.get(source_chat, default_node_color)
        target_color = chat_to_color.get(target_chat, default_node_color)
        
        # Add nodes with label as chat names and assigned colors
        if not G.has_node(source_node):
            G.add_node(source_node, label=source_chat, color=source_color)
        if not G.has_node(target_node):
            G.add_node(target_node, label=target_chat, color=target_color)
        
        # Add an edge from source to target with message details as edge attributes
        G.add_edge(
            source_node,
            target_node,
            message_id=message_id,
            text=message_text,
            date=message_date
        )

    # Step 6: Create a PyVis Network
    net = Network(height='750px', width='100%', notebook=True, directed=True)

    # Customize the physics layout (optional for better visualization)
    net.force_atlas_2based()

    # Add nodes with labels and colors
    for node, data in G.nodes(data=True):
        label = data.get('label', 'unknown name')
        color = data.get('color', default_node_color)
        net.add_node(
            node,
            label=label,
            title=label,  # Tooltip on hover
            color=color,  # Node color based on chat
            size=15
        )

    # Add edges with tooltips
    for source, target, data in G.edges(data=True):
        message_id = data.get('message_id', '')
        text = data.get('text', '')
        date = data.get('date', '')
        tooltip = f"<b>Message ID:</b> {message_id}<br><b>Date:</b> {date}<br><b>Text:</b> {text}"
        
        net.add_edge(
            source,
            target,
            title=tooltip  # Tooltip on hover
        )

    # Optionally, enable additional features like showing physics controls
    net.show_buttons(filter_=['physics'])

    # Generate and save the interactive graph to an HTML file
    net.show(output_file)
    print(f"Interactive graph saved to {output_file}")

# Example usage
visualize_forwarded_messages(df_sample, output_file='../../visuals/forwarded_messages_graph.html')


../../visuals/forwarded_messages_graph.html
Interactive graph saved to ../../visuals/forwarded_messages_graph.html


In [None]:
import pandas as pd
import networkx as nx
from pyvis.network import Network
from datetime import datetime

def visualize_with_time_slider(df, output_file='temporal_graph.html', time_unit='month'):
    """
    Visualize forwarded messages with a temporal slider.
    
    Parameters:
    - df (pd.DataFrame): DataFrame with columns 'peer_id', 'fwd_from', 'id', 'messageText', 'messageDate', 'chat'
    - output_file (str): Output HTML file for the interactive graph.
    - time_unit (str): Time unit for slider filtering ('month' or 'week').
    """
    
    # Ensure messageDate is a datetime type
    df['messageDate'] = pd.to_datetime(df['messageDate'])
    
    # Define time periods based on time_unit
    if time_unit == 'month':
        df['time_period'] = df['messageDate'].dt.to_period('M')
    elif time_unit == 'week':
        df['time_period'] = df['messageDate'].dt.to_period('W')
    
    # Get unique time periods for the slider
    time_periods = sorted(df['time_period'].unique())
    
    # Create peer_id to chat mapping
    peer_id_to_chat = df[['peer_id', 'chat']].drop_duplicates().set_index('peer_id')['chat'].to_dict()
    
    # Initialize directed MultiDiGraph
    G = nx.MultiDiGraph()
    
    # Populate graph with nodes and edges
    for index, row in df.iterrows():
        source_peer_id = row['fwd_from']
        target_peer_id = row['peer_id']
        message_id = row['id']
        message_text = row['messageText'] if pd.notna(row['messageText']) else ''
        message_date = row['messageDate']
        time_period = row['time_period']
        
        # Get chat names
        source_chat = peer_id_to_chat.get(source_peer_id, "unknown name")
        target_chat = peer_id_to_chat.get(target_peer_id, "unknown name")
        
        # Convert peer_id to string for unique node identification
        source_node = str(source_peer_id)
        target_node = str(target_peer_id)
        
        # Add nodes
        if not G.has_node(source_node):
            G.add_node(source_node, label=source_chat, title=source_chat)
        if not G.has_node(target_node):
            G.add_node(target_node, label=target_chat, title=target_chat)
        
        # Add edge with date and time period
        G.add_edge(
            source_node,
            target_node,
            message_id=message_id,
            text=message_text,
            date=message_date,
            time_period=str(time_period)  # Store time_period as string for JavaScript filtering
        )

    # Create PyVis Network
    net = Network(height='750px', width='100%', notebook=True, directed=True)
    net.from_nx(G)

    # Generate JavaScript for slider and filtering
    time_periods_str = [str(tp) for tp in time_periods]  # Convert to strings for JS compatibility
    slider_js = f"""
        <script>
            let slider = document.createElement("input");
            slider.type = "range";
            slider.min = 0;
            slider.max = {len(time_periods_str) - 1};
            slider.value = 0;
            slider.id = "timeSlider";
            slider.style.width = "100%";

            let timeLabel = document.createElement("div");
            timeLabel.style.textAlign = "center";
            timeLabel.innerHTML = "Showing period: {time_periods_str[0]}";
            slider.oninput = function() {{
                let selectedPeriod = "{time_periods_str}"[this.value];
                timeLabel.innerHTML = "Showing period: " + selectedPeriod;
                
                network.body.data.edges.update(
                    network.body.data.edges.get().map(edge => {{
                        let isVisible = edge.time_period === selectedPeriod;
                        return {{ id: edge.id, hidden: !isVisible }};
                    }})
                );

                network.body.data.nodes.update(
                    network.body.data.nodes.get().map(node => {{
                        let isNodeVisible = network.body.data.edges.get().some(edge => 
                            (edge.from === node.id || edge.to === node.id) && !edge.hidden
                        );
                        return {{ id: node.id, hidden: !isNodeVisible }};
                    }})
                );
            }};
            
            document.getElementsByTagName("body")[0].appendChild(timeLabel);
            document.getElementsByTagName("body")[0].appendChild(slider);
        </script>
    """

    # Customize the graph's physics layout
    net.force_atlas_2based()

    # Add JavaScript to HTML
    html = net.generate_html()
    html = html.replace('</body>', slider_js + '</body>')  # Inject the slider JavaScript before the closing body tag

    # Save the interactive graph with slider
    with open(output_file, 'w') as f:
        f.write(html)
    print(f"Interactive graph with temporal slider saved to {output_file}")

# Example usage
visualize_with_time_slider(df, output_file='data/temporal_graph.html', time_unit='month')


In [None]:
import pandas as pd
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

def visualize_forwarded_messages_with_slider(df, output_file='interactive_graph.html', time_unit='month'):
    """
    Visualize forwarded messages in a DataFrame using PyVis with a temporal slider at the top.
    
    Parameters:
    - df (pd.DataFrame): DataFrame containing messages with 'peer_id', 'fwd_from', 'id', 'messageText', 'messageDate', 'forwards', and 'chat' columns.
    - output_file (str): File name for the output HTML file with the interactive graph.
    - time_unit (str): Time unit for slider filtering ('month' or 'week').
    """
    
    # Ensure messageDate is in datetime format
    df['messageDate'] = pd.to_datetime(df['messageDate'])
    
    # Step 1: Create the peer_id_to_chat dictionary
    peer_id_to_chat = df[['peer_id', 'chat']].drop_duplicates().set_index('peer_id')['chat'].to_dict()

    # Step 2: Assign unique colors to each unique chat
    unique_chats = set(peer_id_to_chat.values())

    # Choose a colormap
    cmap = plt.get_cmap('tab20')
    num_colors = len(unique_chats)

    # If number of unique chats exceeds the colormap range, switch to 'hsv'
    if num_colors > cmap.N:
        cmap = plt.get_cmap('hsv')
        colors = [mcolors.rgb2hex(cmap(i / num_colors)) for i in range(num_colors)]
    else:
        colors = [mcolors.rgb2hex(cmap(i)) for i in range(num_colors)]

    # Create a dictionary mapping chat names to colors
    chat_to_color = {chat: colors[i] for i, chat in enumerate(unique_chats)}

    # Assign a default color for nodes with unknown names
    default_node_color = '#C0C0C0'  # Silver

    # Step 3: Filter to include only forwarded messages
    forwarded_messages = df[df['fwd_from'].notna()]

    # Define time periods with year included
    if time_unit == 'month':
        forwarded_messages['time_period'] = forwarded_messages['messageDate'].dt.to_period('M')  # Monthly periods (e.g., '2024-01')
    elif time_unit == 'week':
        forwarded_messages['time_period'] = forwarded_messages['messageDate'].dt.to_period('W')  # Weekly periods (e.g., '2024-W01')

    # Get unique time periods for the slider
    time_periods = sorted(forwarded_messages['time_period'].unique())

    # Convert time periods to strings for JavaScript
    time_periods_str = [str(tp) for tp in time_periods]

    # Step 4: Initialize a directed MultiDiGraph
    G = nx.MultiDiGraph()

    # Step 5: Iterate over each forwarded message to add edges to the graph
    for index, row in forwarded_messages.iterrows():
        source_peer_id = row['fwd_from']
        target_peer_id = row['peer_id']
        message_id = row['id']
        message_text = row['messageText'] if pd.notna(row['messageText']) else ''
        message_date = row['messageDate'].strftime('%Y-%m-%d')  # Convert to string
        time_period = str(row['time_period'])
        
        # Get chat names or use 'unknown name'
        source_chat = peer_id_to_chat.get(source_peer_id, "unknown name")
        target_chat = peer_id_to_chat.get(target_peer_id, "unknown name")
        
        # Use peer_id as unique identifier (converted to string)
        source_node = str(source_peer_id)
        target_node = str(target_peer_id)
        
        # Determine node colors based on chat names
        source_color = chat_to_color.get(source_chat, default_node_color)
        target_color = chat_to_color.get(target_chat, default_node_color)
        
        # Add nodes with label as chat names and assigned colors
        if not G.has_node(source_node):
            G.add_node(source_node, label=source_chat, color=source_color)
        if not G.has_node(target_node):
            G.add_node(target_node, label=target_chat, color=target_color)
        
        # Add an edge from source to target with message details as edge attributes
        G.add_edge(
            source_node,
            target_node,
            message_id=message_id,
            text=message_text,
            date=message_date,
            time_period=time_period  # Add time period for filtering
        )

    # Step 6: Create a PyVis Network
    net = Network(height='750px', width='100%', notebook=True, directed=True)

    # Customize the physics layout (optional for better visualization)
    net.force_atlas_2based()

    # Add nodes with labels and colors
    for node, data in G.nodes(data=True):
        label = data.get('label', 'unknown name')
        color = data.get('color', default_node_color)
        net.add_node(
            node,
            label=label,
            title=label,  # Tooltip on hover
            color=color,  # Node color based on chat
            size=15
        )

    # Add edges with tooltips and time period
    for source, target, data in G.edges(data=True):
        message_id = data.get('message_id', '')
        text = data.get('text', '')
        date = data.get('date', '')
        time_period = data.get('time_period', '')
        tooltip = f"<b>Message ID:</b> {message_id}<br><b>Date:</b> {date}<br><b>Text:</b> {text}"
        
        net.add_edge(
            source,
            target,
            title=tooltip,  # Tooltip on hover
            time_period=time_period  # Add time period as edge attribute for filtering
        )

    # Optionally, enable additional features like showing physics controls
    net.show_buttons(filter_=['physics'])

    # Generate JavaScript for multi-year slider and filtering, inserting slider at the top
    time_periods_js_array = "[" + ", ".join([f"'{tp}'" for tp in time_periods_str]) + "]"
    slider_js = f"""
        <script type="text/javascript">
            let timePeriods = {time_periods_js_array};  // JavaScript array of time periods
            let sliderContainer = document.createElement("div");
            sliderContainer.style.margin = "10px";
            sliderContainer.style.textAlign = "center";

            let timeLabel = document.createElement("div");
            timeLabel.style.display = "inline-block";
            timeLabel.style.marginRight = "10px";
            timeLabel.innerHTML = "Showing period: " + timePeriods[0];

            let slider = document.createElement("input");
            slider.type = "range";
            slider.min = 0;
            slider.max = timePeriods.length - 1;
            slider.value = 0;
            slider.id = "timeSlider";
            slider.style.width = "70%";
            slider.style.verticalAlign = "middle";

            slider.oninput = function() {{
                let selectedPeriod = timePeriods[this.value];
                timeLabel.innerHTML = "Showing period: " + selectedPeriod;
                
                network.body.data.edges.update(
                    network.body.data.edges.get().map(edge => {{
                        let isVisible = edge.time_period === selectedPeriod;
                        return {{ id: edge.id, hidden: !isVisible }};
                    }})
                );

                network.body.data.nodes.update(
                    network.body.data.nodes.get().map(node => {{
                        let isNodeVisible = network.body.data.edges.get().some(edge => 
                            (edge.from === node.id || edge.to === node.id) && !edge.hidden
                        );
                        return {{ id: node.id, hidden: !isNodeVisible }};
                    }})
                );
            }};

            sliderContainer.appendChild(timeLabel);
            sliderContainer.appendChild(slider);

            // Insert the slider container before the network container
            let networkContainer = document.getElementById("mynetwork");
            networkContainer.parentNode.insertBefore(sliderContainer, networkContainer);
        </script>
    """

    # Generate and save the interactive graph to an HTML file
    net.save_graph('temp_graph.html')  # Save initial HTML to a temp file

    # Read the saved HTML and inject the slider JavaScript
    with open('temp_graph.html', 'r') as f:
        html = f.read()
    
    # Insert the slider JavaScript before the closing body tag
    html = html.replace('</body>', slider_js + '</body>')

    # Save the final HTML with the slider
    with open(output_file, 'w') as f:
        f.write(html)

    print(f"Interactive graph saved to {output_file}")

# Example usage
visualize_forwarded_messages_with_slider(df, output_file='interactive_graph_with_slider.html', time_unit='month')
