In [1]:
import os
import warnings
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, inspect

# Preview cell: connect to PostgreSQL using credentials from a .env file,
# reflect the columns of TABLE_TO_CHECK, and display the first ROW_LIMIT rows
# with the configured embedding column left out of the SELECT list.

# --- Setup ---
# Hide pandas UserWarnings so they do not clutter the cell output.
warnings.filterwarnings('ignore', category=UserWarning, module='pandas')
# Show every column when a DataFrame is rendered (the table is wide).
pd.set_option('display.max_columns', None)

# --- Configuration ---
# You can change this to any of the four tables you processed
TABLE_TO_CHECK = "russian_channels_messages"
# NOTE(review): the saved output of this cell lists an "embedding" column and no
# column with this name, so for this table the exclusion appears to be a no-op.
# Confirm which column actually holds the vectors.
EMBEDDING_COLUMN_TO_EXCLUDE = "multilingual_e5_large_instruct"
# Number of rows to preview.
ROW_LIMIT = 10

# --- Database Connection ---
# Load environment variables. Assumes your .env file is in the root of the 'apollolytics-network' directory.
load_dotenv(dotenv_path='../../../.env')

POSTGRES_HOST = os.getenv("POSTGRES_HOST")
POSTGRES_PORT = os.getenv("POSTGRES_PORT")
POSTGRES_DB = os.getenv("POSTGRES_DB")
POSTGRES_USER = os.getenv("POSTGRES_USER")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")

# Fail early with a clear message instead of building a URL that contains the
# literal text "None" for every unset variable.
_required_settings = {
    "POSTGRES_HOST": POSTGRES_HOST,
    "POSTGRES_PORT": POSTGRES_PORT,
    "POSTGRES_DB": POSTGRES_DB,
    "POSTGRES_USER": POSTGRES_USER,
    "POSTGRES_PASSWORD": POSTGRES_PASSWORD,
}
_missing_settings = [name for name, value in _required_settings.items() if value is None]
if _missing_settings:
    raise RuntimeError(
        "Missing database settings: " + ", ".join(_missing_settings)
        + ". Check that the .env file was found and defines them."
    )

# Percent-encode the credentials so characters such as '@', ':', '/' or '#'
# in the user name or password cannot corrupt the URL. quote(..., safe='')
# encodes spaces as %20 (not '+'), which decodes correctly either way.
db_url = (
    f"postgresql://{quote(POSTGRES_USER, safe='')}:{quote(POSTGRES_PASSWORD, safe='')}"
    f"@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
)
engine = create_engine(db_url)


def _quote_identifier(name):
    """Return `name` as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


# --- Fetch and Display Data ---
try:
    with engine.connect() as connection:
        print(f"Successfully connected to the database '{POSTGRES_DB}'.")

        # Reflect on the connection that is already open rather than the
        # engine, so no second connection is checked out from the pool.
        inspector = inspect(connection)

        # Get column information for the table
        all_columns = inspector.get_columns(TABLE_TO_CHECK)
        column_names = [col['name'] for col in all_columns]

        # Tell the reader when the exclusion filter will not remove anything.
        if EMBEDDING_COLUMN_TO_EXCLUDE not in column_names:
            print(
                f"Note: column '{EMBEDDING_COLUMN_TO_EXCLUDE}' was not found in "
                f"'{TABLE_TO_CHECK}'; no column was excluded."
            )

        # Filter out the embedding column
        columns_to_select = [
            _quote_identifier(col)
            for col in column_names
            if col != EMBEDDING_COLUMN_TO_EXCLUDE
        ]
        columns_sql = ', '.join(columns_to_select)

        # Construct the query to fetch the first ROW_LIMIT rows. There is no
        # ORDER BY, so which rows come back is up to the database.
        query = f'SELECT {columns_sql} FROM {_quote_identifier(TABLE_TO_CHECK)} LIMIT {int(ROW_LIMIT)}'
        print(f"\nExecuting query:\n{query}")

        # Fetch the data into a pandas DataFrame
        df = pd.read_sql(query, connection)

        print("\nSuccessfully fetched data. Displaying results:")
        display(df)

except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise so the failure is visible with a traceback; otherwise later
    # cells would run against an undefined (or stale) `df`.
    raise

Successfully connected to the database 'telegram_scraper'.

Executing query:
SELECT "chat_id", "id", "chat_name", "peer_id", "messagedatetime", "messagedate", "messagetext", "out", "mentioned", "media_unread", "silent", "post", "from_scheduled", "legacy", "edit_hide", "pinned", "noforwards", "invert_media", "offline", "from_id", "from_boosts_applied", "saved_peer_id", "fwd_from", "fwd_from_type", "via_bot_id", "via_business_bot_id", "reply_to", "reply_markup", "entities", "edit_date", "post_author", "grouped_id", "ttl_period", "quick_reply_shortcut_id", "effect", "factcheck", "views", "forwards", "replies", "reactions", "embedding", "denazificationofukraine_similarity", "protectionofrussianspeakers_similarity", "natoexpansionthreat_similarity", "biolabsconspiracy_similarity", "historicalunity_similarity", "westernrussophobia_similarity", "sanctionsaseconomicwarfare_similarity", "legitimizingannexedterritories_similarity", "discreditingukraineleadership_similarity" FROM "russian_channel

Unnamed: 0,chat_id,id,chat_name,peer_id,messagedatetime,messagedate,messagetext,out,mentioned,media_unread,silent,post,from_scheduled,legacy,edit_hide,pinned,noforwards,invert_media,offline,from_id,from_boosts_applied,saved_peer_id,fwd_from,fwd_from_type,via_bot_id,via_business_bot_id,reply_to,reply_markup,entities,edit_date,post_author,grouped_id,ttl_period,quick_reply_shortcut_id,effect,factcheck,views,forwards,replies,reactions,embedding,denazificationofukraine_similarity,protectionofrussianspeakers_similarity,natoexpansionthreat_similarity,biolabsconspiracy_similarity,historicalunity_similarity,westernrussophobia_similarity,sanctionsaseconomicwarfare_similarity,legitimizingannexedterritories_similarity,discreditingukraineleadership_similarity
0,1197865170,97284,https://t.me/ssigny,1197865170,2024-05-14 10:19:02,2024-05-14,"👣👣👣 «Возникает желание просто увидеть близких,...",False,False,False,False,True,False,False,True,False,False,False,False,,,,1538600000.0,channel,,,,,[<telethon.tl.types.MessageEntityBold object a...,2024-05-14 10:20:38,,,,,,,32374,1,0,"{'👍': 93, '🤡': 5, '🤣': 1}",,0.879423,0.845218,0.853852,0.844676,0.80767,0.823458,0.814146,0.858401,0.81294
1,1260622817,97284,https://t.me/readovkanews,1260622817,2025-06-06 13:24:41,2025-06-06,"Наши БПЛА являются самыми передовыми в мире, п...",False,False,False,False,True,False,False,True,False,False,False,False,,,,,,,,,,[<telethon.tl.types.MessageEntityBold object a...,2025-06-06 13:24:57,,,,,,,859910,977,297,"{'❤': 306, '👍': 5597, '👎': 162, '👏': 57, '🔥': ...",,0.879423,0.845218,0.853852,0.844676,0.80767,0.823458,0.814146,0.858401,0.81294
2,1036240821,126686,https://t.me/meduzalive,1036240821,2025-05-22 12:54:18,2025-05-22,В КНДР новый эсминец опрокинулся при спуске на...,False,False,False,True,True,False,False,False,False,False,False,False,,,,,,,,,,[<telethon.tl.types.MessageEntityBold object a...,NaT,,,,,,,201623,190,0,,,0.768197,0.766778,0.801701,0.792772,0.769586,0.784271,0.804475,0.797305,0.788694
3,1036362176,233272,https://t.me/rt_russian,1036362176,2025-03-12 16:42:38,2025-03-12,❗️Великобритания отзывает аккредитацию российс...,False,False,False,True,True,False,False,False,False,False,False,False,,,,,,,,,,[<telethon.tl.types.MessageEntityBold object a...,2025-03-12 16:49:17,,,,,,,194638,103,0,"{'❤': 11, '👍': 115, '👎': 231, '😁': 474, '😱': 2...",,0.798714,0.789468,0.818247,0.814519,0.781401,0.818301,0.823363,0.808185,0.791718
4,1394050290,177338,https://t.me/bbbreaking,1394050290,2024-03-14 09:30:06,2024-03-14,❗️Международное энергетическое агентство (МЭА)...,False,False,False,False,True,False,False,False,False,False,False,False,,,,,,,,,,,NaT,,,,,,,455932,107,0,,,0.837228,0.817458,0.828895,0.832179,0.811144,0.850527,0.82169,0.822128,0.82858
5,1050820672,177338,https://t.me/tass_agency,1050820672,2023-01-19 18:00:10,2023-01-19,Жозеп Боррель счел неприемлемыми высказывания ...,False,False,False,False,True,False,False,True,False,False,False,False,,,,,,,,177086.0,,[<telethon.tl.types.MessageEntityBold object a...,2023-01-19 18:00:52,,,,,,,48668,48,0,,,0.837228,0.817458,0.828895,0.832179,0.811144,0.850527,0.82169,0.822128,0.82858
6,1197865170,4603,https://t.me/ssigny,1197865170,2022-01-23 20:51:14,2022-01-23,Сегодня в Бельгии прошли масштабные протесты\n...,False,False,False,False,True,False,False,True,False,False,False,False,,,,,,,,,,[<telethon.tl.types.MessageEntityBold object a...,2022-01-23 20:56:31,,1.314377e+16,,,,,8130,11,0,"{'❤': 1, '👍': 8, '👎': 1, '🔥': 23}",,0.814091,0.808188,0.828004,0.821726,0.803705,0.815708,0.825126,0.835967,0.820333
7,1204082623,4603,https://t.me/proofzzz,1204082623,2022-09-15 17:15:07,2022-09-15,Украинский музыкант Олег Скрипка ( Вопли Видо...,False,False,False,False,True,False,False,True,False,False,False,False,,,,,,,,,,[<telethon.tl.types.MessageEntityTextUrl objec...,2022-10-23 15:01:53,,,,,,,35052,100,0,"{'❤': 4, '👍': 1522, '👏': 5, '💩': 142, '🔥': 16,...",,0.814091,0.808188,0.828004,0.821726,0.803705,0.815708,0.825126,0.835967,0.820333
8,1513431778,4603,https://t.me/dva_majors,1513431778,2022-10-28 14:38:44,2022-10-28,"Канал Выпускайте Кракена сообщает, что глава М...",False,False,False,False,True,False,False,True,False,False,False,False,,,,,,,,,,[<telethon.tl.types.MessageEntityTextUrl objec...,2024-01-22 13:15:16,,,,,,,78596,294,0,"{'👍': 158, '👎': 29, '🔥': 163, '😁': 60, '😢': 8,...",,0.814091,0.808188,0.828004,0.821726,0.803705,0.815708,0.825126,0.835967,0.820333
9,2022231015,4603,https://t.me/mediyca,2022231015,2025-02-04 18:54:49,2025-02-04,Россияне выбирают играть в The Sims вместо тог...,False,False,False,False,True,False,False,False,False,False,False,False,,,,,,,,,,[<telethon.tl.types.MessageEntityBold object a...,2025-02-04 18:54:59,,,,,,,196127,580,0,"{'❤': 64, '🌭': 10, '👍': 31, '👎': 8, '😁': 304, ...",,0.814091,0.808188,0.828004,0.821726,0.803705,0.815708,0.825126,0.835967,0.820333


In [None]:
# Re-display the DataFrame assigned to `df` by the fetch cell above; this only
# works if that cell completed successfully in the current kernel.
# NOTE(review): in the saved output, rows that share an `id` (e.g. 97284,
# 177338, 4603) show identical *_similarity values even though their chat_id
# differs — presumably the similarity columns were written keyed on `id`
# alone; verify against the upstream job.
df