In [1]:
import os
from dotenv import load_dotenv
import psycopg2
import pandas as pd

# Populate the process environment from the project-level .env file
# (the path is resolved relative to the notebook's working directory).
load_dotenv("../../.env")

# PostgreSQL connection settings; each falls back to a local default when
# the corresponding variable is not set.
POSTGRES_HOST = os.environ.get("POSTGRES_HOST", "localhost")
POSTGRES_PORT = os.environ.get("POSTGRES_PORT", "5432")
POSTGRES_DB = os.environ.get("POSTGRES_DB", "telegram_scraper")
POSTGRES_USER = os.environ.get("POSTGRES_USER", "postgres")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD", "")

# Message tables to load: one per (language, chat kind) pair, in the order
# russian channels, russian groups, ukrainian channels, ukrainian groups.
TABLES = [
    f"{language}_{chat_kind}_messages"
    for language in ("russian", "ukrainian")
    for chat_kind in ("channels", "groups")
]

# Open a connection using the environment-backed settings defined above.
conn = psycopg2.connect(
    host=POSTGRES_HOST,
    port=POSTGRES_PORT,
    dbname=POSTGRES_DB,
    user=POSTGRES_USER,
    password=POSTGRES_PASSWORD
)

# Per-table frames, collected here and concatenated once after the loop.
dfs = []

# Read each table and tag its rows with the table they came from.
# try/finally guarantees the connection is released even if a query fails;
# note that psycopg2's `with conn:` only ends the transaction and would NOT
# close the connection, so an explicit close() is required.
try:
    for table in TABLES:
        print(f"Reading table: {table}")
        # Table names are interpolated directly into the SQL text. This is
        # acceptable only because TABLES is a fixed list defined in this
        # notebook; never build it from external input.
        # NOTE(review): pandas emits a warning for raw DBAPI connections
        # other than sqlite3 (it officially supports SQLAlchemy
        # connectables); the query still runs through psycopg2.
        temp_df = pd.read_sql(f"SELECT * FROM {table}", conn)
        temp_df['source_table'] = table
        dfs.append(temp_df)
finally:
    conn.close()

# Merge all per-table frames into one, renumbering the index from 0.
df = pd.concat(dfs, axis=0, ignore_index=True)
print(f"\nMerged dataframe shape: {df.shape}")
display(df)

Reading table: russian_channels_messages


  temp_df = pd.read_sql(f"SELECT * FROM {table}", conn)


Reading table: russian_groups_messages
Reading table: ukrainian_channels_messages


  temp_df = pd.read_sql(f"SELECT * FROM {table}", conn)
  temp_df = pd.read_sql(f"SELECT * FROM {table}", conn)


Reading table: ukrainian_groups_messages


  temp_df = pd.read_sql(f"SELECT * FROM {table}", conn)
  df = pd.concat(dfs, axis=0, ignore_index=True)



Merged dataframe shape: (1922161, 42)


Unnamed: 0,chat_id,id,chat_name,peer_id,messagedatetime,messagedate,messagetext,out,mentioned,media_unread,...,ttl_period,quick_reply_shortcut_id,effect,factcheck,views,forwards,replies,reactions,embedding,source_table
0,1708761316,64259,https://t.me/novosti_efir,1708761316,2024-12-31 09:36:07,2024-12-31,Все что не попадает сюда грузим во второй кана...,False,False,False,...,,,,,1706961,5015,0,"{'❤': 655, 'null': 47, '👍': 5539, '👎': 256, '🔥...",,russian_channels_messages
1,1708761316,64260,https://t.me/novosti_efir,1708761316,2024-12-31 10:38:53,2024-12-31,Прогноз на 2025 год от Financial Times.\n\nВ п...,False,False,False,...,,,,,1573614,2019,0,"{'❤': 548, 'null': 9, '👍': 4172, '👎': 117, '🔥'...",,russian_channels_messages
2,1708761316,64262,https://t.me/novosti_efir,1708761316,2024-12-31 11:40:06,2024-12-31,«Новогодний снегопад»: Роскосмос показал косми...,False,False,False,...,,,,,1136629,1100,0,"{'❤': 314, '👍': 4393, '👎': 46, '🔥': 65, '😁': 1...",,russian_channels_messages
3,1708761316,64263,https://t.me/novosti_efir,1708761316,2024-12-31 11:55:11,2024-12-31,Депутат Госдумы Милонов проведет новогодние ка...,False,False,False,...,,,,,999249,4201,0,"{'❤': 132, '👍': 1399, '👎': 235, '🔥': 90, '😁': ...",,russian_channels_messages
4,1708761316,64264,https://t.me/novosti_efir,1708761316,2024-12-31 12:11:31,2024-12-31,"Все будет хорошо, страна будет идти вперед — П...",False,False,False,...,,,,,976982,3244,0,"{'❤': 1007, 'null': 35, '👍': 10142, '👎': 356, ...",,russian_channels_messages
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1922156,1450213869,70004,https://t.me/odessa_infonews,1450213869,2025-05-28 00:54:33,2025-05-28,"🆘 🙏Друзья, огромная просьба, если у вас premiu...",False,False,False,...,,,,,190315,0,1,"{'❤': 266, 'null': 6, '👍': 214, '👎': 11, '🔥': ...",,ukrainian_channels_messages
1922157,1450213869,70005,https://t.me/odessa_infonews,1450213869,2025-05-28 05:15:42,2025-05-28,🅿️За стоянку на Золотом Берегу требуют 50 грив...,False,False,False,...,,,,,141588,210,6,"{'⚡': 12, '❤': 25, '👍': 242, '👎': 71, '💊': 8, ...",,ukrainian_channels_messages
1922158,1450213869,70006,https://t.me/odessa_infonews,1450213869,2025-05-28 07:00:13,2025-05-28,👁 В Офтальмологічному центрі ОДОС мікрохірургі...,False,False,False,...,,,,,94941,207,0,"{'❤': 23, 'null': 2, '👍': 188, '🔥': 6, '😨': 2,...",,ukrainian_channels_messages
1922159,1450213869,70007,https://t.me/odessa_infonews,1450213869,2025-05-28 07:17:06,2025-05-28,Лучшее место для спарринга выбрали…🥊🥊\n\nПрисл...,False,False,False,...,,,,,88078,68,1,"{'❤': 12, 'null': 7, '👍': 64, '🔥': 8, '😁': 115...",,ukrainian_channels_messages


In [5]:
# Number of distinct chat_name values in the merged frame; dropna=False keeps
# missing values in the count, matching len(<column>.unique()).
df["chat_name"].nunique(dropna=False)


48

In [6]:
# Show the column labels of the merged frame.
df.columns

Index(['chat_id', 'id', 'chat_name', 'peer_id', 'messagedatetime',
       'messagedate', 'messagetext', 'out', 'mentioned', 'media_unread',
       'silent', 'post', 'from_scheduled', 'legacy', 'edit_hide', 'pinned',
       'noforwards', 'invert_media', 'offline', 'from_id',
       'from_boosts_applied', 'saved_peer_id', 'fwd_from', 'fwd_from_type',
       'via_bot_id', 'via_business_bot_id', 'reply_to', 'reply_markup',
       'entities', 'edit_date', 'post_author', 'grouped_id', 'ttl_period',
       'quick_reply_shortcut_id', 'effect', 'factcheck', 'views', 'forwards',
       'replies', 'reactions', 'embedding', 'source_table'],
      dtype='object')