In [None]:
from telethon import TelegramClient
import telethon
from telethon import functions, utils
from telethon.tl.types import Message, Channel
from telethon.tl.types.messages import DialogFilters
from telethon.tl.custom.file import File
from tqdm import tqdm
from dotenv import load_dotenv
from typing import List, Dict, Any
import os
import asyncio
import json
from datetime import date, datetime
from telethon.errors import FloodWaitError
from sqlalchemy.engine.base import Engine
from sqlmodel import create_engine
from models.telegram import MediaItem, Channel


# Credentials come from the environment; a local .env file is loaded first.
load_dotenv()
api_id, api_hash, phone = (
    os.environ[key] for key in ('TG_API_ID', 'TG_API_HASH', 'TG_PHONE')
)
# The session file is addressed relative to the current working directory.
session_file = os.path.join(os.getcwd(), "../user.session")

# Paths
MEDIA_PATH = '../static/media/'
DB_PATH = '../teledeck.db'
engine = create_engine(f"sqlite:///{DB_PATH}")

# Ensure media directory exists
os.makedirs(MEDIA_PATH, exist_ok=True)

class DownloadProgressBar(tqdm):
    """tqdm bar that can be driven by a ``(current, total)`` progress callback.

    The bar is created with ``leave=False`` unless the caller passes an
    explicit ``leave`` keyword.
    """

    def __init__(self, *args, **kwargs):
        # setdefault rather than a hard-coded keyword: passing leave=... used to
        # fail with "got multiple values for keyword argument 'leave'".
        kwargs.setdefault('leave', False)
        super().__init__(*args, **kwargs)

    def update_to(self, current, total):
        """Move the bar to an absolute position.

        Args:
            current: Absolute progress so far.
            total: Expected final value; stored as the bar's total.
        """
        self.total = total
        # tqdm.update() takes an increment, so convert the absolute position.
        self.update(current - self.n)


"""
async def download_media(client, message: Message) -> str:
    with DownloadProgressBar(unit='B', unit_scale=True) as pb:
        return await client.download_media(message, MEDIA_PATH, progress_callback=pb.update_to)
"""
async def download_media(client, message: Message) -> str:
    return await client.download_media(message, MEDIA_PATH)


# Flood prevention
# Upper bound on how many holders the semaphore below admits at once.
MAX_CONCURRENT_TASKS = 5
# Acquired by process_with_semaphore around every unit of work.
semaphore = asyncio.Semaphore(MAX_CONCURRENT_TASKS)


async def delay():
    """Pause for a tenth of a second."""
    pause_seconds = .1
    await asyncio.sleep(pause_seconds)

async def exponential_backoff(attempt):
    """Announce and then sleep for ``2 ** attempt`` seconds.

    Args:
        attempt: Zero-based attempt counter; the wait doubles with each step.
    """
    wait_time = pow(2, attempt)
    print(f"Rate limit hit. Waiting for {wait_time} seconds before retrying.")
    await asyncio.sleep(wait_time)

async def process_with_semaphore(callback: asyncio.Task, task_id: int) -> None:
    """Run one unit of work under the shared semaphore, backing off on flood waits.

    Args:
        callback: Either an awaitable (task or coroutine object) or a
            zero-argument callable that returns a fresh awaitable. Only the
            callable form can be retried: a coroutine object cannot be awaited
            twice, and a finished task just re-raises the same error.
        task_id: Identifier included in the "skipping" messages.

    Returns:
        None. Flood-wait errors are reported on stdout and never propagated;
        any other exception raised by the work propagates to the caller.
    """
    max_attempts = 5
    # Tasks, futures and coroutine objects are not callable; factories are.
    can_retry = callable(callback)
    async with semaphore:
        for attempt in range(max_attempts):
            try:
                await delay()
                # Build a fresh awaitable for every attempt when we are able to.
                await (callback() if can_retry else callback)
                return
            except FloodWaitError:
                # NOTE(review): the wait requested by the server is not consulted
                # here; only the local exponential schedule is used.
                if not can_retry:
                    print(f"Rate limit hit and the awaitable cannot be re-run. Skipping: {task_id}")
                    return
                if attempt < max_attempts - 1:
                    await exponential_backoff(attempt)
                else:
                    print(f"Max attempts reached. Skipping: {task_id}")
                    return



## Experiments and scratch

In [None]:
# Init client
# Build the client from the session file and API credentials defined in the
# setup cell, then open the connection.
client = TelegramClient(session_file, api_id, api_hash)
await client.connect()

In [None]:
# Demo from documentation

async def main():
    """Walk through the basic client operations from the library's demo.

    Uses the notebook-level ``client``. Prints the logged-in account, sends
    three messages to the account's own chat, and prints the last ten
    messages found there.
    """
    # Fetch the logged-in account; "stringify" pretty-prints any Telegram object.
    account = await client.get_me()
    print(account.stringify())

    # Attributes of Telegram objects are reachable with the dot operator.
    username = account.username
    print(username)
    print(account.phone)

    # Listing every dialog, kept for reference:
    # async for dialog in client.iter_dialogs():
    #     print(dialog.name, 'has ID', dialog.id)

    # Messages can be sent to yourself...
    await client.send_message('me', f'Hello, {username}!')
    # ...to some chat ID
    # await client.send_message(-100123456, 'Hello, group!')
    # ...to your contacts
    # await client.send_message('+34600123123', 'Hello, friend!')
    # ...or even to any username
    # await client.send_message('username', 'Testing Telethon!')

    # Markdown is accepted in the message text.
    markdown_text = (
        'This message has **bold**, `code`, __italics__ and '
        'a [nice website](https://example.com)!'
    )
    sent = await client.send_message('me', markdown_text, link_preview=False)

    # The returned object is the sent message itself...
    print(sent.raw_text)

    # ...and can be replied to directly.
    await sent.reply('Cool!')

    # Files, songs, documents and albums can be sent as well:
    # await client.send_file('me', '/home/me/Pictures/holidays.jpg')

    # Print part of the message history of a chat.
    async for history_message in client.iter_messages('me', limit=10):
        print(history_message.id, history_message.text)

        # Media download, kept for reference; the call returns the saved path:
        # if message.photo:
        #     path = await message.download_media()
        #     print('File saved to', path)  # printed after download is done

async with client:
    await main()

In [None]:
# Browse all chats

dialogs = await client.get_dialogs()

dialog_by_id = {}
for d in dialogs:
    real_id, peer_type = utils.resolve_id(d.id)
    print(real_id, peer_type)
    dialog_by_id[real_id] = d

# client.get_channels(dialog_by_id.keys())

In [None]:
# Find recent messages in a chat
filtered = filter(lambda dialog: dialog.name.find('Abrahams') > -1, dialogs)
f: telethon.tl.custom.dialog.Dialog = next(filtered)
last_messages = await client.get_messages(f, limit=20)

[print(m) for m in last_messages]

In [None]:
# Delete most recent message
# await client.delete_messages(f, last_messages[0], revoke=True)

In [None]:
# Find folders
# Request the account's dialog filters; a later cell walks the result's
# `.filters` list to locate one folder by title.
chatFolders: DialogFilters = await client(functions.messages.GetDialogFiltersRequest())
# print(chatFolders.stringify())

In [None]:
# Find chats in media folder
mediaFolder = None
for folder in chatFolders.filters:
    if not hasattr(folder, 'title'):
        continue
    if folder.title == 'MediaView':
        mediaFolder = folder
        break

# print(mediaFolder.stringify())


# target_channels = [dialog_by_id[peer.channel_id] for peer in mediaFolder.include_peers]
# target_channels = [client.get_entity(peer) for peer in mediaFolder.include_peers]
# await client.get_entity(peer)

target_channels = await asyncio.gather(*[client.get_entity(peer) for peer in mediaFolder.include_peers])
for channel in target_channels:
    print(channel.stringify())

In [None]:
# Test messages in the first two channels
"""
for channel_id in target_channels[0:2]:
    async for message in client.iter_messages(channel_id, limit=5):
        print(message.stringify())
"""

# Grab most recent post @ self to test twitter embed
test_message = None
async for message in client.iter_messages('me', limit=1):
    test_message = message

print(test_message.stringify())

# test_message.media = MessageMediaWebPage
print(test_message.document)

In [None]:
# Grab posts in channel

async def collect_posts(client, target_channels, limit=5):
    """Gather messages from several channels into one flat list.

    Args:
        client: Object whose ``iter_messages(channel, limit=...)`` returns an
            async iterator of messages.
        target_channels: Channels to read, visited one after another.
        limit: Passed through to ``iter_messages`` for every channel.

    Returns:
        A list with each channel's messages, in channel order.
    """
    collected = []
    for source in target_channels:
        collected.extend(
            [post async for post in client.iter_messages(source, limit=limit)]
        )
    return collected
# Collect up to 5 messages from each of the target channels.
posts = await collect_posts(client, target_channels, limit=5)

# Examine individual messages

In [None]:
client = TelegramClient(session_file, api_id, api_hash)
print(api_id, api_hash)
await client.connect()
print(client.is_connected())
is_authorized = await client.is_user_authorized()
print(is_authorized)
me = await client.get_me()
print(me.stringify())
engine = create_engine(f"sqlite:///{DB_PATH}")


In [None]:
channel_title = 'GIFs and Vids'

async def find_channel() -> telethon.types.Dialog | None:
    async for dialog in client.iter_dialogs():
        d: telethon.types.Dialog = dialog
        if d.title.find(channel_title) > -1:
            print(d)
            return d
    print("Channel not found.")
    return None

target_channel = await find_channel()


In [None]:
forwardMessage = None
async for message in client.iter_messages(target_channel, 500):
    if message.forward:
        forwardMessage = message
        print(message.stringify())
        break

In [None]:

print(await forwardMessage.forward.get_input_chat())

print(forwardMessage.peer_id.channel_id)
print(forwardMessage.forward.original_fwd.from_id)

In [None]:

messages = await client.get_messages(target_channel, 2):

In [None]:


# Take the first fetched message and dump it in full.
message = messages[0]
print(message.stringify())

# The triple-quoted string below is not executed: it holds a draft of the
# per-message processing (size and sticker filters, a duplicate check against
# the database, the download, media-type detection, and the MediaItem insert).
# NOTE(review): the draft refers to `Session` and `select`, which are not
# imported in the visible part of this notebook, and its bare `return`
# statements only make sense inside a function.
"""
if message.file:
    file: File = message.file
    file_id = file.media.id
    if file.size > 50_000_000:
        print(f"Skipping large file: {file_id}")
        return
    if file.sticker_set is not None:
        print(f"Skipping sticker: {file_id}")
        return

    # Check if file_id already exists in database
    with Session(engine) as session:
        statement = select(MediaItem).where(MediaItem.file_id == file_id)
        existing = session.exec(statement).first()
        if existing:
            print(f"Skipping existing file: {file_id}")
            if existing.channel_id is None:
                existing.channel_id = channel.id
                existing.message_id = message.id
                existing.file_id = file_id
                session.commit()

            print(f"Skipping download for existing file_id: {file_id}")
            return

    file_path = await download_media(client, message)
    if file_path:
        file_name = os.path.basename(file_path)

        if message.video or file_name.lower().endswith(".mp4"):
            media_type = "video"
        elif message.gif:
            media_type = "gif"
        elif message.photo:
            media_type = "photo"
        elif message.document:
            mime_type = message.document.mime_type
            media_type = (
                mime_type.split("/")[-1] if "image/" in mime_type else "document"
            )
        else:
            media_type = "unknown"

        with Session(engine) as session:
            session.add(MediaItem(
                file_id=file_id,
                channel_id=channel.id,
                message_id=message.id,
                date=message.date,
                text=message.text,
                type = media_type,
                file_name=file_name,
                file_size = file.size,
                url= f"/media/{file_name}",
            ))
            session.commit()
        print(f"Processed {media_type}: {file_id}")
elif message.web_preview:
    print(f"Skipping web preview: {message.id}")
else:
    print(f"No media found: {message.id}")

"""


In [None]:

# Scratch inspection of the web-page preview attached to `message`.
# NOTE(review): assumes message.media carries a `webpage` with a `document`;
# confirm for the message being inspected.
# The bare expressions (`message`, `dir(...)`, `web_document.id`) are evaluated
# but produce no output because they are not the last line of the cell.
message
print(message.media.webpage.has_large_media) # true
# Keep the preview's document; a later cell downloads it.
web_document = message.media.webpage.document
print(web_document.mime_type == 'video/mp4') # true
dir(web_document)
# dir(message)
print(message.file.mime_type)
web_document.id
# print(message.file)
# print(dir(message.file))
print(message.file.media.id)
#print(message.video.stringify()) # 5824722488059233779

In [None]:

# Download the preview document into MEDIA_PATH; `dl` holds whatever
# client.download_media returns.
dl = await client.download_media(web_document, MEDIA_PATH)

In [None]:
# Display the download result as the cell output.
dl

# Search

In [None]:

client = TelegramClient(username, api_id, api_hash)
await client.connect()

In [None]:
# Load the Twitter handles to search for. The context manager closes the file
# handle, which a bare open() inside json.load() never did.
with open('./data/twitter_users.json', encoding='utf-8') as users_file:
    twitter_users = json.load(users_file)
twitter_users

In [None]:
from telethon.tl.types import InputMessagesFilterEmpty, InputPeerEmpty, PeerChannel
from telethon.tl.types.messages import MessagesSlice


def makeSearchRequest(query: str, limit: int = 50):
    """Build a global message search request for ``query``.

    Only the query text and the result limit vary. The filter is the empty
    filter, both dates and the offset ID are -1, the offset rate is 0 and
    the offset peer is the empty peer.

    Args:
        query: Text to search for.
        limit: Maximum number of results requested.

    Returns:
        A ``SearchGlobalRequest`` ready to be sent through the client.
    """
    fixed_fields = dict(
        filter=InputMessagesFilterEmpty(),
        min_date=-1,
        max_date=-1,
        offset_rate=0,
        offset_peer=InputPeerEmpty(),
        offset_id=-1,
    )
    return telethon.functions.messages.SearchGlobalRequest(
        q=query, limit=limit, **fixed_fields
    )

# Accumulates the channel IDs found by the search loop in a later cell.
channel_ids = set()


In [None]:
print(results.stringify())

In [None]:

for twitter_user in tqdm(twitter_users):
    searchRequest = makeSearchRequest(twitter_user, 50)
    results: MessagesSlice = await client(searchRequest)
    for message in results.messages:
        if isinstance(message.peer_id, PeerChannel):
            channel_ids.add(message.peer_id.channel_id)


In [None]:
# Freeze the collected IDs into an ascending list; sorted() accepts any
# iterable, so no intermediate list is needed.
channel_ids = sorted(channel_ids)
# with open('./data/searched_channels.json', 'w') as f:
    # json.dump(list(channel_ids), f, indent=2)

In [None]:

async def id_to_channel(client, channel_id):
    channel = await client.get_entity(channel_id)
    return channel


channels: List[Channel] = await asyncio.gather(*[id_to_channel(client, channel_id) for channel_id in channel_ids])


In [None]:
# Sorted usernames of the resolved channels, skipping channels without one.
print(sorted([channel.username for channel in channels if channel.username is not None]))