In [3]:
import logging
from telethon import TelegramClient
import csv
import os
import json
from dotenv import load_dotenv

In [4]:
# Set up logging
logging.basicConfig(
    filename='scraping.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

In [5]:
# Load environment variables for Telegram API credentials
load_dotenv('.env')
api_id = os.getenv('TG_API_ID')
api_hash = os.getenv('TG_API_HASH')
phone = os.getenv('TG_PHONE_NUMBER')

In [6]:
# Function to read channels from a JSON file
def load_channels_from_json(file_path):
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)
            return data.get('channels', [])
    except Exception as e:
        logging.error(f"Error reading channels from JSON: {e}")
        return []

In [7]:
# Function to scrape data from a single channel
async def scrape_channel(client, channel_username, writer, media_dir, num_messages):
    try:
        entity = await client.get_entity(channel_username)
        channel_title = entity.title
        
        message_count = 0
        async for message in client.iter_messages(entity):
            if message_count >= num_messages:
                break  # Stop after scraping the specified number of messages
            
            media_path = None
            if message.media:
                # Download media if present and save it
                filename = f"{channel_username}_{message.id}.jpg"  # Change extension based on media type
                media_path = os.path.join(media_dir, filename)
                await client.download_media(message.media, media_path)
                logging.info(f"Downloaded media for message ID {message.id}.")
            
            # Write message details to CSV
            writer.writerow([channel_title, channel_username, message.id, message.message, message.date, media_path])
            logging.info(f"Processed message ID {message.id} from {channel_username}.")
            
            message_count += 1

        if message_count == 0:
            logging.info(f"No messages found for {channel_username}.")

    except Exception as e:
        logging.error(f"Error while scraping {channel_username}: {e}")

# Initialize the client once with a session file
client = TelegramClient('scraping_session', api_id, api_hash)

# Main function to run the scraper
async def main():
    try:
        await client.start(phone)
        logging.info("Client started successfully.")
        
        media_dir = 'photos'
        os.makedirs(media_dir, exist_ok=True)

        # Load channels from JSON file
        channels = load_channels_from_json('channels.json')
        
        num_messages_to_scrape = 20  # Define how many messages to scrape from each channel

        for channel in channels:
            csv_filename = f"{channel[1:]}_data.csv"  # Create CSV file for each channel (removing '@' from name)
            with open(csv_filename, 'a', newline='', encoding='utf-8') as file:
                writer = csv.writer(file)
                writer.writerow(['Channel Title', 'Channel Username', 'Message ID', 'Message', 'Date', 'Media Path'])
                
                await scrape_channel(client, channel, writer, media_dir, num_messages_to_scrape)
                logging.info(f"Scraped data from {channel}.")

    except Exception as e:
        logging.error(f"Error in main function: {e}")

if __name__ == "__main__":
    import asyncio
    asyncio.run(main())

RuntimeError: asyncio.run() cannot be called from a running event loop

In [None]:
async def scrape_channel_messages(channel_username):
    channel = await client.get_entity(channel_username)
    async for message in client.iter_messages(channel, limit=100):
        print(f"Message: {message.text}, Date: {message.date}")
