In [None]:
# This is a template for the notebook - save as .ipynb file

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Telegram Scraping Demo\n",
    "\n",
    "This notebook demonstrates the Telegram scraping pipeline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import asyncio\n",
    "import json\n",
    "import sys\n",
    "\n",
    "# Add the parent directory to the import path so `src` can be imported\n",
    "# (the relative path is resolved against the kernel's working directory).\n",
    "sys.path.append('../')\n",
    "\n",
    "from src.scraper import TelegramScraper\n",
    "\n",
    "# Initialize scraper\n",
    "scraper = TelegramScraper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run scraper for a single channel\n",
    "async def demo_scrape(channel='lobelia4cosmetics', limit=10):\n",
    "    \"\"\"Print the info for `channel` and scrape a limited number of messages.\n",
    "\n",
    "    Args:\n",
    "        channel: Channel name passed to the scraper calls.\n",
    "        limit: Value forwarded as `limit` to `scrape_channel_messages`.\n",
    "\n",
    "    The client is disconnected in a `finally` block so that an error in\n",
    "    any step after connecting does not leave the session open.\n",
    "    \"\"\"\n",
    "    await scraper.connect()\n",
    "    try:\n",
    "        # Get channel info\n",
    "        channel_info = await scraper.get_channel_info(channel)\n",
    "        print(\"Channel Info:\")\n",
    "        print(json.dumps(channel_info, indent=2))\n",
    "\n",
    "        # Scrape limited messages\n",
    "        messages = await scraper.scrape_channel_messages(channel, limit=limit)\n",
    "        print(f\"\\nFirst {limit} messages scraped: {len(messages)}\")\n",
    "    finally:\n",
    "        # Always release the connection, even if a step above raised.\n",
    "        await scraper.client.disconnect()\n",
    "\n",
    "# Run the demo\n",
    "await demo_scrape()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data Structure\n",
    "\n",
    "Let's examine the structure of scraped data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from pathlib import Path\n",
    "import pandas as pd\n",
    "\n",
    "# Load a sample JSON file\n",
    "data_dir = Path('../data/raw/telegram_messages')\n",
    "# Sort the matches so the same file is selected on every run;\n",
    "# the order produced by rglob is not guaranteed.\n",
    "json_files = sorted(data_dir.rglob('*.json'))\n",
    "\n",
    "if not json_files:\n",
    "    print(f\"No JSON files found under {data_dir}\")\n",
    "else:\n",
    "    sample_path = json_files[0]\n",
    "    with open(sample_path, 'r', encoding='utf-8') as f:\n",
    "        # NOTE(review): the code below indexes with [0], so the file is\n",
    "        # assumed to hold a top-level list of messages - confirm.\n",
    "        sample_data = json.load(f)\n",
    "\n",
    "    print(f\"File: {sample_path}\")\n",
    "    print(f\"Number of messages: {len(sample_data)}\")\n",
    "    if sample_data:\n",
    "        # Guarded so an empty file does not raise IndexError.\n",
    "        print(\"\\nSample message:\")\n",
    "        print(json.dumps(sample_data[0], indent=2))\n",
    "\n",
    "    # Convert to DataFrame for analysis\n",
    "    df = pd.DataFrame(sample_data)\n",
    "    print(f\"\\nDataFrame shape: {df.shape}\")\n",
    "    print(f\"\\nColumns: {df.columns.tolist()}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}