In [1]:
import load_env

In [2]:
import datetime as dt
import logging
import pathlib
import re
from collections import namedtuple

import pandas as pd
from fsspec.implementations.local import LocalFileSystem

from scanner import Scanner

# Only surface errors from these chatty third-party loggers.
for noisy_logger in ("pyrogram", "urllib3"):
    logging.getLogger(noisy_logger).setLevel("ERROR")

In [3]:
fs = LocalFileSystem()

# NOTE(review): these look like phone numbers of the accounts Scanner works
# through — confirm against Scanner, and consider loading them from the
# environment instead of keeping them in the notebook.
accounts = [
    "79852227949",
    "79934962253",
    "79037895690",
    "79934957590",
]
scanner = Scanner(accounts, fs)

In [4]:
LIMIT_HISTORY = dt.timedelta(days=30)  # how far back into the chat history to look

In [5]:
# Per-post statistics record: channel, post link, view count and engagement total.
Msg = namedtuple("Message", ["username", "link", "reach", "reactions"])


async def collect_stats(channel) -> list:
    """Collect reach and engagement figures for the recent posts of ``channel``.

    Walks the history that the module-level ``scanner`` yields for ``channel``,
    requesting it with ``min_date`` set to the current local time minus
    ``LIMIT_HISTORY``, and builds one ``Msg`` record per yielded post.

    Args:
        channel: Channel identifier; passed to ``scanner`` unchanged and stored
            as ``username`` in every record.

    Returns:
        A list of ``Msg`` tuples (empty when the history yields nothing).
        ``reach`` is the post's ``views`` (0 when missing). ``reactions`` is the
        sum of all reaction counts, the ``forwards`` count (0 when missing) and
        the discussion replies count reported by ``scanner``.
    """
    oldest = dt.datetime.now() - LIMIT_HISTORY
    stats = []

    async for post in scanner.get_chat_history(channel, min_date=oldest):
        # `post.reactions` may be falsy (no reactions on the post).
        reaction_total = (
            sum(item.count for item in post.reactions.reactions)
            if post.reactions
            else 0
        )
        forwards = post.forwards or 0
        # One extra request per post, awaited separately to keep the sum readable.
        replies = await scanner.get_discussion_replies_count(channel, post.id)

        stats.append(
            Msg(
                username=channel,
                link=post.link,
                reach=post.views or 0,
                reactions=reaction_total + forwards + replies,
            )
        )

    return stats


In [6]:
CHANNEL = "@bekirovdanil"

async with scanner.session():
    results = await collect_stats(CHANNEL)

df = pd.DataFrame(results)

In [7]:
# Engagement as a percentage of reach. `reach` can be 0 (posts without a view
# count are stored as 0), and dividing by it would give `inf`, which sorts to
# the very top of a descending ranking. Mask zero reach to NaN instead, so
# such posts sort last.
df["popularity"] = df.reactions / df.reach.where(df.reach > 0) * 100

In [8]:
# Top 20 posts by popularity. `.copy()` makes `top` an independent frame, so
# the column assignments below never touch (or warn about) a view of `df`.
top = df.sort_values("popularity", ascending=False).head(20).copy()
for col in ["reach", "reactions"]:
    # `errors="ignore"` is deprecated in pandas 2.2; both columns are built
    # from integers, so the default (raise on unparsable values) is safe here.
    top[col] = pd.to_numeric(top[col], downcast="integer")
top

Unnamed: 0,username,link,reach,reactions,popularity
50,@bekirovdanil,https://t.me/BekirovDanil/277,347,45,12.9683
6,@bekirovdanil,https://t.me/BekirovDanil/335,54,5,9.259259
35,@bekirovdanil,https://t.me/BekirovDanil/294,319,26,8.15047
44,@bekirovdanil,https://t.me/BekirovDanil/284,371,26,7.008086
38,@bekirovdanil,https://t.me/BekirovDanil/291,482,31,6.431535
17,@bekirovdanil,https://t.me/BekirovDanil/314,468,30,6.410256
39,@bekirovdanil,https://t.me/BekirovDanil/290,389,24,6.169666
31,@bekirovdanil,https://t.me/BekirovDanil/298,308,14,4.545455
40,@bekirovdanil,https://t.me/BekirovDanil/288,438,18,4.109589
13,@bekirovdanil,https://t.me/BekirovDanil/327,274,11,4.014599


In [9]:
# Render the table, then turn every cell holding a bare https:// URL into a
# clickable anchor before writing the page to disk.
html = re.sub(
    r'<td>(https://.+)</td>',
    r'<td><a href="\g<1>">ссылка</a></td>',
    top.to_html(),
)
pathlib.Path("top.html").write_text(html, encoding="utf-8")

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>username</th>\n      <th>link</th>\n      <th>reach</th>\n      <th>reactions</th>\n      <th>popularity</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>50</th>\n      <td>@bekirovdanil</td>\n      <td><a href="https://t.me/BekirovDanil/277">ссылка</a></td>\n      <td>347</td>\n      <td>45</td>\n      <td>12.968300</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>@bekirovdanil</td>\n      <td><a href="https://t.me/BekirovDanil/335">ссылка</a></td>\n      <td>54</td>\n      <td>5</td>\n      <td>9.259259</td>\n    </tr>\n    <tr>\n      <th>35</th>\n      <td>@bekirovdanil</td>\n      <td><a href="https://t.me/BekirovDanil/294">ссылка</a></td>\n      <td>319</td>\n      <td>26</td>\n      <td>8.150470</td>\n    </tr>\n    <tr>\n      <th>44</th>\n      <td>@bekirovdanil</td>\n      <td><a href="https://t.me/BekirovDanil/284">ссылка</a></td>\n      <td>371<