# Подсчет статистики для Инвест-мэтров

In [1]:
import load_env

In [2]:
import datetime as dt
import logging
import os
from collections import namedtuple

import pandas as pd
import supabase
from tqdm.auto import tqdm

import supabasefs
from scanner import Scanner

logging.getLogger("pyrogram").setLevel("ERROR")
logging.getLogger("urllib3").setLevel("ERROR")

2023-04-30 17:35:52,434:INFO - Using TgCrypto


In [3]:
LIMIT_HISTORY = dt.timedelta(days=30)  # насколько лезть вглубь чата

In [4]:
client = supabase.create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
fs = supabasefs.SupabaseTableFileSystem(client, "sessions")
scanner = Scanner(fs=fs, chat_cache=False)


## Взять список каналов

In [5]:
channels = {"@bekirovdanil", "@flipping_invest", "@atsogoev", "@ligainvestblog", "@d_smirnovv", "@thebestinvestru", "@na_remonte", "primetr_ru"}

## Взять статистику по каждому каналу

In [6]:
Msg = namedtuple("Message", "username link reach reactions")


async def collect_stats(channel) -> int:
    msgs = []

    async for msg in scanner.get_chat_history(
        channel, min_date=dt.datetime.now() - LIMIT_HISTORY
    ):
        reactions = (
            (
                sum(reaction.count for reaction in msg.reactions.reactions)
                if msg.reactions
                else 0
            )
            + (msg.forwards or 0)
            + await scanner.get_discussion_replies_count(channel, msg.id)
        )
        msgs.append(
            Msg(
                username=channel,
                link=msg.link,
                reach=msg.views or 0,
                reactions=reactions,
            )
        )

    return msgs


In [7]:
results = []

with tqdm(total=len(channels)) as pbar:
    async with scanner.session(pbar):
        for channel in channels:
            pbar.set_postfix_str(channel)
            results.extend(await collect_stats(channel))
            pbar.update()

        # tasks = [asyncio.create_task(collect_stats(channel)) for channel in channels]
        # for finished_task in asyncio.as_completed(tasks):
            # results.extend(await finished_task)
            # pbar.update()


  0%|          | 0/8 [00:00<?, ?it/s]

## Посчитать голоса и взносы

In [8]:
def calc_stats(msgs: pd.DataFrame):
    stats = msgs.groupby("username").agg({"reach": "mean"})
    stats["reach_percent_of_mean"] = stats["reach"] / stats["reach"].mean() * 100
    stats["votes"] = stats.reach / stats.reach.sum() * 100
    msgs["popularity"] = msgs.reactions / msgs.reach
    most_popular_idx = msgs.groupby("username").popularity.idxmax()
    most_popular = msgs.iloc[most_popular_idx].set_index("username")
    stats["post_for_digest"] = most_popular.link
    stats["post_for_digest_popularity"] = most_popular.popularity
    for col in ["reach", "reach_percent_of_mean", "votes"]:
        stats[col] = pd.to_numeric(stats[col].round(), downcast="integer")

    return stats.sort_values("reach", ascending=False).reset_index()

In [9]:
msgs = pd.DataFrame(results)
stats = calc_stats(msgs)
stats.to_clipboard()
stats

Unnamed: 0,username,reach,reach_percent_of_mean,votes,post_for_digest,post_for_digest_popularity
0,@atsogoev,2000,193,24,https://t.me/atsogoev/4033,0.079511
1,@na_remonte,1892,183,23,https://t.me/na_remonte/1639,0.192268
2,@d_smirnovv,1220,118,15,https://t.me/d_smirnovv/485,0.11925
3,@flipping_invest,1147,111,14,https://t.me/flipping_invest/241,0.111901
4,@ligainvestblog,968,94,12,https://t.me/ligainvestblog/1108,0.071742
5,primetr_ru,440,43,5,https://t.me/primetr_ru/257,0.282219
6,@bekirovdanil,321,31,4,https://t.me/BekirovDanil/277,0.127479
7,@thebestinvestru,286,28,3,https://t.me/thebestinvestru/194,0.079365


In [10]:
total_reach = stats.reach.sum()
total_reach

8274

In [25]:
msgs.sort_values("popularity", ascending=False).groupby("username")[["username", "link", "popularity"]].head(5)

Unnamed: 0,username,link,popularity
565,primetr_ru,https://t.me/primetr_ru/257,0.282219
42,@na_remonte,https://t.me/na_remonte/1639,0.192268
572,primetr_ru,https://t.me/primetr_ru/250,0.16317
41,@na_remonte,https://t.me/na_remonte/1640,0.159771
536,primetr_ru,https://t.me/primetr_ru/286,0.132931
675,@bekirovdanil,https://t.me/BekirovDanil/277,0.127479
535,primetr_ru,https://t.me/primetr_ru/287,0.121813
615,@d_smirnovv,https://t.me/d_smirnovv/485,0.11925
595,@flipping_invest,https://t.me/flipping_invest/241,0.111901
531,primetr_ru,https://t.me/primetr_ru/291,0.111111


In [None]:
new_channel = "@za_lubuyu_nedvizhku"
async with scanner.session():
    new_channel_msg = await collect_stats(new_channel)
new_channel_stats = calc_stats(new_channel_msg)
new_channel_stats

ConnectionError: Client has not been started yet

In [None]:
new_channel_depo = total_reach / new_channel_stats.iloc[0].reach * 1000 
new_channel_depo

10391.364902506964

In [None]:
new_channel_depo = total_reach * 2
new_channel_depo

14922