In [197]:
# Read the whole export file into memory as text.
# The encoding is pinned explicitly: without it open() uses the platform's
# locale encoding, which can mis-decode the non-ASCII text in this file.
with open("result.json", encoding="utf-8") as j:
    json_data = j.read()


In [198]:
from pydantic import BaseModel, ValidationError, Field
from typing import List, Optional

class TextEntity(BaseModel):
    """A fragment of text described by an entity ``type`` label and its ``text``.

    NOTE(review): presumably mirrors the items of a message's
    ``text_entities`` list; ``ClassRecording`` keeps that list untyped and no
    cell shown here references this model -- confirm it is still needed.
    """
    type: str
    text: str

class Reaction(BaseModel):
    """A reaction summary: a ``type`` label, a ``count`` and an ``emoji`` string.

    NOTE(review): presumably mirrors the items of a message's ``reactions``
    list; ``ClassRecording`` keeps that list untyped and no cell shown here
    references this model -- confirm it is still needed.
    """
    type: str
    count: int
    emoji: str  # required field: validation fails for items without an "emoji" key

class ClassRecording(BaseModel):
    """Schema for one exported message that carries an audio recording.

    In this notebook instances are built from the message dicts whose
    ``media_type`` equals ``"audio_file"``. Fields declared without a default
    are required; the remaining ones fall back to the default shown.
    """
    id: int
    type: str
    date: str
    date_unixtime: int
    edited: Optional[str] = ''
    edited_unixtime: Optional[str] = ''  # NOTE(review): kept as str while date_unixtime is int -- confirm this is intended
    from_: str = Field(alias='from')  # 'from' is a Python keyword, so the field is named from_ and populated via the 'from' alias
    from_id: str
    file: Optional[str] = ''
    file_name: str
    thumbnail: Optional[str] = ''
    media_type: str
    title: str
    mime_type: str
    performer: str  # raw speaker name; normalised later by replace_lector_name
    duration_seconds: int
    text: Optional[list|str] = ''  # either a plain string or a list of parts
    text_entities: Optional[list] = []  # left untyped (items are not validated as TextEntity)
    reactions: Optional[list] = []  # left untyped (items are not validated as Reaction)

In [214]:
class All(BaseModel):
    """Container with a ``name``, a ``type`` and a list of ``Message`` items.

    NOTE(review): presumably models the root object of result.json (the cell
    below reads its "messages" key) -- confirm.
    NOTE(review): ``Message`` is not defined in any cell shown here; unless an
    earlier cell defines it, evaluating this class on a fresh kernel raises
    NameError. No cell shown here uses ``All``.
    """
    name: str
    type: str
    messages: List[Message]

import json

# Load the export as text. The encoding is pinned explicitly: without it
# open() uses the platform's locale encoding, which can mis-decode the
# non-ASCII text in this file.
with open("result.json", encoding="utf-8") as j:
    json_data = j.read()

# Only the list stored under the top-level "messages" key is used below.
messages = json.loads(json_data)['messages']

# Keep just the audio posts and validate each one against ClassRecording.
# m.get() is used because not every message dict has a "media_type" key.
classes = [ClassRecording(**m) for m in messages if m.get("media_type") == "audio_file"]




In [272]:
# Distinct performer strings across all recordings, before alias
# normalisation; surrounding whitespace is ignored when comparing.
lectors = {recording.performer.strip() for recording in classes}
lectors

171

In [275]:
# Lecturer aliases.
#
# Maps a fragment of a raw `performer` value (short form, misspelling,
# alternative spelling) to the canonical lecturer name.
# replace_lector_name() checks these keys in insertion order using a
# case-insensitive substring test and stops at the first hit, so the order of
# entries matters when one key could match inside another name.

aliases = {
    "–≤—Ä–∞–¥–∂–∞ —Ö–∞—Ä–∏": "–í—Ä–∞–¥–∂–∞ –•–∞—Ä–∏ –¥–∞—Å",
    "—Ä–∞–¥—Ö–∞ –∫—Ä–∏–ø–∞": "–†–∞–¥—Ö–∞ –ö—Ä–∏–ø–∞ –¥.–¥.",
    "–ê–±—Ö–∞–π –ß": "–ê–±—Ö–∞–π –ß–∞–π—Ç–∞–Ω—å—è –¥–∞—Å",
    "–Æ–≥–∞ –ê–≤–∞—Ç–∞—Ä–∞": "–Æ–≥–∞ –ê–≤–∞—Ç–∞—Ä–∞ –¥–∞—Å",
    "–ü—Ä–µ–¥–∞–Ω–Ω—ã–µ": "–†–∞–∑–Ω—ã–µ –ø—Ä–µ–¥–∞–Ω–Ω—ã–µ",
    "–ú–∞–¥–∞–Ω–∞ –º—É–∫—É–Ω–¥–∞": "–ú–∞–¥–∞–Ω–∞ –ú—É–∫—É–Ω–¥–∞ –¥–∞—Å",
    "–°–∞—Ç—Ç–≤–∞ –ß–∞–Ω–¥—Ä–∞ –¥–∞—Å": "–°–∞—Ç—Ç–≤–∞—Ç–∞ –ß–∞–Ω–¥—Ä–∞ –¥–∞—Å",
    "–ì–∞—Ä—É—Ä–∞ –†–∞–¥–∂": "–ì–∞—Ä—É–¥–∞ –†–∞–¥–∂ –¥–∞—Å",
    "–ì–∞—Ä—É–¥–∞—Ä–∞–¥–∂": "–ì–∞—Ä—É–¥–∞ –†–∞–¥–∂ –¥–∞—Å",
    "–í–µ–¥–∞ –í—å—è—Å–∞": "–í–µ–¥–∞–≤—å—è—Å–∞ –¥–∞—Å",
    "–í–µ–¥–∞–≤—å—è—Å": "–í–µ–¥–∞–≤—å—è—Å–∞ –¥–∞—Å",
    "–ë—Ö–∞–∫—Ç–∞ –°–µ—Ä–≥–µ–π": "–†–∞–¥—Ö–∞-–∫–∞–Ω—Ç–∞ –¥–∞—Å",
    "–í—å—è—Å–∞ –®—É–Ω–æ": "–í—å—è—Å–∞ –¥–∞—Å",
    "–í—å—è—Å–∞-—à—É–Ω–æ": "–í—å—è—Å–∞ –¥–∞—Å",
    "–î–∞–º–æ–¥–∞—Ä –ê–Ω–∞–Ω–¥–∞": "–î–∞–º–æ–¥–∞—Ä –ê–Ω–∞–Ω–¥–∞ –¥–∞—Å",
    "–î–∞–º–æ–¥–∞—Ä–∞ –ê–Ω–∞–Ω–¥–∞": "–î–∞–º–æ–¥–∞—Ä –ê–Ω–∞–Ω–¥–∞ –¥–∞—Å",
    "–î–∞–º–æ–¥–∞—Ä–∞–Ω–∞–Ω–¥–∞": "–î–∞–º–æ–¥–∞—Ä –ê–Ω–∞–Ω–¥–∞ –¥–∞—Å",
    '–ó–∏–Ω–∫–µ–≤–∏—á': '–ê–≤–∞—Ç–∞—Ä–∏-–ö—Ä–∏—à–Ω–∞ –¥–∞—Å',
    '–ê–≤–∞—Ç–∞—Ä–∏-–∫—Ä–∏—à–Ω–∞ –¥–∞—Å': '–ê–≤–∞—Ç–∞—Ä–∏-–ö—Ä–∏—à–Ω–∞ –¥–∞—Å',
    "–ê–º—Ä–∏—Ç–∞ –í–∏—à–∞–∫—Ö–∞": "–ê–º—Ä–∏—Ç–∞ –í–∏—à–∞–∫—Ö–∞ –¥.–¥.",
    "–ê–Ω–∞–Ω—Ç–∞-—à–∞–π–∏ –¥–∞—Å": "–ê–Ω–∞–Ω—Ç–∞ –®–∞–π–∏ –¥–∞—Å",
    "–ë—É–ª–∞–≤–∏–Ω": "–í–∞–π—à–Ω–∞–≤ –¢—Ö–∞–∫—É—Ä –¥–∞—Å",
    "–í–∞–∏—à–Ω–∞–≤ –¢—Ö–∞–∫—É—Ä": "–í–∞–π—à–Ω–∞–≤ –¢—Ö–∞–∫—É—Ä –¥–∞—Å",
    "–ë—Ö–∞–∫—Ç–∏–≤–µ–¥–∞–Ω—Ç–∞ –°–≤–∞–º–∏": "–®—Ä–∏–ª–∞ –ü—Ä–∞–±—Ö—É–ø–∞–¥–∞",
    "–í–∞–Ω–∞–Ω–∏–Ω–∞—Ç—Ö–∞": "–í–∞–Ω–∏–Ω–∞—Ç—Ö–∞ –í–∞—Å—É –¥–∞—Å",
    "–í–∞–Ω–∏–Ω–∏—Ç—Ö–∞": "–í–∞–Ω–∏–Ω–∞—Ç—Ö–∞ –í–∞—Å—É –¥–∞—Å",
    "–í–∞–Ω–∏–Ω–∞–Ω—Ç—Ö–∞": "–í–∞–Ω–∏–Ω–∞—Ç—Ö–∞ –í–∞—Å—É –¥–∞—Å",
    "–í–∞–Ω–∏–∫—Ä–∏—à–Ω–∞": "–í–∞–Ω–∏–∫—Ä–∏—à–Ω–∞–¥–∞—Å –¥–∞—Å",
    "–í–∞–Ω–∏ –ö—Ä–∏—à–Ω–∞": "–í–∞–Ω–∏–∫—Ä–∏—à–Ω–∞–¥–∞—Å –¥–∞—Å",
    "–í—Å—Ç—Ä–µ—á–∞ –û–±—â–∏–Ω—ã": "–í—Å—Ç—Ä–µ—á–∞ –æ–±—â–∏–Ω—ã",
    "–ì–∞—É—Ä–∞–Ω–≥–∞ –í–∏–ª–∞—Å": "–ì–∞—É—Ä–∞–Ω–≥–∞ –í–∏–ª–∞—Å–∞ –¥–∞—Å",
    "–ì—É—Ä–∞–Ω–≥–∞ –í–∏–ª–∞—Å": "–ì–∞—É—Ä–∞–Ω–≥–∞ –í–∏–ª–∞—Å–∞ –¥–∞—Å",
    "–ì–∏—Ä–∏–¥—Ö–∞—Ä": "–ì–æ—É—Ä–∞ –ì–∏—Ä–∏–¥—Ö–∞—Ä–∏ –¥–∞—Å",
    '–î—Ö–∞–º–µ—Ä—à–≤–∞—Ä–∞ –ú–∞—Ö–∞–ø—Ä–∞–±—Ö—É': "–î—Ö–∞–º–µ—à–≤–∞—Ä–∞ –ú–∞—Ö–∞–ø—Ä–∞–±—Ö—É –¥–∞—Å",
    '–î—Ö–∞–º–µ—à–≤–∞—Ä–∞ –ú–∞—Ö–∞–ø—Ä–∞–±—Ö—Å': "–î—Ö–∞–º–µ—à–≤–∞—Ä–∞ –ú–∞—Ö–∞–ø—Ä–∞–±—Ö—É –¥–∞—Å",
    '–î—Ö–∞–º–µ—à–≤–∞—Ä–∞ –ú–∞—Ö–ø—Ä–∞–±—Ö—É': "–î—Ö–∞–º–µ—à–≤–∞—Ä–∞ –ú–∞—Ö–∞–ø—Ä–∞–±—Ö—É –¥–∞—Å",
    "–ê–¥—Ö–æ–∫—à–∞–¥–∂–∞": "–ê–¥—Ö–æ–∫—à–∞–¥–∂–∞ –°–≤–∞–º–∏",
    "–ê–¥—Ö–æ–∫—à–∞–¥–∞": "–ê–¥—Ö–æ–∫—à–∞–¥–∂–∞ –°–≤–∞–º–∏",
    "–í–∏–≥—å—è–Ω–∞": "–ë—Ö–∞–∫—Ç–∏ –í–∏–≥—å—è–Ω–∞ –ì–æ—Å–≤–∞–º–∏",
    "–ö—Ä–∏—à–Ω–∞ –ö–∞—Ä—É–Ω–∞": "–ö—Ä–∏—à–Ω–∞ –ö–∞—Ä—É–Ω–∞ –¥–∞—Å",
    "–ö—Ä–∏—à–Ω–∞ –ü—Ä–∏—è": "–ö—Ä–∏—à–Ω–∞ –ü—Ä–∏—è –¥.–¥.",
    "–ö—É–Ω–¥–∂–∞ –ú–∞–ª–∏": "–ö—É–Ω–¥–∂–∞–º–∞–ª–∏ –¥–∞—Å",
    "–ú–∞–¥–∞–Ω–∞ –ú–æ—Ö–∞–Ω": "–ú–∞–¥–∞–Ω–∞-–º–æ—Ö–∞–Ω –¥–∞—Å",
    "–ú–æ–¥–∞–Ω–∞ –ú–æ—Ö–∞–Ω": "–ú–∞–¥–∞–Ω–∞-–º–æ—Ö–∞–Ω –¥–∞—Å",
    "–ú–∞–¥–∞–Ω–∞-–º–æ—Ö–∞–Ω": "–ú–∞–¥–∞–Ω–∞-–º–æ—Ö–∞–Ω –¥–∞—Å",
    "–ú–∞–∑–∞ –ì–æ–≤–∞—Ä–¥—Ö–∞–Ω": "–ú–∞—Ö–∞ –ì–æ–≤–∞—Ä–¥—Ö–∞–Ω –¥–∞—Å",
    "–ú–∞–ª–ª–∏–∫–∞- –º–∞–ª–∞": "–ú–∞–ª–∏–∫–∞ –ú–∞–ª–∞ –¥.–¥.",
    "–ù–∞—Ä–æ—Ç—Ç–∞–º –í–∏–ª–∞—Å": "–ù–∞—Ä–æ—Ç—Ç–∞–º –í–∏–ª–∞—Å –¥–∞—Å",
    "–ü—Ä–∏–π–∞ –°–∞–∫—Ö–∏": "–ü—Ä–∏—è –°–∞–∫—Ö–∏ –¥.–¥.",
    "–ü—Ä–∏—è –°–∞–∫—Ö–∏": "–ü—Ä–∏—è –°–∞–∫—Ö–∏ –¥.–¥.",
    "–†–∞–¥—Ö–∞ –î–∂–∏–≤–∞–Ω": "–†–∞–¥—Ö–∞ –î–∂–∏–≤–∞–Ω–∞ –¥–∞—Å",
    "–°–∞–Ω–¥—Ö–π–∞ –ê–≤–∞—Ç–∞—Ä–∞": "–°–∞–Ω–¥—Ö—å—è –ê–≤–∞—Ç–∞—Ä–∞ –¥–∞—Å",
    "–°–∞–Ω–¥—Ömz –ê–≤–∞—Ç–∞—Ä–∞": "–°–∞–Ω–¥—Ö—å—è –ê–≤–∞—Ç–∞—Ä–∞ –¥–∞—Å",
    "–®–∞—á–∏ –°—É—Ç–∞": "–®–∞—á–∏ –°—É—Ç–∞ –¥–∞—Å",
    "–®—Ä–∏ –ù–∏—Ç–∞": "–®—Ä–∏ –ù–∏—Ç–∞–π –ì–∞—É—Ä–∞ –¥–∞—Å",
    "Dasa": "–ê–º–∞–ª–∞ –ö—Ä–∏—à–Ω–∞ –¥–∞—Å",
}

def replace_lector_name(record_item: ClassRecording) -> ClassRecording:
    """Normalise ``record_item.performer`` in place and return the same object.

    Rules, applied in this order:

    1. A performer containing a comma or the ' –∏ ' separator is replaced by
       the fixed collective label.
    2. Otherwise the keys of the module-level ``aliases`` mapping are tried in
       insertion order; the first key found inside the performer string
       (case-insensitive substring test) decides the canonical name.
    3. If nothing matches, the performer is left untouched.

    The record passed in is mutated, so callers that need the raw value
    should pass a copy.
    """
    raw_name = record_item.performer

    # Either separator signals a recording shared by several speakers.
    if ',' in raw_name or ' –∏ ' in raw_name:
        record_item.performer = '–†–∞–∑–Ω—ã–µ –ø—Ä–µ–¥–∞–Ω–Ω—ã–µ'
        return record_item

    # Lower-case the performer once; each alias is compared against it.
    lowered_name = raw_name.lower()
    for alias, canonical in aliases.items():
        if alias.lower() in lowered_name:
            record_item.performer = canonical
            break  # first matching alias wins

    return record_item
    
            
# Normalise performer names on copies, so `classes` keeps the raw values.
cl = [replace_lector_name(original.model_copy()) for original in classes]

# Distinct performer names after normalisation (whitespace-trimmed).
lectors = {recording.performer.strip() for recording in cl}
len(lectors)

90

In [276]:
# Inspect the first parsed recording; `classes` holds the raw records, so the
# performer shown here is the value before alias normalisation.
classes[0]

ClassRecording(id=14, type='message', date='2021-06-24T09:56:07', date_unixtime=1624517767, edited='', edited_unixtime='', from_='–õ–µ–∫—Ü–∏–∏ –∏–∑ —Ö—Ä–∞–º–∞ –ù—å—é –î–∂–∞–π–ø—É—Ä (–ò–°–ö–ö–û–ù –ú–∏–Ω—Å–∫ –•–∞—Ä–µ –ö—Ä–∏—à–Ω–∞)', from_id='channel1426928424', file='(File not included. Change data exporting settings to download.)', file_name='–®–ë_6_4_49_50_–°–∏–ª–∞_–∞—Å–∫–µ–∑—ã_–ê–º–∞–ª–∞_–ö—Ä–∏—à–Ω–∞_–¥–∞—Å_23_06_2021_n2nFwFxviDk.mp3', thumbnail='', media_type='audio_file', title='–®–ë 6.4.49-50. –°–∏–ª–∞ –∞—Å–∫–µ–∑—ã. –ê–º–∞–ª–∞ –ö—Ä–∏—à–Ω–∞ –¥–∞—Å 23.06.2021', mime_type='audio/mpeg', performer='Dasa', duration_seconds=5964, text='', text_entities=[], reactions=[])

In [277]:
# Spot-check: list the raw recordings whose performer contains this name
# fragment (case-sensitive substring test).
[c for c in classes if '–ù–∞—Ä–∞–¥–∞' in c.performer]

[ClassRecording(id=949, type='message', date='2023-07-26T09:17:29', date_unixtime=1690352249, edited='2023-07-26T20:30:50', edited_unixtime='1690392650', from_='–õ–µ–∫—Ü–∏–∏ –∏–∑ —Ö—Ä–∞–º–∞ –ù—å—é –î–∂–∞–π–ø—É—Ä (–ò–°–ö–ö–û–ù –ú–∏–Ω—Å–∫ –•–∞—Ä–µ –ö—Ä–∏—à–Ω–∞)', from_id='channel1426928424', file='(File not included. Change data exporting settings to download.)', file_name='–®–ë_7_14_11_–ù–∞—Ä–∞–¥–∞_–ú—É–Ω–∏_–¥–∞—Å_26_07_2023_2f0EEbt067w.mp3', thumbnail='(File not included. Change data exporting settings to download.)', media_type='audio_file', title='–®–ë 7.14.11 - 26.07.23', mime_type='audio/mpeg', performer='–ù–∞—Ä–∞–¥–∞ –ú—É–Ω–∏ –¥–∞—Å', duration_seconds=4576, text=['–ù–∞—Ä–∞–¥–∞ –ú—É–Ω–∏ –¥–∞—Å\n–®–ë 7.14.11 - 26.07.23\n–õ–µ–∫—Ü–∏–∏ ', {'type': 'mention', 'text': '@newjaipur'}, ''], text_entities=[{'type': 'plain', 'text': '–ù–∞—Ä–∞–¥–∞ –ú—É–Ω–∏ –¥–∞—Å\n–®–ë 7.14.11 - 26.07.23\n–õ–µ–∫—Ü–∏–∏ '}, {'type': 'mention', 'text': '@newjaipur'}, {'type': 'plain', 'text': ''}], reactions=[{'t