In [None]:
!pip install pandas matplotlib

In [None]:
import json

# Read the exported chat archive as UTF-8 JSON. The path is relative, so it is
# resolved against the current working directory.
with open("result.json", mode="r", encoding="utf-8") as f:
    data = json.load(f)

# Only the records stored under the top-level "messages" key are used below.
messages = data["messages"]

In [None]:
import pandas as pd

# Turn the exported records into a table (presumably one dict per message,
# giving one row per record -- verify against the export).
df = pd.DataFrame(data=messages)

# Preview the first five rows as the cell output.
df.head(n=5)


In [None]:
def _flatten_text(value):
    """Normalise a raw ``text`` value to a plain string.

    A string is returned unchanged. A list keeps its string elements, joined
    with a single space (non-string elements are skipped). Anything else
    (e.g. a missing value) becomes the empty string.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, list):
        # Keep the textual parts instead of discarding the whole message;
        # otherwise every list-valued message would end up as "".
        return " ".join(part for part in value if isinstance(part, str))
    return ""


# Keep only rows whose type is "message". `.copy()` gives an independent frame,
# so the column assignments below (and in later cells) modify `df` itself and
# do not raise SettingWithCopyWarning.
df = df[df["type"] == "message"].copy()

# Parse the timestamps so the `.dt` accessor can be used later.
df["date"] = pd.to_datetime(df["date"])

# After this line every `text` value is a str.
df["text"] = df["text"].apply(_flatten_text)


In [None]:
# Calendar date of every message (time of day dropped).
df['date_only'] = df['date'].dt.date

# Number of messages per calendar date.
daily = df.groupby(by='date_only').size()

# Line chart of the daily message counts.
daily.plot.line(figsize=(12, 4))


In [None]:
# Optionally merge an alias into a single display name; edit the mapping.
df['from'] = df['from'].replace(
    {'your nickname': 'your name'}
)  # if needed

# The ten senders with the most messages (value_counts sorts descending).
top_authors = df['from'].value_counts().head(n=10)

# Horizontal bars; inverting the y-axis puts the first (most active) sender
# at the top of the chart.
top_authors.plot.barh(figsize=(10, 5)).invert_yaxis()


In [None]:
# Hour of day taken from each message timestamp.
df["hour"] = df["date"].dt.hour

# Messages per hour, ordered by hour (sort_index) rather than by frequency.
(
    df["hour"]
    .value_counts()
    .sort_index()
    .plot.bar(title="by hours")
)


In [None]:
# Stacked bar chart: number of media messages per author, split by media type.
#
# A DataFrame built from records only has a `media_type` column when at least
# one record carries that key, and an all-missing column leaves nothing to
# plot. Check both up front instead of failing with KeyError or a plotting
# error.
if 'media_type' in df.columns and df['media_type'].notna().any():
    # Rows that actually have a media type.
    media_df = df[df['media_type'].notna()]
    # Rows: authors, columns: media types, cells: message counts (0 if none).
    media = media_df.groupby(['from', 'media_type']).size().unstack(fill_value=0)
    media.plot(kind='bar', stacked=True, figsize=(10, 6), title='types of media')
else:
    print("no media messages found")

In [None]:
def extract_clean_text(text):
    """Return the plain-string content of a message ``text`` value.

    * ``str``  -> returned unchanged.
    * ``list`` -> its string elements joined with a single space; elements of
      any other type are skipped.
    * anything else -> the empty string.
    """
    if isinstance(text, list):
        return ' '.join(part for part in text if isinstance(part, str))
    return text if isinstance(text, str) else ''

# Plain-string version of every message, used by the word statistics below.
df['clean_text'] = df['text'].apply(func=extract_clean_text)

In [None]:
import re

# Join all messages into one lower-cased string and pull out the tokens:
# runs of lower-case Cyrillic (а-я, ё) or Latin (a-z) letters delimited by
# word boundaries.
words = re.findall(
    r'\b[а-яa-zё]+\b',
    ' '.join(df['clean_text']).lower(),
)

# Words excluded from the statistics (duplicates are harmless in a set).
stopwords = {
    'и','в','не','на','а','я','с','что','это','у','так','то','ты','мы','он','она','они','бы',
    'как','за','по','ну','да','его','её','ещё','меня','тебя','для','был','были','будет','есть',
    'нет','о','же','все','тут','там','тоже','то','к','ой','ахах','аха','ли','или','где','ну','если',
    'но','вот','тогда','уже','такой','такая','очень','при','сам','нам','вас','ваш','моё','ещё',
    'блин','ладно','ага','ок','короче','щас','чё','чел','нууу','ммм','типо','себя','них','них','ниче',
    'потом','чтоб','мне','тебе','тебя','себе','себя','хз', 'из', 'от', 'че', 'мб'
}

# Additional tokens that are always excluded.
blacklist = {'false', 'true', 'owner', 'ownership', 'company', 'gmbh', 'mvz', 'entity', 'practice', 'child', 'augenzentrum', 'n'}# do not delete

# Keep only the tokens that appear in neither exclusion set.
filtered_words = [
    token
    for token in words
    if not (token in stopwords or token in blacklist)
]

In [None]:
from collections import Counter

# The 30 most frequent remaining words as (word, count) pairs, most common first.
top_words = Counter(filtered_words).most_common(n=30)

In [None]:
import matplotlib.pyplot as plt

# Horizontal bar chart of the most frequent words.
# Unpacking zip(*top_words) raises ValueError when the list is empty (e.g. all
# tokens were filtered out), so report that case instead of crashing.
if top_words:
    # Split the (word, count) pairs into two parallel tuples.
    w, c = zip(*top_words)

    plt.figure(figsize=(10, 6))
    # barh draws the first item at the bottom; reversing puts the most
    # frequent word at the top.
    plt.barh(w[::-1], c[::-1], color='coral')
    plt.tight_layout()
    plt.show()
else:
    print("no words found")

In [None]:
authors = []# enter your names


def count_top_words(texts, excluded, top_n=15):
    """Return the ``top_n`` most common words in ``texts``.

    Parameters:
        texts: iterable of strings; they are joined with spaces and
            lower-cased before tokenising.
        excluded: container of words to leave out of the count.
        top_n: maximum number of (word, count) pairs to return.

    Returns:
        A list of ``(word, count)`` tuples, most frequent first; empty when no
        word survives the filtering.
    """
    # Tokens are runs of lower-case Cyrillic/Latin letters between word boundaries.
    tokens = re.findall(r'\b[а-яa-zё]+\b', ' '.join(texts).lower())
    return Counter(t for t in tokens if t not in excluded).most_common(top_n)


# One chart per listed author with that author's 15 most frequent words.
for author in authors:
    author_texts = df[df['from'] == author]['text'].apply(extract_clean_text)

    # Names are specific to this cell so the notebook-level `words`,
    # `filtered_words`, `w` and `c` from the overall statistics are not
    # overwritten (re-running those cells would otherwise show stale data).
    author_top_words = count_top_words(author_texts.tolist(), stopwords | blacklist)

    if author_top_words:
        author_labels, author_counts = zip(*author_top_words)
        plt.figure(figsize=(9, 5))
        # Reverse so the most frequent word is drawn at the top.
        plt.barh(author_labels[::-1], author_counts[::-1], color='skyblue')
        plt.title(f"top from {author}", fontsize=14)
        plt.tight_layout()
        plt.show()
    else:
        print(f"not found for {author}")
