## EX 09

In [78]:
import pandas as pd
import sqlite3
import plotly.graph_objects as go

# Connect to the database and load the raw checker log.
conn = sqlite3.connect("../data/checking-logs.sqlite")
try:
    df = pd.read_sql("SELECT uid, status, timestamp, numTrials, labname FROM checker", conn)
finally:
    # Release the SQLite handle even if the query fails; the rest of the
    # script works purely on the in-memory DataFrame.
    conn.close()

# Keep only rows whose uid starts with "user_", whose status is 'ready'
# and whose lab is 'project1'.
# na=False makes rows with a NULL uid count as "no match" so the mask stays
# strictly boolean instead of containing NaN.
df = df[
    df['uid'].str.startswith("user_", na=False) &
    (df['status'] == 'ready') &
    (df['labname'] == 'project1')
].copy()

# Fail early with an explicit message rather than producing an empty chart.
if df.empty:
    raise ValueError("❌ После фильтрации не осталось записей. Проверь labname и status!")

# Parse timestamps and order each user's records chronologically so that the
# per-user difference below compares consecutive records.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.sort_values(['uid', 'timestamp'])

# Turn numTrials into per-record increments within each user.
# (presumably numTrials is a running counter per user -- verify against the data)
trial_delta = df.groupby('uid')['numTrials'].diff()
# The first record of every user has no predecessor: use its own numTrials.
trial_delta = trial_delta.fillna(df['numTrials'])
# Negative steps are treated as "no new trials".
trial_delta = trial_delta.clip(lower=0)

df['numTrials_diff'] = trial_delta
df['numTrials'] = trial_delta.round().astype(int)

# From here on only the calendar day matters, not the time of day.
df['timestamp'] = df['timestamp'].dt.date

# Total trial increments per (day, user).
grouped = df.groupby(['timestamp', 'uid'], as_index=False)['numTrials'].sum()

# Build the dense grid of every calendar day in the observed span crossed
# with every user, so days without activity are represented explicitly.
first_day = grouped['timestamp'].min()
last_day = grouped['timestamp'].max()
all_dates = pd.date_range(start=first_day, end=last_day).date
all_users = grouped['uid'].unique()
full_index = pd.MultiIndex.from_product([all_dates, all_users], names=['timestamp', 'uid'])

# Missing (day, user) pairs get 0 trials; then accumulate per user over time.
daily_by_user = grouped.set_index(['timestamp', 'uid'])
full_data = daily_by_user.reindex(full_index, fill_value=0).reset_index()
full_data['cumulative_trials'] = full_data.groupby('uid')['numTrials'].cumsum()

# Limit the animation to 21 active days (so the tick labels run from 0 to 20).
max_days = 21

# Active days only: days on which at least one user added trials.
daily_totals = full_data.groupby('timestamp')['numTrials'].sum()
active_days = daily_totals.index[(daily_totals > 0).to_numpy()]
active_dates_sorted = sorted(active_days)[:max_days]

# Map each kept day to an x position; positions are spaced 2 units apart.
date_to_idx_scaled = dict(
    zip(active_dates_sorted, range(0, 2 * len(active_dates_sorted), 2))
)

# Attach the x position to every row and drop rows for days that were not kept
# (inactive days, or active days beyond the max_days limit).
full_data = (
    full_data
    .assign(time_idx_scaled=full_data['timestamp'].map(date_to_idx_scaled))
    .dropna(subset=['time_idx_scaled'])
    .astype({'time_idx_scaled': int})
)

# Initial figure data: one empty trace per user, in sorted uid order.
users = sorted(full_data['uid'].unique())
init_data = [go.Scatter(x=[], y=[], mode='lines+markers', name=uid) for uid in users]

# Slice the table by user once instead of re-filtering it for every frame.
rows_by_user = {uid: full_data[full_data['uid'] == uid] for uid in users}

# Animation frames: frame k shows every user's curve up to x position k.
frames = []
for cutoff in sorted(full_data['time_idx_scaled'].unique()):
    traces = []
    for uid in users:
        history = rows_by_user[uid]
        history = history[history['time_idx_scaled'] <= cutoff]
        if history.empty:
            continue
        traces.append(
            go.Scatter(
                x=history['time_idx_scaled'],
                y=history['cumulative_trials'],
                mode='lines+markers',
                name=uid,
            )
        )
    frames.append(go.Frame(data=traces, name=str(cutoff)))

# Automatic Y range: add 10% headroom above the highest cumulative value and
# round UP to the next multiple of the tick step. Rounding down (as a plain
# floor division would) can put the upper bound below the data and clip the
# curves, or collapse the axis to [0, 0] for small values.
y_tick = 20
y_max = full_data['cumulative_trials'].max()
# -(-a // b) is ceiling division; keep at least one tick step of height.
y_range_max = max(y_tick, int(-(-(y_max * 1.1) // y_tick)) * y_tick)

# Build the figure: empty initial traces, fixed axes, and a Play button that
# runs through the prepared frames.
fig = go.Figure(
    data=init_data,
    layout=go.Layout(
        title="Dynamic of commits per user in project1",
        xaxis=dict(
            zeroline=True,
            # x positions are spaced 2 apart, so max_days days span 0..2*(max_days-1).
            range=[0, 2 * (max_days - 1)],
            dtick=2,
            fixedrange=True,
            # Label each kept day by its ordinal (0, 1, 2, ...) instead of its x position.
            tickmode='array',
            tickvals=list(date_to_idx_scaled.values()),
            ticktext=[str(i) for i in range(len(date_to_idx_scaled))]
        ),
        yaxis=dict(
            zeroline=True,
            dtick=y_tick,
            range=[0, y_range_max],
            fixedrange=True
        ),
        updatemenus=[
            dict(
                type="buttons",
                # args=[None] animates through all frames in order.
                buttons=[dict(label="Play", method="animate", args=[None])]
            )
        ]
    ),
    frames=frames
)

fig.show()
