# Exercise 04 : A/B-testing
## Create a connection to the database using the library sqlite3


In [95]:
import pandas as pd
import sqlite3

# Open the SQLite database that holds the checking logs.
# Adjust the relative path below if your copy of the database lives elsewhere.
conn = sqlite3.connect(database='../data/checking-logs.sqlite')

## Получаем средний timestamp первого просмотра из тестовой группы

In [96]:
# Mean moment of the first view in the test group, expressed as a Julian day
# number. Rows for 'project1' and rows without a first view are excluded.
query_avg_first_view = """
SELECT AVG(julianday(first_view_ts)) AS avg_first_view_jd
FROM test
WHERE labname != 'project1' AND first_view_ts IS NOT NULL;
"""

# The query yields a single row with a single column; pull out that scalar.
avg_first_view_jd = pd.read_sql(sql=query_avg_first_view, con=conn).iat[0, 0]
avg_first_view_jd

2458966.651581563

## Запрос и получение результатов для тестовой группы

In [97]:
# Test group: average gap, in hours, between a user's first commit and the lab
# deadline, computed separately for commits made before and after that user's
# first view (first_view_ts).
#
# * filtered_users keeps only users who have at least one lab in EACH of the
#   two partitions. The HAVING clause uses julianday() on both sides so that
#   the user filter applies exactly the same comparison as the WHERE clauses
#   that split the rows below.
# * deadlines.deadlines is read with the 'unixepoch' modifier; the julianday
#   difference is in days, hence the multiplication by 24 to get hours.
# * UNION ALL (not UNION) is used so that no deduplication pass is run over
#   the two labelled rows; this matches the control-group query.
query_test_results = """
WITH filtered_users AS (
    SELECT uid
    FROM test
    WHERE labname != 'project1'
    GROUP BY uid
    HAVING SUM(CASE WHEN julianday(first_commit_ts) < julianday(first_view_ts) THEN 1 ELSE 0 END) > 0
       AND SUM(CASE WHEN julianday(first_commit_ts) >= julianday(first_view_ts) THEN 1 ELSE 0 END) > 0
)
SELECT 'before' AS time,
       AVG((julianday(first_commit_ts) - julianday(deadlines.deadlines, 'unixepoch')) * 24) AS avg_diff
FROM test
JOIN deadlines ON test.labname = deadlines.labs
JOIN filtered_users ON test.uid = filtered_users.uid
WHERE test.labname != 'project1' AND julianday(first_commit_ts) < julianday(first_view_ts)

UNION ALL

SELECT 'after' AS time,
       AVG((julianday(first_commit_ts) - julianday(deadlines.deadlines, 'unixepoch')) * 24) AS avg_diff
FROM test
JOIN deadlines ON test.labname = deadlines.labs
JOIN filtered_users ON test.uid = filtered_users.uid
WHERE test.labname != 'project1' AND julianday(first_commit_ts) >= julianday(first_view_ts);
"""

test_results = pd.read_sql(query_test_results, conn)
test_results


Unnamed: 0,time,avg_diff
0,after,-105.229101
1,before,-61.156438


## Запрос и получение результатов для контрольной группы

In [98]:
# Control group: average gap, in hours, between a user's first commit and the
# lab deadline, split at a fixed threshold. The threshold is the test group's
# average first-view moment (a Julian day number held in avg_first_view_jd).
#
# * The threshold is passed as a bound named parameter (:avg_first_view_jd)
#   instead of being formatted into the SQL text, so the value reaches SQLite
#   as a number and the statement text stays constant.
# * filtered_users keeps only users who have at least one lab on EACH side of
#   the threshold.
# * deadlines.deadlines is read with the 'unixepoch' modifier; the julianday
#   difference is in days, hence the multiplication by 24 to get hours.
query_control_results = """
WITH filtered_users AS (
    SELECT uid
    FROM control
    WHERE labname != 'project1'
    GROUP BY uid
    HAVING SUM(CASE WHEN julianday(first_commit_ts) < :avg_first_view_jd THEN 1 ELSE 0 END) > 0
       AND SUM(CASE WHEN julianday(first_commit_ts) >= :avg_first_view_jd THEN 1 ELSE 0 END) > 0
)
SELECT 'before' AS time,
       AVG((julianday(first_commit_ts) - julianday(deadlines.deadlines, 'unixepoch')) * 24) AS avg_diff
FROM control
JOIN deadlines ON control.labname = deadlines.labs
JOIN filtered_users ON control.uid = filtered_users.uid
WHERE control.labname != 'project1' AND julianday(first_commit_ts) < :avg_first_view_jd

UNION ALL

SELECT 'after' AS time,
       AVG((julianday(first_commit_ts) - julianday(deadlines.deadlines, 'unixepoch')) * 24) AS avg_diff
FROM control
JOIN deadlines ON control.labname = deadlines.labs
JOIN filtered_users ON control.uid = filtered_users.uid
WHERE control.labname != 'project1' AND julianday(first_commit_ts) >= :avg_first_view_jd;
"""

# The same named placeholder appears four times; one dict entry binds them all.
control_results = pd.read_sql(
    query_control_results,
    conn,
    params={"avg_first_view_jd": avg_first_view_jd},
)
control_results


Unnamed: 0,time,avg_diff
0,before,-99.901295
1,after,-118.144425


## Закрываем соединение

In [99]:
# Close the SQLite connection; no further queries are issued after this point.
conn.close()

💡 Интерпретация результата:
```
Test Group:
time	avg_diff (часы)
before	-61.16
after	-105.23
```
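→ После показа новостной ленты студенты стали сдавать раньше: средняя разница с дедлайном изменилась с −61.16 до −105.23 часа, то есть первый коммит в среднем делается примерно на 44 часа раньше (отрицательная разница стала больше по модулю) — положительный эффект.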

```
Control Group:
time	avg_diff (часы)
before	-99.90
after	-118.14
```
→ У контрольной группы тоже наблюдается ускорение (с −99.90 до −118.14 часа, примерно на 18 часов), но оно меньше, чем у тестовой группы.

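📌 Вывод: новостная лента оказала положительное влияние, так как ускорение выполнения заданий у тестовой группы значительно больше (≈44 часа против ≈18 часов у контрольной). Статистическая значимость этой разницы здесь не проверялась.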