# Introduction
This notebook performs the tasks in the exercise

In [50]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [51]:
from migrator import Migrator
from database import Database
from environs import Env
import tasks as t

## Part 1
Setting up and seeding the database.

In [52]:
env = Env()
env.read_env(".env")

In [53]:
database = Database(
    host=env.str("DB_HOST"),
    port=3307,
    user=env.str("DB_USER"),
    password=env.str("DB_PASSWORD"),
    database=env.str("DB_DATABASE")
)

Connected to: 8.1.0
You are connected to the database: ('mysql',)
-----------------------------------------------



### Create tables

In [43]:
migrator = Migrator(database, 500)

In [44]:
migrator.migrate()

--------------------------------Starting migrate--------------------------------
Found migration files:  ['001_users.sql', '002_activities.sql', '003_trackpoints.sql']
Running migration:   001_users.sql       ✅
Running migration:   002_activities.sql  ✅
Running migration:   003_trackpoints.sql ✅
-------------------------Finished migrate in 45.674 ms--------------------------



### (DANGER) Wipe the DB
🚨 THIS WILL WIPE ALL DATA IN THE TABLES 🚨

In [41]:
migrator.wipe()

---------------------------------Starting wipe----------------------------------
Wiping TrackPoints
Wiping Activities
Wiping Users
---------------------------Finished wipe in 51.69 ms----------------------------



### Seed Database from Data Set

In [45]:
migrator.seed_users()

------------------------------Starting seed_users-------------------------------
Seeded [(182,)] Users
------------------------Finished seed_users in 8.554 ms-------------------------



In [46]:
migrator.seed_activities()

----------------------------Starting seed_activities----------------------------
Generating seed data for user: 135	✅
Generating seed data for user: 132	✅
Generating seed data for user: 104	✅
Generating seed data for user: 103	✅
Generating seed data for user: 168	✅
Generating seed data for user: 157	✅
Generating seed data for user: 150	✅
Generating seed data for user: 159	✅
Generating seed data for user: 166	✅
Generating seed data for user: 161	✅
Generating seed data for user: 102	✅
Generating seed data for user: 105	✅
Generating seed data for user: 133	✅
Generating seed data for user: 134	✅
Generating seed data for user: 160	✅
Generating seed data for user: 158	✅
Generating seed data for user: 167	✅
Generating seed data for user: 151	✅
Generating seed data for user: 169	✅
Generating seed data for user: 156	✅
Generating seed data for user: 024	✅
Generating seed data for user: 023	✅
Generating seed data for user: 015	✅
Generating seed data for user: 012	✅
Generating seed data for user: 

In [47]:
migrator.seed_track_points()

---------------------------Starting seed_track_points---------------------------
Generating seed data for user: 135	✅
Generating seed data for user: 132	✅
Generating seed data for user: 104	✅
Generating seed data for user: 103	✅
Generating seed data for user: 168	✅
Generating seed data for user: 157	✅
Generating seed data for user: 150	✅
Generating seed data for user: 159	✅
Generating seed data for user: 166	✅
Generating seed data for user: 161	✅
Generating seed data for user: 102	✅
Generating seed data for user: 105	✅
Generating seed data for user: 133	✅
Generating seed data for user: 134	✅
Generating seed data for user: 160	✅
Generating seed data for user: 158	✅
Generating seed data for user: 167	✅
Generating seed data for user: 151	✅
Generating seed data for user: 169	✅
Generating seed data for user: 156	✅
Generating seed data for user: 024	✅
Generating seed data for user: 023	✅
Generating seed data for user: 015	✅
Generating seed data for user: 012	✅
Generating seed data for user: 

In [48]:
migrator.create_indices()

----------------------------Starting create_indices-----------------------------
Executing statement
 ALTER TABLE TrackPoints ADD INDEX datetime_idx (datetime); ✅
---------------------Finished create_indices in 2218.913 ms---------------------



## Part 2

In [11]:
tasks = t.Task(database)

### Task 1

In [None]:
tasks.task1()

### Task 2

In [None]:
tasks.task2()

### Task 3

In [None]:
tasks.task3()

### Task 4

In [None]:
tasks.task4()

### Task 5

In [None]:
tasks.task5()

### Task 7

In [None]:
tasks.task7a()

In [None]:
tasks.task7b()

### Task 8

In [None]:
tasks.task8()



In [None]:
# query = """
#     SELECT DISTINCT left.user_id AS UserID1, right.user_id AS UserID2
#     FROM full AS left
#     INNER JOIN full AS right
#         ON left.user_id != right.user_id
#         AND left.datetime <= right.datetime
#         AND MBRContains(ST_BUFFER(left.geom, 50), right.geom)
#         AND TIME_TO_SEC(TIMEDIFF(right.datetime, left.datetime)) <= 30
# """



query = """
SELECT DISTINCT p1.user_id as UserID1, p2.user_id AS UserID2
FROM full AS p1
INNER JOIN full AS p2
    ON p1.user_id != p2.user_id
LIMIT 10
"""

database.query(query)

In [None]:
database.cursor.fetchall()


In [None]:
left_table = """
    CREATE TEMPORARY TABLE p1 AS
        SELECT u.id as user_id, tp.datetime as datetime, tp.geom as geom
        FROM TrackPoints as tp
        INNER JOIN Activities as a
            ON a.id = tp.activity_id
        INNER JOIN Users as u
            on u.id = a.user_id
"""
database.query(left_table)

In [None]:
right_table = """
    CREATE TEMPORARY TABLE p2 AS
        SELECT u.id as user_id, tp.datetime as datetime, tp.geom as geom
        FROM TrackPoints as tp
        INNER JOIN Activities as a
            ON a.id = tp.activity_id
        INNER JOIN Users as u
            on u.id = a.user_id
"""
database.query(right_table)

In [54]:
# query = """
# SELECT COUNT(DISTINCT a1.user_id, a2.user_id) as num_users
# FROM Activities AS a1
# JOIN Activities AS a2 ON a1.user_id < a2.user_id
# JOIN TrackPoints AS tp1 ON a1.id = tp1.activity_id
# JOIN TrackPoints AS tp2 ON a2.id = tp2.activity_id AND tp1.id < tp2.id
# WHERE MBRContains(ST_BUFFER(tp1.geom, 50), tp2.geom)
# AND ABS(TIME_TO_SEC(TIMEDIFF(tp1.datetime, tp2.datetime))) <= 30
# AND ST_Distance_Sphere(tp1.geom, tp2.geom) <= 50
# """
# query = """
# WITH user_pairs AS (
#     SELECT a1.user_id AS user_id1, a2.user_id AS user_id2
#     FROM Activities AS a1
#     JOIN Activities AS a2 ON a1.user_id < a2.user_id
#     JOIN TrackPoints AS tp1 ON a1.id = tp1.activity_id
#     JOIN TrackPoints AS tp2 ON a2.id = tp2.activity_id AND tp1.id < tp2.id
#     WHERE MBRContains(ST_BUFFER(tp1.geom, 50), tp2.geom)
#     AND ABS(TIME_TO_SEC(TIMEDIFF(tp1.datetime, tp2.datetime))) <= 30
#     AND ST_Distance_Sphere(tp1.geom, tp2.geom) <= 50
# )
# SELECT COUNT(DISTINCT user_id) as num_users
# FROM (
#     SELECT user_id1 AS user_id FROM user_pairs
#     UNION
#     SELECT user_id2 FROM user_pairs
# ) AS user_ids;
# """
query = """
WITH user_pairs AS (
    SELECT a1.user_id AS user_id1, a2.user_id AS user_id2
    FROM Activities AS a1
    JOIN Activities AS a2 ON a1.user_id < a2.user_id
    JOIN TrackPoints AS tp1 ON a1.id = tp1.activity_id
    JOIN TrackPoints AS tp2 ON a2.id = tp2.activity_id AND tp1.id < tp2.id
    WHERE ST_Distance_Sphere(tp1.geom, tp2.geom) <= 50
    AND ABS(TIME_TO_SEC(TIMEDIFF(tp1.datetime, tp2.datetime))) <= 30
)
SELECT COUNT(DISTINCT user_id) as num_users
FROM (
    SELECT user_id1 AS user_id FROM user_pairs
    UNION
    SELECT user_id2 FROM user_pairs
) AS user_ids;
"""

database.query(query)

Running statement:
 
WITH user_pairs AS (
    SELECT a1.user_id AS user_id1, a2.user_id AS user_id2
    FROM Activities AS a1
    JOIN Activities AS a2 ON a1.user_id < a2.user_id
    JOIN TrackPoints AS tp1 ON a1.id = tp1.activity_id
    JOIN TrackPoints AS tp2 ON a2.id = tp2.activity_id AND tp1.id < tp2.id
    WHERE ST_Distance_Sphere(tp1.geom, tp2.geom) <= 50
    AND ABS(TIME_TO_SEC(TIMEDIFF(tp1.datetime, tp2.datetime))) <= 30
)
SELECT COUNT(DISTINCT user_id) as num_users
FROM (
    SELECT user_id1 AS user_id FROM user_pairs
    UNION
    SELECT user_id2 FROM user_pairs
) AS user_ids;



OperationalError: 2013 (HY000): Lost connection to MySQL server during query