# Introduction
This notebook performs the tasks in the exercise

In [9]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
from migrator import Migrator
from database import Database
from environs import Env
import tasks as t

## Part 1
Setting up and seeding the database.

In [11]:
env = Env()
env.read_env(".env")

In [12]:
database = Database(
    host=env.str("DB_HOST"),
    port=env.int("DB_PORT"),
    user=env.str("DB_USER"),
    password=env.str("DB_PASSWORD"),
    database=env.str("DB_DATABASE")
)

Connected to: 8.1.0
You are connected to the database: ('mysql',)
-----------------------------------------------



### Create tables

In [13]:
migrator = Migrator(database)

In [14]:
migrator.migrate()

--------------------------------Starting migrate--------------------------------
Found migration files:  ['001_users.sql', '002_activities.sql', '003_trackpoints.sql']
Running migration:   001_users.sql       ✅
Running migration:   002_activities.sql  ✅
Running migration:   003_trackpoints.sql ✅
-------------------------Finished migrate in 42.202 ms--------------------------



### (DANGER) Wipe the DB
🚨 THIS WILL WIPE ALL DATA IN THE TABLES 🚨

In [8]:
migrator.wipe()

---------------------------------Starting wipe----------------------------------
Wiping TrackPoints
-------------------------Finished wipe in 132590.648 ms-------------------------



KeyboardInterrupt: 

### Seed Database from Data Set

In [15]:
migrator.seed_users()

------------------------------Starting seed_users-------------------------------
Seeded [(182,)] Users
------------------------Finished seed_users in 10.724 ms------------------------



In [16]:
migrator.seed_activities()


----------------------------Starting seed_activities----------------------------
Generating seed data for user: 135	✅
Generating seed data for user: 132	✅
Generating seed data for user: 104	✅
Generating seed data for user: 103	✅
Generating seed data for user: 168	✅
Generating seed data for user: 157	✅
Generating seed data for user: 150	✅
Generating seed data for user: 159	✅
Generating seed data for user: 166	✅
Generating seed data for user: 161	✅
Generating seed data for user: 102	✅
Generating seed data for user: 105	✅
Generating seed data for user: 133	✅
Generating seed data for user: 134	✅
Generating seed data for user: 160	✅
Generating seed data for user: 158	✅
Generating seed data for user: 167	✅
Generating seed data for user: 151	✅
Generating seed data for user: 169	✅
Generating seed data for user: 156	✅
Generating seed data for user: 024	✅
Generating seed data for user: 023	✅
Generating seed data for user: 015	✅
Generating seed data for user: 012	✅
Generating seed data for user: 

In [17]:
migrator.seed_track_points()

---------------------------Starting seed_track_points---------------------------
Generating seed data for user: 135	✅
Generating seed data for user: 132	✅
Generating seed data for user: 104	✅
Generating seed data for user: 103	✅
Generating seed data for user: 168	✅
Generating seed data for user: 157	✅
Generating seed data for user: 150	✅
Generating seed data for user: 159	✅
Generating seed data for user: 166	✅
Generating seed data for user: 161	✅
Generating seed data for user: 102	✅
Generating seed data for user: 105	✅
Generating seed data for user: 133	✅
Generating seed data for user: 134	✅
Generating seed data for user: 160	✅
Generating seed data for user: 158	✅
Generating seed data for user: 167	✅
Generating seed data for user: 151	✅
Generating seed data for user: 169	✅
Generating seed data for user: 156	✅
Generating seed data for user: 024	✅
Generating seed data for user: 023	✅
Generating seed data for user: 015	✅
Generating seed data for user: 012	✅
Generating seed data for user: 

## Part 2

In [18]:
tasks = t.Task(database)

### Task 1

In [18]:
tasks.task1()

---------------------------------Starting task1---------------------------------
Running statement:
 
        SELECT
            (SELECT Count(*) AS UsersCount FROM Users) AS '# Users',
            (SELECT Count(*) AS UsersCount FROM Activities) AS '# Activities',
            (SELECT Count(*) AS UsersCount FROM TrackPoints) AS '# TrackPoints';
        
-------------------------Finished task1 in 2807.861 ms--------------------------



Unnamed: 0,# Users,# Activities,# TrackPoints
0,182,16048,38727024


### Task 2

In [19]:
tasks.task2()

---------------------------------Starting task2---------------------------------
Running statement:
 
            SELECT CAST(ROUND(AVG(count), 0) AS SIGNED) AS Avg, MAX(count) AS Max, MIN(count) AS Min
            FROM (
                SELECT COUNT(*) AS count
                FROM TrackPoints as tp
                LEFT JOIN Activities as a
                    ON tp.activity_id = a.id
                GROUP BY a.user_id
            ) as counts;
        
-------------------------Finished task2 in 31231.369 ms-------------------------



Unnamed: 0,Avg,Max,Min
0,223856,4041300,68


### Task 3

In [20]:
tasks.task3()

---------------------------------Starting task3---------------------------------
Running statement:
 
            SELECT UserId, ActivityCount
            FROM
                (
                    SELECT Count(*) as ActivityCount, u.id as UserId
                    FROM Activities as a
                    LEFT JOIN Users as u
                        on a.user_id = u.id
                    GROUP BY u.id
                ) as activityCounts
            ORDER BY ActivityCount DESC
            LIMIT 15;
        
--------------------------Finished task3 in 13.717 ms---------------------------



Unnamed: 0,UserId,ActivityCount
0,128,2102
1,153,1793
2,25,715
3,163,704
4,62,691
5,144,563
6,41,399
7,85,364
8,4,346
9,140,345


### Task 4

In [21]:
tasks.task4()

---------------------------------Starting task4---------------------------------
Running statement:
 
            SELECT DISTINCT u.id AS UserId
            FROM Users AS u
            LEFT JOIN Activities AS a
                ON u.id = a.user_id
            WHERE a.transportation_mode LIKE 'Bus';
        
---------------------------Finished task4 in 4.352 ms---------------------------



Unnamed: 0,UserId
0,10
1,20
2,52
3,62
4,68
5,73
6,80
7,81
8,84
9,85


### Task 5

In [23]:
tasks.task5()

---------------------------------Starting task5---------------------------------
Running statement:
 
            SELECT DISTINCT u.id AS UserID, Count(DISTINCT a.transportation_mode) as '# Transportation Modes', GROUP_CONCAT(DISTINCT a.transportation_mode SEPARATOR ', ') AS 'Transportation Modes'
            FROM Activities AS a
            LEFT JOIN Users AS u
                ON a.user_id = u.id
            WHERE a.transportation_mode != ""
            GROUP BY u.id
            ORDER BY Count(DISTINCT a.transportation_mode) DESC
            LIMIT 10;
        
--------------------------Finished task5 in 16.997 ms---------------------------



Unnamed: 0,UserID,# Transportation Modes,Transportation Modes
0,128,9,"airplane, bike, boat, bus, car, subway, taxi, ..."
1,62,9,"bike, boat, bus, car, run, subway, taxi, train..."
2,85,4,"bus, subway, taxi, walk"
3,65,4,"bike, subway, taxi, walk"
4,112,3,"bike, bus, walk"
5,78,3,"subway, taxi, walk"
6,80,3,"bike, bus, taxi"
7,81,3,"bike, bus, walk"
8,111,3,"bike, car, taxi"
9,10,3,"bus, taxi, train"


### Task 7

In [37]:
tasks.task7a()

--------------------------------Starting task7a---------------------------------
Running statement:
 
            SELECT COUNT(DISTINCT user_id) as '# Users With Overnight Activities'
            FROM Activities
            WHERE DATEDIFF(end_datetime, start_datetime) = 1;
        
--------------------------Finished task7a in 13.845 ms--------------------------



Unnamed: 0,# Users With Overnight Activities
0,98


In [38]:
tasks.task7b()

--------------------------------Starting task7b---------------------------------
Running statement:
 
            SELECT transportation_mode AS 'Transportation Mode', user_id AS UserId, TIMEDIFF(end_datetime, start_datetime) as Duration
            FROM Activities
            WHERE DATEDIFF(end_datetime, start_datetime) = 1;
        
--------------------------Finished task7b in 20.327 ms--------------------------



Unnamed: 0,Transportation Mode,UserId,Duration
0,,000,0 days 04:09:15
1,,000,0 days 00:10:45
2,,000,0 days 11:03:55
3,,000,0 days 01:43:20
4,,000,0 days 16:24:06
...,...,...,...
1006,,168,0 days 01:13:02
1007,,172,0 days 04:20:47
1008,,174,0 days 02:19:51
1009,,174,0 days 01:31:59


### Task 8

In [21]:
tasks.task8()

---------------------------------Starting task8---------------------------------
Running statement:
 
            SELECT tp1.id as FirstId, tp2.ID as SecondId, tp1.datetime as FirstDatetime, tp2.datetime as SecondDatetime, ABS(TIME_TO_SEC(TIMEDIFF(tp1.datetime, tp2.datetime))) as Difference
            FROM TrackPoints as tp1
            CROSS JOIN TrackPoints as tp2
            WHERE tp1.activity_id != tp2.activity_id
                AND tp1.id != tp2.id
                AND ABS(TIME_TO_SEC(TIMEDIFF(tp1.datetime, tp2.datetime))) < 30
            LIMIT 20
        
Query Finished
-------------------------Finished task8 in 5343.976 ms--------------------------



Unnamed: 0,FirstId,SecondId,FirstDatetime,SecondDatetime,Difference
0,2865,2345,2007-08-08 00:21:09,2007-08-08 00:20:40,29
1,2864,2345,2007-08-08 00:21:01,2007-08-08 00:20:40,21
2,2863,2345,2007-08-08 00:20:54,2007-08-08 00:20:40,14
3,2862,2345,2007-08-08 00:20:46,2007-08-08 00:20:40,6
4,2861,2345,2007-08-08 00:20:38,2007-08-08 00:20:40,2
5,2860,2345,2007-08-08 00:20:26,2007-08-08 00:20:40,14
6,2859,2345,2007-08-08 00:20:18,2007-08-08 00:20:40,22
7,2868,2346,2007-08-08 00:22:03,2007-08-08 00:22:18,15
8,2868,2347,2007-08-08 00:22:03,2007-08-08 00:22:23,20
9,2869,2348,2007-08-08 00:22:54,2007-08-08 00:22:40,14
