## RUN
```
select count(*) from run;
```
```
   count   
-----------
 140,057,634
```


### Optimal ordering for most operations
```
cluster verbose run using run_workoutid_idx;
```

### REMOVE DUPES.

#### Count the number of dupes

```
select count(*) from (select count(*) from run group by altitude, heart_rate, latitude, longitude, speed, workoutid, time having count(*) > 1) as foo;
```
```
 count  
--------
 136326
```

#### Remove the dupes

```
-- Remove exact duplicate samples from run.
-- A temporary serial id is added so that otherwise identical rows can be told
-- apart; it is dropped again once the duplicates are gone.
ALTER TABLE run ADD COLUMN id SERIAL PRIMARY KEY;
-- Rows are grouped by the seven measurement columns and numbered by id within
-- each group; the lowest id (rnum = 1) is kept and every other copy is deleted.
DELETE FROM run
WHERE id IN (SELECT id
              FROM (SELECT id,
                             ROW_NUMBER() OVER (partition BY altitude, heart_rate, latitude, longitude, speed, workoutid, time ORDER BY id) AS rnum
                     FROM run) t
              WHERE t.rnum > 1);
ALTER TABLE run drop column id;
```
```
DELETE 136868
```

```select count(*) from (select count(*) from run group by altitude, heart_rate, latitude, longitude, speed, workoutid, time having count(*) > 1) as foo;```

```
 count 
-------
     0
```

### Vacuum
```vacuum full run;```

## Speed

### Value given in kph. Range should be from 0-50.

```
select * from histogram('speed', 'run');
```
```
 bucket |                range                |   freq   |       bar
--------+-------------------------------------+----------+-----------------
      1 | [-1056.5865000000,1951.4200000000]  | 62909689 | ***************
      2 | [2209.0300000000,4585.4600000000]   |        5 |
      3 | [5865.0600000000,5865.0600000000]   |        1 |
      4 | [8118.3600000000,8118.3600000000]   |        1 |
      5 | [12446.2000000000,13525.5000000000] |        5 |
      6 | [14311.3000000000,14311.3000000000] |        1 |
     10 | [27818.9000000000,27818.9000000000] |        1 |
     17 | [47554.0000000000,47554.0000000000] |        1 |
     21 | [59500.2000000000,59500.2000000000] |        1 |
```

#### Simply removing data has implications; since there is a relatively small number of outliers, we will just smooth them.

#### Let's find the average speed and smooth the outliers to that.

```
SELECT avg(speed) AS average FROM run where speed < 50 and speed > 0;
```
```
       average       
---------------------
 11.1156684465806305
```

### Still too many > 50kph (30mph, no one that fast).

```
update run set speed = 11 where speed > 50;
```
```
UPDATE 28903
```

```
select * from histogram('speed', 'run');
```
```
 bucket |                range                |   freq   |       bar       
--------+-------------------------------------+----------+-----------------
      1 | [-1056.5865000000,-1012.0039000000] |        2 | 
      3 | [-909.6679700000,-909.6679700000]   |        1 | 
     13 | [-365.4199200000,-365.4199200000]   |        1 | 
     16 | [-218.5532200000,-188.3548100000]   |        3 | 
     17 | [-142.0299200000,-142.0299200000]   |        1 | 
     18 | [-107.4442140000,-72.7328700000]    |        3 | 
     19 | [-60.1526680000,-5.3368900000]      |      544 | 
     20 | [-5.3074465000,49.9968000000]       | 62909148 | ***************
     21 | [50.0000000000,50.0000000000]       |        2 | 
```

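### Still too many < 0kph.

_Note: the `UPDATE` count and histogram recorded below are identical to the output of the previous (> 50kph) step and still show negative speeds; they appear to be stale and should be re-captured._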

```
update run set speed = 11 where speed < 0;
```
```
UPDATE 28903
```

```
select * from histogram('speed', 'run');
```
```
 bucket |                range                |   freq   |       bar       
--------+-------------------------------------+----------+-----------------
      1 | [-1056.5865000000,-1012.0039000000] |        2 | 
      3 | [-909.6679700000,-909.6679700000]   |        1 | 
     13 | [-365.4199200000,-365.4199200000]   |        1 | 
     16 | [-218.5532200000,-188.3548100000]   |        3 | 
     17 | [-142.0299200000,-142.0299200000]   |        1 | 
     18 | [-107.4442140000,-72.7328700000]    |        3 | 
     19 | [-60.1526680000,-5.3368900000]      |      544 | 
     20 | [-5.3074465000,49.9968000000]       | 62909148 | ***************
     21 | [50.0000000000,50.0000000000]       |        2 | 
```


#### Much better. Let's generate the first derivative of speed.
##### The update needs to be split into two halves (by workoutid) in order to finish.

```
-- First derivative of speed (change in speed per unit of time), first half of
-- the split: only rows with workoutid > 674628540 / 2 are updated.
-- NOTE(review): 674628540 is presumably the largest workoutid -- confirm.
--
-- Innermost query: per workout, ordered by time, the difference in speed and
--   in time between each sample and the previous one (lag). The first sample
--   of every workout has no predecessor, so its differences are NULL.
-- Middle query: a zero time gap is replaced by 1 to avoid division by zero.
-- Outer query: the ratio is rounded to 5 decimal places.
-- The result is written back by matching on (workoutid, time); the ORDER BY
-- clauses inside the CTE do not influence which rows are updated.
vacuum full verbose run; with dev_list as (
    select round((speed_difference / time_difference),5) as deriv,
           time,
           workoutid
           from (
        select speed_difference,
               case when time_difference = 0 then 1 else time_difference end as time_difference,
               time,
               workoutid
               from (
            select speed - lag(speed) over (partition by workoutid order by time) as speed_difference,
                   time - lag(time) over (partition by workoutid order by time) as time_difference,
                   speed,
                   time,
                   workoutid
                from run order by time)
        as foo)
    as bar
    order by workoutid,
             time )
update run r1
  set speed_first = d1.deriv
  from dev_list as d1
  where d1.workoutid = r1.workoutid and
        d1.time = r1.time and r1.workoutid > 674628540 / 2;
```
```
-- First derivative of speed, second half of the split: the same computation
-- as the workoutid > 674628540 / 2 statement, applied to the remaining rows
-- (workoutid <= 674628540 / 2).
--
-- Per workout, ordered by time: (speed - previous speed) divided by
-- (time - previous time), with a zero time gap treated as 1 and the result
-- rounded to 5 decimal places. The first sample of each workout has no
-- predecessor, so its derivative is NULL.
vacuum full verbose run; with dev_list as (
    select round((speed_difference / time_difference),5) as deriv,
           time,
           workoutid
           from (
        select speed_difference,
               case when time_difference = 0 then 1 else time_difference end as time_difference,
               time,
               workoutid
               from (
            select speed - lag(speed) over (partition by workoutid order by time) as speed_difference,
                   time - lag(time) over (partition by workoutid order by time) as time_difference,
                   speed,
                   time,
                   workoutid
                from run order by time)
        as foo)
    as bar
    order by workoutid,
             time )
update run r1
  set speed_first = d1.deriv
  from dev_list as d1
  where d1.workoutid = r1.workoutid and
        d1.time = r1.time and r1.workoutid <= 674628540 / 2;
```


### Sanity check values
```
select * from histogram('speed_first', 'run');
```
```
 bucket |         range         |   freq   |       bar       
--------+-----------------------+----------+-----------------
      1 | [-47.73240,-42.98813] |      234 | 
      2 | [-42.96152,-38.20114] |      868 | 
      3 | [-38.19191,-33.42419] |      937 | 
      4 | [-33.41972,-28.65955] |      609 | 
      5 | [-28.64880,-23.89011] |      601 | 
      6 | [-23.87152,-19.11500] |     1339 | 
      7 | [-19.11278,-14.34612] |     4012 | 
      8 | [-14.34480,-9.57550]  |    13702 | 
      9 | [-9.57527,-4.80565]   |    66119 | 
     10 | [-4.80561,-0.03601]   | 21209799 | ********
     11 | [-0.03600,4.73362]    | 41337319 | ***************
     12 | [4.73367,9.50317]     |    74429 | 
     13 | [9.50350,14.27250]    |    15557 | 
     14 | [14.27307,19.03956]   |     6717 | 
     15 | [19.04283,23.80343]   |     3242 | 
     16 | [23.81516,28.58019]   |     1401 | 
     17 | [28.58248,33.34351]   |     1072 | 
     18 | [33.35423,38.11226]   |      963 | 
     19 | [38.12325,42.88530]   |      912 | 
     20 | [42.89200,46.93680]   |     3453 | 
     21 | [47.66040,47.66040]   |        1 | 
```

#### (Re)Generate moving averages

##### Over 50 points (frame: the current row plus the 50 preceding rows, i.e. up to 51 samples)
```
vacuum full verbose run;

with dev_list as (
	select time, 
	       workoutid, 
	       avg(speed) over (partition by workoutid order by time rows between 50 preceding and current row) as mavg 
	from run 
	order by time
)
update run r1 set speed_ma_50 = d1.mavg from dev_list as d1 where d1.workoutid = r1.workoutid and 
   d1.time = r1.time and
    r1.workoutid <= 674628540 / 2;    
```

```
vacuum full verbose run;

with dev_list as (
	select time, 
	       workoutid, 
	       avg(speed) over (partition by workoutid order by time rows between 50 preceding and current row) as mavg 
	from run 
	order by time
)
update run r1 set speed_ma_50 = d1.mavg from dev_list as d1 where d1.workoutid = r1.workoutid and 
   d1.time = r1.time and
    r1.workoutid > 674628540 / 2;
    
```


```
select * from histogram('speed_ma_50', 'run');
```
```
 bucket |        range        |   freq   |       bar
--------+---------------------+----------+-----------------
      1 | [0.00000,2.49974]   |   723959 |
      2 | [2.49975,4.99948]   |  1065301 | *
      3 | [4.99949,7.49923]   |  3474418 | **
      4 | [7.49924,9.99897]   | 15263434 | ********
      5 | [9.99898,12.49872]  | 28202884 | ***************
      6 | [12.49873,14.99846] | 11414483 | ******
      7 | [14.99847,17.49821] |  1854616 | *
      8 | [17.49823,19.99793] |   318389 |
      9 | [19.99796,22.49767] |   154493 |
     10 | [22.49772,24.99744] |   116133 |
     11 | [24.99746,27.49719] |    91617 |
     12 | [27.49720,29.99693] |    79134 |
     13 | [29.99694,32.49668] |    65711 |
     14 | [32.49669,34.99622] |    38678 |
     15 | [34.99645,37.49591] |    19543 |
     16 | [37.49623,39.99531] |     9560 |
     17 | [39.99690,42.49418] |     5338 |
     18 | [42.49669,44.99466] |     5570 |
     19 | [44.99922,47.47320] |     6339 |
     20 | [47.49917,49.85640] |      103 |
     21 | [49.99490,49.99490] |        2 |
```




##### Over 100 points (frame: the current row plus the 100 preceding rows, i.e. up to 101 samples)
```
vacuum full verbose run;
with dev_list as (
	select time, 
	       workoutid, 
	       avg(speed) over (partition by workoutid order by time rows between 100 preceding and current row) as mavg 
	from run 
	order by time
)
update run r1 set speed_ma_100 = d1.mavg from dev_list as d1 where d1.workoutid = r1.workoutid and d1.time = r1.time and r1.workoutid > 674628540 / 2;
```

```
vacuum full verbose run;
with dev_list as (
	select time, 
	       workoutid, 
	       avg(speed) over (partition by workoutid order by time rows between 100 preceding and current row) as mavg 
	from run 
	order by time
)
update run r1 set speed_ma_100 = d1.mavg from dev_list as d1 where d1.workoutid = r1.workoutid and d1.time = r1.time and r1.workoutid <= 674628540 / 2;
```

```
select * from histogram('speed_ma_100', 'run');
```
```
 bucket |        range        |   freq   |       bar
--------+---------------------+----------+-----------------
      1 | [0.00000,2.49974]   |   685770 |
      2 | [2.49975,4.99948]   |   989860 | *
      3 | [4.99949,7.49923]   |  3381266 | **
      4 | [7.49924,9.99897]   | 15627434 | ********
      5 | [9.99898,12.49872]  | 28507149 | ***************
      6 | [12.49873,14.99846] | 11105122 | ******
      7 | [14.99847,17.49821] |  1722350 | *
      8 | [17.49823,19.99795] |   308469 |
      9 | [19.99796,22.49770] |   161878 |
     10 | [22.49771,24.99742] |   112155 |
     11 | [24.99749,27.49717] |    88815 |
     12 | [27.49720,29.99688] |    81664 |
     13 | [29.99695,32.49654] |    67296 |
     14 | [32.49679,34.99641] |    34831 |
     15 | [34.99649,37.49596] |    14112 |
     16 | [37.49623,39.99556] |     6428 |
     17 | [39.99609,42.49466] |     4578 |
     18 | [42.49733,44.99406] |     5261 |
     19 | [44.99608,47.47320] |     5162 |
     20 | [47.49917,49.85640] |      103 |
     21 | [49.99490,49.99490] |        2 |
```

## Altitude

### Big range of altitudes, some too low, some too high


```
select * from histogram('altitude', 'run');
```
```
 bucket |                 range                 |   freq   |       bar       
--------+---------------------------------------+----------+-----------------
      1 | [-18016.0000000000,-18016.0000000000] |        1 | 
      3 | [-8052.0000000000,-5147.0000000000]   |        2 | 
      4 | [-2733.9600000000,-155.1500000000]    |    89496 | 
      5 | [-155.1200000000,4310.0000000000]     | 97717633 | ***************
      6 | [4310.6000000000,8775.2000000000]     |   291498 | 
      7 | [8775.4000000000,12606.6000000000]    |    74682 | 
      8 | [14122.7000000000,17673.0000000000]   |       51 | 
      9 | [18293.7000000000,18293.7000000000]   |        1 | 
     21 | [71288.4000000000,71288.4000000000]   |        1 | 
```

#### Let's see how many rows are more than two standard deviations from the mean

```
select count(*) from (
	with dev_list as ( 
		select avg(altitude), stddev(altitude), workoutid from run group by workoutid )
select altitude, r1.workoutid 
  from run r1 
  join dev_list d1 on (d1.workoutid = r1.workoutid) 
  where r1.altitude < d1.avg - d1.stddev * 2 or r1.altitude > d1.avg + d1.stddev * 2) 
as foo;
```

#####  3,634,305 / 140,057,634 => ~2.5%
##### We should just smooth them to be two standard deviations from the mean

##### Fix values too low
```
with dev_list as ( 
	select avg(altitude), stddev(altitude), workoutid from run group by workoutid )
update run as r1 
  set altitude = d1.avg - (d1.stddev * 2) 
  from dev_list as d1 
  where d1.workoutid = r1.workoutid and r1.altitude < d1.avg - (d1.stddev * 2);
```
```
UPDATE 1363506
```

##### Fix values too high
```
with dev_list as ( 
	select avg(altitude), stddev(altitude), workoutid from run group by workoutid )
update run as r1 
  set altitude = d1.avg + (d1.stddev * 2) 
  from dev_list as d1 
  where d1.workoutid = r1.workoutid and r1.altitude > d1.avg + (d1.stddev * 2);
```
```
UPDATE 2364011
```

#### Sanity check

```
select * from histogram('altitude', 'run');
```
```
      1 | [-18016.0000000000,-18016.0000000000] |        1 | 
     12 | [-1074.2132609321,355.3988616260]     | 86070368 | ***************
     13 | [355.4000000000,1886.3100000000]      | 11326164 | **
     14 | [1886.3500000000,3417.2000000000]     |   385321 | 
     15 | [3417.4000000000,4948.2000000000]     |    33985 | 
     16 | [4948.4000000000,6479.0949213073]     |    33402 | 
     17 | [6479.2000000000,8009.6000000000]     |   239941 | 
     18 | [8010.8000000000,9540.2000000000]     |    18669 | 
     19 | [9541.4000000000,11072.0000000000]    |    41989 | 
     20 | [11072.2000000000,12602.8000000000]   |    23524 | 
     21 | [12603.0000000000,12603.0000000000]   |        1 | 
```

##### Everest is at 8,850 meters, and the most extreme high altitude ultra marathon only gets up to 5,300 meters. The lowest below sea level land is -413 meters. Probably a lot of errant sensor data

##### Lets look at workouts that include altitudes greater than 5300
```
select altitude, time, latitude, longitude, workoutid from run where workoutid in (select distinct(workoutid) from run where altitude > 5300) order by workoutid, time;
```

###### Small sample, look up lat / lng and check altitude. Spot check lat / lngs looks like the data is bad. 
```
   518.4000000000 | 1335966811 |  41.6079340000 |   14.2548520000 |  52472782
   518.9980000000 | 1335966817 |  41.6078950000 |   14.2550230000 |  52472782
   520.0980000000 | 1335966822 |  41.6079070000 |   14.2552210000 |  52472782
   520.4000000000 | 1335966823 |  41.6079570000 |   14.2553840000 |  52472782
  9387.0505831777 | 1335966835 |  38.0929500000 |   12.5515930000 |  52472782
  9387.0505831777 | 1335967083 |  37.7106640000 |   12.3753560000 |  52472782
  9387.0505831777 | 1335967149 |  37.5904250000 |   12.3189460000 |  52472782
  9387.0505831777 | 1335967575 |  37.7246440000 |   12.3759740000 |  52472782
  5867.8278045868 | 1336789061 |  44.4906810000 |   11.3197250000 |  54861944
  5867.8278045868 | 1336789071 |  44.4906330000 |   11.3194720000 |  54861944
  5867.8278045868 | 1336789077 |  44.4906750000 |   11.3193130000 |  54861944
  5867.8278045868 | 1336789087 |  44.4906150000 |   11.3190350000 |  54861944
  5867.8278045868 | 1336789095 |  44.4906350000 |   11.3189010000 |  54861944
  5867.8278045868 | 1336789103 |  44.4906300000 |   11.3185810000 |  54861944
  5867.8278045868 | 1336789125 |  44.4905970000 |   11.3184610000 |  54861944
```

###### How many workouts is it?
```
select count(*) from  (select distinct(workoutid) from run where altitude > 5300) as foo;
```

###### 826 / 347,556. ~0.24%
```
select count(*) from run where altitude > 5300;
```
##### 347,459 rows. We should probably just drop the altitude for those workouts, as that is ~420 rows per workout.
```
update run set altitude = NULL where workoutid in (select distinct(workoutid) from run where altitude > 5300);
```


##### (Re)Generate first derivative
```
vacuum full verbose run;
with dev_list as (
    select round((alt_difference / time_difference),5) as deriv, time, workoutid from ( 
        select alt_difference, case when time_difference = 0 then 1 else time_difference end as time_difference, time, workoutid from (
            select altitude - lag(altitude) over (partition by workoutid order by time) as alt_difference, time - lag(time) over (partition by workoutid order by time) as time_difference, time, workoutid from run order by time) 
        as foo) 
    as bar order by workoutid, time )
update run r1 
  set altitude_first = d1.deriv 
  from dev_list as d1 
  where d1.workoutid = r1.workoutid and d1.time = r1.time  and r1.workoutid <= 674628540 / 2;
```
```
vacuum full verbose run;
with dev_list as (
    select round((alt_difference / time_difference),5) as deriv, time, workoutid from ( 
        select alt_difference, case when time_difference = 0 then 1 else time_difference end as time_difference, time, workoutid from (
            select altitude - lag(altitude) over (partition by workoutid order by time) as alt_difference, time - lag(time) over (partition by workoutid order by time) as time_difference, time, workoutid from run order by time) 
        as foo) 
    as bar order by workoutid, time )
update run r1 
  set altitude_first = d1.deriv 
  from dev_list as d1 
  where d1.workoutid = r1.workoutid and d1.time = r1.time  and r1.workoutid > 674628540 / 2;
```

##### (Re)Generate second derivative
```
vacuum full verbose run;
with dev_list as (
    select round((alt_difference / time_difference),5) as deriv, time, workoutid from ( 
        select alt_difference, case when time_difference = 0 then 1 else time_difference end as time_difference, time, workoutid from (
            select altitude_first - lag(altitude_first) over (partition by workoutid order by time) as alt_difference, time - lag(time) over (partition by workoutid order by time) as time_difference, time, workoutid from run order by time) 
        as foo) 
    as bar order by workoutid, time )
update run r1 
  set altitude_second = d1.deriv 
  from dev_list as d1 
  where d1.workoutid = r1.workoutid and d1.time = r1.time and r1.workoutid <= 674628540 / 2;
```

```
vacuum full verbose run;
with dev_list as (
    select round((alt_difference / time_difference),5) as deriv, time, workoutid from ( 
        select alt_difference, case when time_difference = 0 then 1 else time_difference end as time_difference, time, workoutid from (
            select altitude_first - lag(altitude_first) over (partition by workoutid order by time) as alt_difference, time - lag(time) over (partition by workoutid order by time) as time_difference, time, workoutid from run order by time) 
        as foo) 
    as bar order by workoutid, time )
update run r1 
  set altitude_second = d1.deriv 
  from dev_list as d1 
  where d1.workoutid = r1.workoutid and d1.time = r1.time and r1.workoutid > 674628540 / 2;
```


## Series length in run_by_workout for use in elapsed distance calculation
```
-- Store, for every workout, the number of samples it has in run.
-- The count is a correlated subquery, so workouts with no rows in run get 0.
update run_by_workout as rbw set series_length = (select count(*) from run r where r.workoutid = rbw.workoutid); 
```

```
select count(*) from run_by_workout where series_length < 2;
 count 
-------
  3182
```

#### 3,182 / 347,556. ~0.9%.  Just ignore them when updating

## Elapsed distance


```
vacuum full verbose run;
with elap_dist as (select time,
       r1.workoutid,
       (row_number() over (partition by r1.workoutid order by time) - 1) * (distance / (series_length - 1)) as elapsed_distance
       from run r1 join run_by_workout r2 on (r1.workoutid = r2.workoutid) where series_length > 1 order by time)   
update run as r1 
  set elapsed_distance = round(d1.elapsed_distance,10)
  from elap_dist as d1 
  where d1.workoutid = r1.workoutid and d1.time = r1.time and r1.workoutid <= 674628540 / 2;
```
```
vacuum full verbose run;
with elap_dist as (select time,
       r1.workoutid,
       (row_number() over (partition by r1.workoutid order by time) - 1) * (distance / (series_length - 1)) as elapsed_distance
       from run r1 join run_by_workout r2 on (r1.workoutid = r2.workoutid) where series_length > 1 order by time) 
update run as r1 
  set elapsed_distance = round(d1.elapsed_distance,10)
  from elap_dist as d1 
  where d1.workoutid = r1.workoutid and d1.time = r1.time and r1.workoutid > 674628540 / 2;
```

## Elapsed time

```
vacuum full verbose run;
with elap_time as (select time, workoutid, time - first_value(time) over (partition by workoutid order by time) as time_elap from run order by time)
update run as r1 
  set elapsed_time = d1.time_elap 
  from elap_time as d1 
  where r1.workoutid = d1.workoutid and r1.time = d1.time and r1.workoutid <= 674628540 / 2;
```
```
vacuum full verbose run;
with elap_time as (select time, workoutid, time - first_value(time) over (partition by workoutid order by time) as time_elap from run order by time)
update run as r1 
  set elapsed_time = d1.time_elap 
  from elap_time as d1 
  where r1.workoutid = d1.workoutid and r1.time = d1.time and r1.workoutid > 674628540 / 2;
```


## Heart Rate

```
select * from histogram('heart_rate', 'run');
```
```
 bucket |           range           |   freq   |       bar
--------+---------------------------+----------+-----------------
      1 | [-2604.00000,-2390.00000] |       26 |
      2 | [-2381.00000,-2168.00000] |       27 |
      3 | [-2160.00000,-1947.00000] |       27 |
      4 | [-1939.00000,-1729.00000] |       27 |
      5 | [-1721.00000,-1508.00000] |       27 |
      6 | [-1499.00000,-1289.00000] |       27 |
      7 | [-1281.00000,-1068.00000] |       27 |
      8 | [-1060.00000,-848.00000]  |       27 |
      9 | [-840.00000,-626.00000]   |       30 |
     10 | [-619.00000,-404.00000]   |       49 |
     11 | [-399.00000,-184.00000]   |      170 |
     12 | [-183.00000,36.00000]     |   147780 |
     13 | [37.00000,255.00000]      | 67328561 | ***************
     14 | [257.00000,473.00000]     |      187 |
     15 | [478.00000,693.00000]     |       84 |
     16 | [700.00000,913.00000]     |       73 |
     17 | [918.00000,1135.00000]    |       66 |
     18 | [1142.00000,1351.00000]   |       35 |
     19 | [1362.00000,1572.00000]   |       31 |
     20 | [1584.00000,1787.00000]   |       18 |
     21 | [1797.00000,1797.00000]   |        1 |
```

##### Lots of garbage data. Heart rate should never be above 240 or below 40. Even those are uncommon. Use the same two standard deviation logic as altitude. 

##### Fix values too high
```
with dev_list as ( 
	select avg(heart_rate), stddev(heart_rate), workoutid from run group by workoutid )
update run as r1 
  set heart_rate = d1.avg + (d1.stddev * 2) 
  from dev_list as d1 
  where d1.workoutid = r1.workoutid and r1.heart_rate > d1.avg + (d1.stddev * 2);
```
```
UPDATE 495953
```


##### Fix values too low
```
-- Clamp heart-rate readings that fall more than two standard deviations below
-- their workout's mean up to that lower bound (avg - 2 * stddev).
-- The original statement assigned avg + 2 * stddev here (copied from the
-- "too high" fix), which moved low outliers to the upper bound instead.
with dev_list as ( 
	select avg(heart_rate), stddev(heart_rate), workoutid from run group by workoutid )
update run as r1 
  set heart_rate = d1.avg - (d1.stddev * 2) 
  from dev_list as d1 
  where d1.workoutid = r1.workoutid and r1.heart_rate < d1.avg - (d1.stddev * 2);
```
```
UPDATE 2305944
```

#### Better, but still too many below 40 and above 250
```
select * from histogram('heart_rate', 'run');
```
```
      1 | [-2578.00000,-2390.00000] |       24 | 
      2 | [-2381.00000,-2185.00000] |       25 | 
      3 | [-2176.00000,-1996.00000] |       23 | 
      4 | [-1988.00000,-1792.00000] |       25 | 
      5 | [-1785.00000,-1598.00000] |       24 | 
      6 | [-1590.00000,-1402.00000] |       24 | 
      7 | [-1394.00000,-1207.00000] |       24 | 
      8 | [-1200.00000,-1011.00000] |       24 | 
      9 | [-1003.00000,-816.00000]  |       24 | 
     10 | [-807.00000,-619.00000]   |       24 | 
     11 | [-611.00000,-421.00000]   |       24 | 
     12 | [-413.00000,-227.00000]   |       24 | 
     13 | [-218.00000,-23.00000]    |      536 | 
     14 | [-19.00000,173.78621]     | 61414170 | ***************
     15 | [173.78703,368.00000]     |  6061960 | *
     16 | [371.00000,564.00000]     |      108 | 
     17 | [573.00000,762.00000]     |       84 | 
     18 | [764.00000,954.00000]     |       69 | 
     19 | [964.00000,1152.00000]    |       24 | 
     20 | [1162.00000,1350.00000]   |       17 | 
     21 | [1353.12415,1353.12415]   |       43 | 
```

##### Let's look at workouts that include heart_rates greater than 370 to find outliers
```
select heart_rate, time, workoutid from run where workoutid in (select distinct(workoutid) from run where heart_rate > 370) order by workoutid, time;
```


###### Looks like some workouts have garbage data or other units.

###### How many workouts is it?
```
select count(*) from  (select distinct(workoutid) from run where heart_rate > 370) as foo;
```
```
 count 
-------
   6
```

##### 6 impacted workouts; just drop the heart_rate for those workouts.
```
update run set heart_rate = NULL where workoutid in (select distinct(workoutid) from run where heart_rate > 370);
```
```
UPDATE 2588
```


##### Lets look at workouts that include heart_rates less than 40 to find outliers
```
select heart_rate, time, workoutid from run where workoutid in (select distinct(workoutid) from run where heart_rate < 40 and heart_rate != 0) order by workoutid, time;
```
```
```


###### Found 21948, Looks like some workouts have garbage data or other units.

###### How many workouts is it?
```
select count(*) from  (select distinct(workoutid) from run where heart_rate < 40) as foo;
```
```
 count 
-------
   386
```

##### 386 impacted workouts; just drop the heart_rate for those workouts.
```
update run set heart_rate = NULL where workoutid in (select distinct(workoutid) from run where heart_rate < 40);
```
```
UPDATE 165896
```


```
select * from histogram('heart_rate', 'run');
```
```
 bucket |         range         |   freq   |       bar       
--------+-----------------------+----------+-----------------
      1 | [40.00000,53.01118]   |     7917 | 
      2 | [53.70816,66.00000]   |    30898 | 
      3 | [66.26454,79.31991]   |   132706 | 
      4 | [79.51359,92.34629]   |   394801 | 
      5 | [92.45286,105.53394]  |   891418 | *
      6 | [105.60919,118.63141] |  2348922 | **
      7 | [118.64713,131.74957] |  6796043 | *****
      8 | [131.75328,144.85615] | 15256396 | ************
      9 | [144.85843,157.96446] | 19151395 | ***************
     10 | [157.96517,171.07162] | 14849415 | ************
     11 | [171.07201,184.17886] |  5632441 | ****
     12 | [184.17908,197.28610] |  1243028 | *
     13 | [197.28629,210.38916] |   316159 | 
     14 | [210.39501,223.46777] |   133062 | 
     15 | [223.50918,236.56273] |    80148 | 
     16 | [236.62393,249.70201] |    35657 | 
     17 | [249.84802,262.80312] |     5539 | 
     18 | [262.82886,275.81747] |     2236 | 
     19 | [275.94231,288.05198] |      577 | 
     20 | [289.16319,297.90789] |       41 | 
     21 | [302.14367,302.14367] |       17 | 
```

#### Count those greater than 250.
```
select count(*) from run where heart_rate > 250;
```
```
 count 
-------
  7730
```

```
select count(*) from (
    with dev_list as ( 
        select avg(heart_rate), stddev(heart_rate), workoutid from run group by workoutid )
select heart_rate, r1.workoutid 
  from run r1 
  join dev_list d1 on (d1.workoutid = r1.workoutid) 
  where r1.heart_rate > 250 and (r1.heart_rate > d1.avg + d1.stddev * 2)) 
as foo;
```
```
 count 
-------
  3368
```

#### Let's smooth those that are both above 250 and above 2 std devs down to 2 std devs. Set those that remain above 250 down to 250.
```
with dev_list as ( 
	select avg(heart_rate), stddev(heart_rate), workoutid from run group by workoutid )
update run as r1 
  set heart_rate = d1.avg + (d1.stddev * 2) 
  from dev_list as d1 
  where r1.heart_rate > 250 and d1.workoutid = r1.workoutid and r1.heart_rate > d1.avg + (d1.stddev * 2);
```

```
UPDATE 3368
```

```
update run 
  set heart_rate = 250 where heart_rate > 250;
```
```

```


vacuum full verbose run;

#### (Re)Generate moving heart rate average over the last 25 points (frame: the current row plus the 25 preceding rows, i.e. up to 26 samples)

```
vacuum full verbose run;
with dev_list as (
	select time,
	       workoutid, 
	       avg(heart_rate) over (partition by workoutid order by time rows between 25 preceding and current row) as mavg 
	from run 
	order by time
)
update run r1 set heart_rate_ma_25 = d1.mavg from dev_list as d1 where d1.workoutid = r1.workoutid and d1.time = r1.time  and r1.workoutid > 674628540 / 2;
```

```
```

```
vacuum full verbose run;
with dev_list as (
	select time,
	       workoutid, 
	       avg(heart_rate) over (partition by workoutid order by time rows between 25 preceding and current row) as mavg 
	from run 
	order by time
)
update run r1 set heart_rate_ma_25 = d1.mavg from dev_list as d1 where d1.workoutid = r1.workoutid and d1.time = r1.time  and r1.workoutid <= 674628540 / 2;
```


```
```

```
select * from histogram('heart_rate_ma_25', 'run');
```

```

```

#### geo_distance
```
-- Column that the geo_distance updates write to. The original statement
-- omitted the column name, which is a syntax error.
alter table run add column geo_distance numeric(20,10);
```

```
-- Per-sample geo distance, first half of the split (workoutid > 674628540 / 2).
-- For each workout, ordered by time, this is the straight-line distance in
-- latitude/longitude space between a sample and the previous one:
--   sqrt(lat_difference^2 + long_difference^2).
-- The first sample of each workout has no predecessor, so its value is NULL.
--
-- Fixes relative to the original statement:
--   * removed the stray closing parenthesis after the CTE,
--   * the CTE now carries workoutid, which the UPDATE joins on,
--   * the UPDATE writes geo_distance from gd.geo_distance instead of writing
--     altitude_second from a non-existent deriv column,
--   * dropped the inner ORDER BY, which had no effect on the update.
vacuum full verbose run;
with gd as (
  select time,
         workoutid,
         sqrt(power(lat_difference, 2) + power(long_difference, 2)) as geo_distance
  from (select time,
               workoutid,
               latitude - lag(latitude) over (partition by workoutid order by time) as lat_difference,
               longitude - lag(longitude) over (partition by workoutid order by time) as long_difference
        from run)
  as foo
)
update run r1
  set geo_distance = d1.geo_distance
  from gd as d1
  where d1.workoutid = r1.workoutid and d1.time = r1.time and r1.workoutid > 674628540 / 2;
```

```
-- Per-sample geo distance, second half of the split (workoutid <= 674628540 / 2).
-- For each workout, ordered by time, this is the straight-line distance in
-- latitude/longitude space between a sample and the previous one:
--   sqrt(lat_difference^2 + long_difference^2).
-- The first sample of each workout has no predecessor, so its value is NULL.
--
-- Fixes relative to the original statement:
--   * removed the stray closing parenthesis after the CTE,
--   * the CTE now carries workoutid, which the UPDATE joins on,
--   * the UPDATE writes geo_distance from gd.geo_distance instead of writing
--     altitude_second from a non-existent deriv column,
--   * dropped the inner ORDER BY, which had no effect on the update.
vacuum full verbose run;
with gd as (
  select time,
         workoutid,
         sqrt(power(lat_difference, 2) + power(long_difference, 2)) as geo_distance
  from (select time,
               workoutid,
               latitude - lag(latitude) over (partition by workoutid order by time) as lat_difference,
               longitude - lag(longitude) over (partition by workoutid order by time) as long_difference
        from run)
  as foo
)
update run r1
  set geo_distance = d1.geo_distance
  from gd as d1
  where d1.workoutid = r1.workoutid and d1.time = r1.time and r1.workoutid <= 674628540 / 2;
```


## RUN_BY_WORKOUT derived fields

#### altitude_max. Not derived; worth checking whether our changes impact this.

```
with alt as (select max(altitude) as mx, min(altitude) as mn, workoutid from run group by workoutid)
select rbw.workoutid, altitude_max, mx from run_by_workout rbw join alt on (rbw.workoutid = alt.workoutid) where round(mx, 5) != round(rbw.altitude_max, 5); 
```

#### altitude_min. Not derived; worth checking whether our changes impact this.

```
with alt as (select max(altitude) as mx, min(altitude) as mn, workoutid from run group by workoutid)
select rbw.workoutid, altitude_min, mn from run_by_workout rbw join alt on (rbw.workoutid = alt.workoutid) where round(mn, 5) != round(rbw.altitude_min, 5); 
```

#### Add second, derived, altitude_max & altitude_min columns.
```
alter table run_by_workout add column altitude_max2 numeric(20,10);
alter table run_by_workout add column altitude_min2 numeric(20,10);
```

```
with alt as (select max(altitude) as mx, min(altitude) as mn, workoutid from run group by workoutid)
update run_by_workout as r1
  set altitude_max2 = d1.mx
  from alt as d1 where d1.workoutid = r1.workoutid;
```

```
with alt as (select max(altitude) as mx, min(altitude) as mn, workoutid from run group by workoutid)
update run_by_workout as r1
  set altitude_min2 = d1.mn
  from alt as d1 where d1.workoutid = r1.workoutid;
```


### heart_rate_avg, heart_rate_max

```
with alt as (select max(heart_rate) as mx, avg(heart_rate) as av, workoutid from run group by workoutid)
update run_by_workout as r1
  set heart_rate_avg = d1.av
  from alt as d1 where d1.workoutid = r1.workoutid;
```

```
with alt as (select max(heart_rate) as mx, avg(heart_rate) as av, workoutid from run group by workoutid)
update run_by_workout as r1
  set heart_rate_max = d1.mx
  from alt as d1 where d1.workoutid = r1.workoutid;
```


### speed_avg, speed_max

```
with alt as (select max(speed) as mx, avg(speed) as av, workoutid from run group by workoutid)
update run_by_workout as r1
  set speed_avg = d1.av
  from alt as d1 where d1.workoutid = r1.workoutid;
```

```
with alt as (select max(speed) as mx, avg(speed) as av, workoutid from run group by workoutid)
update run_by_workout as r1
  set speed_max = d1.mx
  from alt as d1 where d1.workoutid = r1.workoutid;
```


### elapsed_time
```
-- Workout duration: timestamp of the last sample minus timestamp of the first.
-- The original subtracted min(altitude) from max(time), mixing two unrelated
-- columns.
with alt as (select max(time) - min(time) as td, workoutid from run group by workoutid)
update run_by_workout as r1
  set elapsed_time = d1.td
  from alt as d1 where d1.workoutid = r1.workoutid;
```

### geo_distance

```
with alt as (select sum(geo_distance) as su, workoutid from run group by workoutid)
update run_by_workout as r1
  set geo_distance = d1.su
  from alt as d1 where d1.workoutid = r1.workoutid;
```
