In [None]:
#%matplotlib inline
%matplotlib notebook

## Counts By Minute Visualization

Many kinds of data are best understood in terms of where they fall in a daily pattern. For example, temperature is lower at night and higher during the day, even though one night in June can be warmer than many days in January in North America. Comparing peaks and troughs is less useful without considering the time of day, and where the day sits within multi-day weather patterns.

This can be applied to user sign-ups on a website, the speed of computation on background jobs, or... you name it. This visualization is intended to show such data over time.

Before you get started, be sure you `brew install ffmpeg` or whatever you need to do on your system.

In [None]:
import datetime

from matplotlib import pyplot as plt
from matplotlib import animation
from matplotlib import patches
from matplotlib import rc
from matplotlib import ticker

import numpy
import pandas

from IPython.display import HTML

### The input
The simplest way to understand the data we are considering is through a query like this: 

```
select 
  date_trunc('minute', created_at)
  , count(*) 
from some_record_type 
group by 1 
order by 1;
```

In [None]:
# Load the per-minute counts. Later cells read the `date_trunc` and `count`
# columns from this frame.
df = pandas.read_csv(filepath_or_buffer='counts_by_minute.csv')
df.head()

## Data Enrichment 
We will benefit from some extra fields here. The key fields are `age` and `minute_mark` for later calculations, but others might help your visualization.

In [None]:
# Parse the truncated-minute strings once. `infer_datetime_format` is not
# passed: it is deprecated in pandas 2.x, where consistent-format inference is
# already the default behaviour.
df['datetime'] = pandas.to_datetime(df['date_trunc'])

# Whole days between "now" and each record; this drives the sliding window.
# "now" is captured once so every row is measured against the same instant.
now = pandas.Timestamp(datetime.datetime.now())
df['age'] = (now - df['datetime']).dt.days

# `date_trunc` looks like '<date> <time>'; split on whitespace once and reuse.
date_and_time = df['date_trunc'].str.split()
df['time'] = date_and_time.str[1]

# Minutes since midnight (0..1439): the x coordinate of every plotted point.
clock_fields = df['time'].str.split(':')
df['minute_mark'] = clock_fields.str[0].astype(int) * 60 + clock_fields.str[1].astype(int)

df['date'] = date_and_time.str[0]

# No set_index here: `DataFrame.set_index` returns a new frame rather than
# modifying in place, and nothing downstream needs a datetime index.
df.head()

## Helper Functions

We want pretty data output, and these helpers will aid in outputting data correctly. Note that some comments have been left in to help if you run into data issues. Ideally, we'd not have a magic string in `color_from_percent`... but trust me.

In [None]:
@ticker.FuncFormatter
def major_formatter(x, pos):
    """Format a tick position given in minutes since midnight as ``H:MM``.

    Args:
        x: Tick location in minutes. Matplotlib passes this as a float, so it
            is truncated to an int before formatting (otherwise the hour
            renders as ``1.0``).
        pos: Tick index supplied by matplotlib; unused.

    Returns:
        A label such as ``'0:00'``, ``'9:05'`` or ``'13:30'``.
    """
    hours, minutes = divmod(int(x), 60)
    # Zero-pad the minutes so 9:05 is not rendered as "9:5".
    return '{}:{:02d}'.format(hours, minutes)

# Greyscale ramp with levels 0..MAX_GREYSCALE. Each level is rendered as one
# hex digit repeated over all six positions of '#RRGGBB', so this must stay
# <= 15 (0xF). 0 is black; 10 gives '#aaaaaa'.
MAX_GREYSCALE = 10


def color_from_percent(row):
    """Map a row's ``normalized_age`` (0..1) to a grey ``#RRGGBB`` string.

    ``normalized_age == 1`` yields black (``#000000``) and
    ``normalized_age == 0`` yields the lightest grey on the ramp
    (``#aaaaaa`` with ``MAX_GREYSCALE = 10``).

    Args:
        row: Anything indexable by ``'normalized_age'`` (a DataFrame row or a
            plain dict).

    Returns:
        A seven-character colour string such as ``'#555555'``.

    Raises:
        ValueError: If ``normalized_age`` is NaN (``int(nan)`` is undefined).
    """
    # Debugging tip: if this raises on real data, temporarily wrap the body in
    # try/except, print(row) and re-raise to find the offending record.
    level = int(MAX_GREYSCALE * (1 - row['normalized_age']))
    # Clamp so an out-of-range input can never produce a sign or a second hex
    # digit, either of which would make the colour string invalid.
    level = max(0, min(level, MAX_GREYSCALE))
    return '#' + '{:x}'.format(level) * 6


In [None]:
class RecordAnimator(object):
    """Animate per-minute counts as a window of days sliding from oldest to newest.

    Each frame scatter-plots ``count`` against ``minute_mark`` for the records
    whose ``age`` (in days) falls inside the current window. Within a frame,
    points are shaded by ``color_from_percent`` so that newer records are
    darker.

    The dataframe must provide the columns ``count``, ``age``, ``minute_mark``
    and ``date``.
    """

    # Used when a window contains a single age, so no grey ramp can be built.
    DEFAULT_COLOR = "#0000FF"
    # Legend swatches for the two ends of the grey ramp. '#AAAAAA' is the
    # lightest shade color_from_percent produces on its 0-10 ramp.
    OLDEST_COLOR = '#AAAAAA'
    NEWEST_COLOR = '#000000'

    def __init__(self, dataframe, slice_width=7, framerate=300):
        """Create the figure and the (lazy) FuncAnimation.

        Args:
            dataframe: Enriched records (see class docstring for columns).
            slice_width: Window size in days; a window covers ages
                ``start..start + slice_width`` inclusive.
            framerate: Delay between frames in milliseconds (forwarded as
                FuncAnimation's ``interval``).

        Raises:
            ValueError: If ``dataframe`` has no rows.
        """
        if len(dataframe) == 0:
            raise ValueError("RecordAnimator needs a non-empty dataframe")

        self.dataframe = dataframe
        self.max_y = dataframe['count'].max()
        self.max_x = 60 * 24  # minutes in a day
        self.max_age = dataframe['age'].max()
        self.min_age = dataframe['age'].min()
        self.slice_width = slice_width

        self.figure = plt.figure(figsize=(15, 10))
        print('fig size: {0} DPI, size in inches {1}'.format(
            self.figure.get_dpi(), self.figure.get_size_inches()
        ))
        self.axes = plt.axes()

        # Frame i shows ages [max_age - i - slice_width, max_age - i]. The last
        # useful frame is the one whose lower bound reaches min_age, hence the
        # "+ 1" (without it the newest day is never drawn). Always keep at
        # least one frame so data spanning fewer days than the window still
        # renders.
        frame_count = max(1, int(self.max_age - self.min_age - slice_width) + 1)

        self.animation = animation.FuncAnimation(
            self.figure,
            self.animate,
            frames=frame_count,
            interval=framerate
        )

    def clear(self):
        """Wipe every axes on the figure and reset the legend."""
        for ax in self.figure.get_axes():
            ax.clear()
        plt.legend(handles=[])

    def configure_axes(self):
        """Apply limits, tick layout and H:MM labels (needed after each clear)."""
        self.axes.tick_params(which='major', width=1.00, length=8)
        plt.xlim(0, self.max_x)
        plt.ylim(0, self.max_y)
        plt.xticks(rotation='vertical')
        plt.margins(0.6)
        plt.subplots_adjust(bottom=0.15)
        self.axes.xaxis.set_major_formatter(major_formatter)
        # Major tick every hour, minor tick every quarter hour.
        self.axes.xaxis.set_major_locator(ticker.MultipleLocator(60))
        self.axes.xaxis.set_minor_locator(ticker.MultipleLocator(15))

    def get_segment(self, i):
        """Return an independent copy of the rows visible in frame ``i``.

        Args:
            i: Frame index; 0 is the window ending at the oldest age.

        Returns:
            A new DataFrame holding the rows with ``start <= age <= stop``.
            It is a copy, so callers may add columns without touching (or
            triggering warnings about) ``self.dataframe``.
        """
        stop = self.max_age - i
        start = stop - self.slice_width
        if start % self.slice_width == 0:
            print("Getting segment from {} to {}".format(start, stop))
        # Filter on this instance's frame, not on a notebook-level global.
        ages = self.dataframe['age']
        in_window = (ages >= start) & (ages <= stop)
        return self.dataframe.loc[in_window].copy()

    def enrich_segment(self, segment):
        """Add ``normalized_age`` (and ``color`` when possible) to ``segment`` in place.

        ``normalized_age`` is 0 for the oldest row in the segment and 1 for
        the newest. When every row has the same age there is no range to
        normalise over: ``normalized_age`` is set to NaN and no ``color``
        column is added, so the caller falls back to ``DEFAULT_COLOR``.
        """
        min_age, max_age = segment['age'].min(), segment['age'].max()
        if min_age == max_age:
            # Avoid the 0 / 0 division; nothing reads this column in this case.
            segment['normalized_age'] = float('nan')
            return
        segment['normalized_age'] = (max_age - segment['age']) / (max_age - min_age)
        segment['color'] = segment.apply(color_from_percent, axis=1)

    def plot_legend(self, oldest_date, newest_date):
        """Show a two-entry legend naming the oldest and newest dates in view."""
        oldest_patch = patches.Patch(color=self.OLDEST_COLOR, label='Oldest: {}'.format(oldest_date))
        newest_patch = patches.Patch(color=self.NEWEST_COLOR, label='Most Recent: {}'.format(newest_date))
        plt.legend(handles=[oldest_patch, newest_patch])

    def animate(self, i):
        """Draw frame ``i``; this is the FuncAnimation callback.

        Returns:
            A one-element tuple holding the scatter artist, or an empty tuple
            when the window contains no rows (the previous frame is then left
            on screen).
        """
        segment = self.get_segment(i)
        if len(segment) == 0:
            print("null dataframe")
            return ()

        self.clear()
        self.enrich_segment(segment)
        self.plot_legend(segment['date'].min(), segment['date'].max())

        color = segment['color'] if 'color' in segment else self.DEFAULT_COLOR
        scatter = self.axes.scatter(segment['minute_mark'], segment['count'], s=1, c=color, marker='o')
        self.configure_axes()

        return scatter,

    def show(self):
        """Render the animation as an interactive JS/HTML player.

        Returns:
            An ``IPython.display.HTML`` object. Make this call the last
            expression of a cell (or pass the result to ``display``) so that
            the notebook actually shows it.
        """
        return HTML(self.animation.to_jshtml())

# `framerate` is forwarded to FuncAnimation as `interval`, i.e. the delay
# between frames in milliseconds. `show()` is the last expression of the cell,
# so whatever it returns becomes the cell's output.
my_animation = RecordAnimator(dataframe=df, framerate=600)
my_animation.show()

In [None]:
# Encode the animation to an H.264 MP4 at 3 frames per second.
my_animation.animation.save(
    'count_by_minute.mp4',
    fps=3,
    extra_args=['-vcodec', 'libx264'],
)