#### Stage 2
# Flow Data - Temporal Overview - Anomalies
with matplotlib

#### Generating Temporal Plots of Anomalies
This notebook contains the code for the generation of **radial and horizontal bar charts** from [anomCycledf.csv](https://www.mcloud.de/downloads/ingrid-group_ige-iplug-mcloud/9A101FEC-3502-495A-9D93-BD8329A9D8AC/anomCycledf.csv).
Again, two separate plotting functions for the horizontal and the radial version are written while a third function filters and aggregates the dataset according to the cell filter options and decides which and how many plots to generate.

This notebook incorporates the following changes proposed during the user test:

#### Radial Bar Chart
A small pie chart was added in the middle of the plot, representing the total number of movements during the plotted day. Its size depends on the order of magnitude (number of digits) of the total daily number of movements.
The light gray portion represents the average number of movements on the three weekdays taken for reference while a green or charcoal grey portion represents the summed anomalies throughout the day.
The maximum is plotted next to the bar corresponding to it while the day's total number of anomalies is plotted on the top left of each plot.
Plots for the two directions now share the same scale, making it easier to see in which direction the numbers are higher.

#### Versions of the used packages:
- pandas: 0.24.2
- numpy: 1.16.4
- matplotlib: 3.2.1
- ipywidgets: 7.5.1

In [1]:
import pandas as pd
# from ast import literal_eval
import matplotlib.pyplot as plt
from math import pi
import numpy as np
import ipywidgets as pyw

In [2]:
def csvtodf_SC(path):
    """Read the semicolon-separated file ``data/<path>.csv`` into a DataFrame.

    Parameters
    ----------
    path : str
        File name inside the ``data/`` folder, given without the ``.csv``
        suffix.

    Returns
    -------
    pandas.DataFrame
        The parsed table; whitespace directly after a delimiter is skipped.
    """
    csvFile = ''.join(('data/', path, '.csv'))
    table = pd.read_csv(csvFile, sep=';', skipinitialspace=True, skiprows=0)
    return pd.DataFrame(table)

def csvtodf_C(path):
    """Read the comma-separated file ``data/<path>.csv`` into a DataFrame.

    Parameters
    ----------
    path : str
        File name inside the ``data/`` folder, given without the ``.csv``
        suffix.

    Returns
    -------
    pandas.DataFrame
        The parsed table; whitespace directly after a delimiter is skipped.
    """
    csvFile = ''.join(('data/', path, '.csv'))
    table = pd.read_csv(csvFile, sep=',', skipinitialspace=True, skiprows=0)
    return pd.DataFrame(table)

## load data
processed in 24.0

In [3]:
# Load the semicolon-separated anomaly table from data/anomCycledf.csv.
anomCycledf = csvtodf_SC('anomCycledf')
# Preview the first rows as the cell output.
anomCycledf.head()

Unnamed: 0,flowID,flowIdDir,from_cell,to_cell,hour,moves,privat,public,Rail,UBahn,...,UBahnMDD,TramMDD,BusMDD,movesAnom,privatAnom,publicAnom,RailAnom,UBahnAnom,TramAnom,BusAnom
0,100_101,1,100,101,18,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
1,100_101,1,100,101,19,1.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
2,100_102,-1,102,100,10,1.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,-0.5,-1.5,1.0,1.0,0.0,0.0,0.0
3,100_102,-1,102,100,11,2.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
4,100_102,-1,102,100,14,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0


## set constant parameters

In [4]:
# Comma-separated text listings of all origin / destination cell ids in the data.
fromCellsStr = str(anomCycledf['from_cell'].unique().copy().tolist()).strip('[]')
toCellsStr = str(anomCycledf['to_cell'].unique().copy().tolist()).strip('[]')

# Column stems of the transport modes and their derived '<mode>Anom' / '<mode>MDD' columns.
modes = ['moves', 'privat', 'public', 'Rail', 'UBahn', 'Tram', 'Bus']
modesAnom = [modeName + 'Anom' for modeName in modes]
modesMDD = [modeName + 'MDD' for modeName in modes]

# One colour per mode, taken from the ColorBrewer "Set1" qualitative scheme:
# https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=9
modesColor = {
    'privat': '#999999',
    'Rail': '#4daf4a',
    'UBahn': '#377eb8',
    'Tram': '#e41a1c',
    'Bus': '#984ea3',
    'public': '#ff7f00',
    'moves': '#a65628',
}
# Colour lookup keyed by "is the anomaly positive?".
PosNegCol = {
    True: '#1a9641',
    False: '#404040',
}

# Aggregation specs for groupby().agg(): every mode / MDD / Anom column -> one aggregate.
modesAggMean = {key: ['mean'] for key in modes + modesMDD + modesAnom}
modesAggSum = {key: ['sum'] for key in modes + modesMDD + modesAnom}
d = dict(modesAggSum)

# Hours of the day and the matching angles (in radians) on a 24-hour dial.
hours = list(range(24))
ticks = [hour*(360/24)*pi/180 for hour in hours]

# for horizontal plotting:
def moveMidnight2List(lst):
    """Rotate ``lst`` in place by two positions and return it.

    The first two entries are moved, one after the other, to the end of the
    list; the very same list object is returned.
    """
    for _ in range(2):
        head = lst.pop(0)
        lst.append(head)
    return lst
# Tick labels of the horizontal plot: 25 hour labels that start at 2, run up to 24 and wrap around to 1 and 2.
hours2 = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 1, 2]
# Matching tick positions: 25 half-integer x values from -0.5 to 23.5.
ticksPos = [-.5,.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5]

## plotting function for circular plot
Function that fills a given polar axes (`ax`) with the circular plot.

In [5]:
def plotAnomCycle(plotXdf, axX, mode, biggerMax, offset=0):
    """Draw the radial 24-hour anomaly chart for one flow direction.

    Grey bars show the hourly ``<mode>MDD`` values; on top of them the hourly
    ``<mode>Anom`` values are stacked, coloured by sign via ``PosNegCol``.
    A small pie in the centre relates the day's summed anomalies to the day's
    summed ``<mode>`` values, and the summed anomalies are written as a
    caption onto the figure.

    Parameters
    ----------
    plotXdf : pandas.DataFrame
        One row per hour with the columns ``hour``, ``<mode>``,
        ``<mode>MDD`` and ``<mode>Anom``.
    axX : polar matplotlib axes
        Axes to draw on.
    mode : str
        Column stem and key into ``modesColor`` (e.g. ``'moves'``).
    biggerMax : float
        Outer radial limit. Passing the same value for two plots gives them
        the same scale.
    offset : float, optional
        Horizontal shift (figure coordinates) of the caption.

    Returns
    -------
    None
    """
    modeMDD = mode+'MDD'
    modeAnom = mode+'Anom'

    # An empty selection has no peak hour (idxmax would raise), so only mark it.
    if plotXdf.empty:
        axX.set_axis_off()
        axX.figure.text(0.11+offset, 0.85, 'no data', color=PosNegCol[False])
        return

    valuesXmdd = list(plotXdf[modeMDD])
    valuesXanom = list(plotXdf[modeAnom])

    # Peak = hourly anomaly with the greatest magnitude, sign kept.
    # On a tie the negative one wins, as in the original label logic.
    minAnom = plotXdf[modeAnom].min()
    maxAnomSign = plotXdf[modeAnom].max()
    if maxAnomSign > -minAnom:
        peakAnom = maxAnomSign
        peakRow = plotXdf[modeAnom].idxmax()
    else:
        peakAnom = minAnom
        peakRow = plotXdf[modeAnom].idxmin()
    maxAnom = abs(peakAnom)
    # idxmax/idxmin return index labels, hence .loc (not .iloc).
    peakHour = plotXdf.loc[peakRow, 'hour']
    # Colour the label by the sign of the value it actually shows.
    peakColor = PosNegCol[bool(peakAnom > 0)]

    maxX = biggerMax
    baseX = .6*maxX

    hoursX = plotXdf.hour
    # Bar centres: middle of each hour slot on the 24-hour dial.
    angles = list(hoursX*(360/24)*pi/180+(pi/24))

    axX.set_theta_direction(-1)
    axX.set_theta_offset(-pi)
    # Put the radial label next to the bar of the peak hour.
    axX.set_rlabel_position((peakHour+0.5)*(360/24))
    axX.yaxis.grid(linestyle = (0,(1,5)))
    axX.spines['polar'].set_visible(False)
    axX.set_rlim([-baseX,maxX])
    axX.set_xticks(ticks)
    axX.set_xticklabels(hours, color='grey', size=8)
    axX.set_yticks([1.25*maxX, maxX, .75*maxX, .5*maxX, .25*maxX, ])

    # Only the outermost radial tick carries a label: the peak anomaly.
    if (maxAnom>1000):
        peakLabel = str(np.round(peakAnom/1000,decimals=1))+' Tsd'
    else:
        # builtin int: the np.int alias no longer exists in current NumPy
        peakLabel = str(int(peakAnom))
    axX.set_yticklabels([peakLabel,'','','',''], color=peakColor)

    # NOTE(review): transData._b is a private matplotlib attribute; it is kept
    # because the circle radii are tuned to it.
    axX.add_artist(plt.Circle((0, 0), baseX, transform=axX.transData._b,
                             color='#FFFFFF', alpha=1, zorder=5))
    axX.add_artist(plt.Circle((0, 0), .95*baseX, transform=axX.transData._b,
                             fill=False, edgecolor=modesColor[mode], linewidth=5, alpha=1, zorder=10))
    axX.add_artist(plt.Circle((0, 0), maxX+baseX, transform=axX.transData._b,
                             fill=False, edgecolor='#777777', linewidth=1.5, alpha=1, zorder=10))
    axX.bar(angles, valuesXmdd, width=pi/14, color='#BFBFBF', alpha=1, zorder=10)
    axX.bar(angles, valuesXanom, width=pi/19, color=(plotXdf[modeAnom] > 0).map(PosNegCol), bottom = valuesXmdd, alpha=1, zorder=20)

    # Pie chart in the middle: the day's summed anomalies relative to the day's total.
    totalMode = plotXdf[mode].sum()
    totalAnom = plotXdf[modeAnom].sum()
    totalColor = PosNegCol[bool(totalAnom > 0)]
    totalAnom = np.abs(totalAnom)
    # A zero daily total gets no wedge instead of a division by zero.
    angleAnom = (360/totalMode)*totalAnom*pi/180 if totalMode else 0.0
    # The pie size grows with the number of digits of the daily total.
    totalModeDim = len(str(int(totalMode)))
    pieRadius = 0.8*(baseX/np.float_power(7,1.4))*np.float_power(totalModeDim,1.4)
    # circle for mode
    axX.add_artist(plt.Circle((0,0), pieRadius, transform=axX.transData._b,
                              fill=True, color='#BFBFBF', alpha=1, zorder=20))
    # bar for anoms
    axX.bar(pi/2, pieRadius,
            width=angleAnom, alpha=1, color=totalColor, bottom=-baseX, zorder=30)

    # Caption goes onto the figure that owns axX, not onto pyplot's "current" figure.
    if (totalAnom > 1000):
        caption = 'total Anomalies '+str(np.round(totalAnom/1000,decimals=1))+' Tsd'
    else:
        caption = 'total Anomalies '+str(int(totalAnom))
    axX.figure.text(0.11+offset, 0.85, caption, color=totalColor)
    return

## plotting function for horizontal plot
Function that fills a given axes (`ax`) with the horizontal plot.

In [6]:
def plotAnomHorizontal(ax, plotXdf, plotYdf, mode, double=True):
    """Draw the horizontal 24-hour anomaly bar chart onto ``ax``.

    The values of ``plotXdf`` are drawn upwards from zero. With
    ``double=True`` the values of ``plotYdf`` are drawn mirrored downwards,
    below a coloured band that carries the hour labels. Grey bars show the
    ``<mode>MDD`` column; the ``<mode>Anom`` column is stacked on top of them
    and coloured by sign via ``PosNegCol``.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    plotXdf, plotYdf : pandas.DataFrame
        One row per hour with the columns ``hour``, ``<mode>``,
        ``<mode>MDD`` and ``<mode>Anom``. ``plotYdf`` is only read when
        ``double`` is true.
    mode : str
        Column stem and key into ``modesColor``.
    double : bool, optional
        Whether to draw the mirrored second direction.

    Returns
    -------
    None
    """
    modeMDD = mode+'MDD'
    modeAnom = mode+'Anom'

    def barPositions(hourColumn):
        # The x axis starts at 2 o'clock (see hours2 / ticksPos), so hour h is
        # drawn at (h-2) mod 24. Deriving the position from the hour itself
        # keeps every bar on its own hour even when some hours are missing.
        return [(int(h)-2) % 24 for h in hourColumn]

    valuesX = list(plotXdf[mode])
    valuesXmdd = list(plotXdf[modeMDD])
    valuesXanom = list(plotXdf[modeAnom])
    positionsX = barPositions(plotXdf.hour)

    # Nothing to draw at all: mark the axes and stop.
    if not valuesX and not (double and len(plotYdf)):
        ax.text(0.5, 0.5, 'no data', transform=ax.transAxes, ha='center')
        return

    maxX = max(valuesXmdd + valuesX, default=0)
    # Height of the coloured label band below zero.
    base = -.08*maxX

    negticks = []
    negticklabels = []
    if (double):
        # The second direction is mirrored: all of its values are negated.
        valuesY = list(-plotYdf[mode])
        valuesYmdd = list(-plotYdf[modeMDD])
        valuesYanom = list(-plotYdf[modeAnom])
        positionsY = barPositions(plotYdf.hour)
        maxY = min(valuesYmdd + valuesY, default=0)
        base = -.08*min(maxX,-maxY)
        negticks = [base, base+.25*maxY, base+.5*maxY, base+.75*maxY, base+maxY]
        if (maxY<-1000):
            negticklabels = [0,'','','',str(np.round(-maxY/1000,decimals=1))+' Tsd']
        else:
            # builtin int: the np.int alias no longer exists in current NumPy
            negticklabels = [0,'','','',str(int(-maxY))]

    ax.yaxis.grid(linestyle = (0,(1,5)))
    for spine in ['left','right','top','bottom']:
        ax.spines[spine].set_visible(False)
    ax.spines['bottom'].set_position('zero')

    # White hour labels; they are readable on the coloured band drawn below.
    ax.set_xticks(ticksPos)
    ax.set_xticklabels(hours2, color='#FFFFFF', size=8, zorder=1)
    ax.tick_params(axis='both', which='both', length=0)

    width=0.9

    if (maxX>1000):
        topLabel = str(np.round(maxX/1000,decimals=1))+' Tsd'
    else:
        topLabel = str(int(maxX))
    ax.set_yticks([maxX, .75*maxX, .5*maxX, .25*maxX, 0]+negticks)
    ax.set_yticklabels([topLabel,'','','',0]+negticklabels, color='#1a9641')
    ax.spines['bottom'].set_linestyle((0,(1,5)))
    ax.spines['bottom'].set_color('#777777')

    if valuesXmdd:
        colorsX = [PosNegCol[bool(v > 0)] for v in valuesXanom]
        ax.bar(positionsX, valuesXmdd, width=width, color='#BFBFBF', alpha=1, zorder=10)
        ax.bar(positionsX, valuesXanom, width=.8*width, color=colorsX, bottom = valuesXmdd, alpha=1, zorder=20)
    ax.axhline(y=maxX, color='#1a9641', linewidth=1)

    if (double):
        if valuesYmdd:
            # Anomaly bars start where the (shifted) grey bars end.
            bottoms = [base + v for v in valuesYmdd]
            # Values are negated, so "< 0" here means a positive anomaly.
            colorsY = [PosNegCol[bool(v < 0)] for v in valuesYanom]
            ax.bar(positionsY, valuesYmdd, width=width, color='#BFBFBF', alpha=1, zorder=10, bottom=base)
            ax.bar(positionsY, valuesYanom, width=.8*width, color=colorsY, bottom = bottoms, alpha=1, zorder=20)
        ax.axhline(y=maxY+base, color='#1a9641', linewidth=1)
        ax.axhline(y=.5*base, color=modesColor[mode], linewidth=12, zorder=0)
    else:
        ax.axhline(y=.5*base, color=modesColor[mode], linewidth=10, zorder=0)
    return

## function to choose between circular and horizontal
function to extract data and plot one or two circular plots or a horizontal one

In [7]:
def plotAnomaliesOnNormalsCycle(fromCells='', toCells='', mode='moves', circular=True):
    """Filter and aggregate ``anomCycledf`` and draw the matching plot(s).

    The rows selected by the cell filters are summed per hour. With at least
    one filter set, two directions are shown (A: the selected flows, B: the
    reverse flows), sharing one scale. Without any filter a single plot of
    all flows is drawn.

    Parameters
    ----------
    fromCells, toCells : str, optional
        Comma-separated cell ids, e.g. ``'100, 101'``. Blanks and empty
        entries are ignored; an empty string means "no filter". A list of
        ids is accepted as well.
    mode : str, optional
        Column stem to plot (one of ``modes``).
    circular : bool, optional
        ``True`` for radial plots, ``False`` for the horizontal plot.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a cell id cannot be converted to ``int``.
    """
    def parseCells(cells):
        # '100, 101,' -> [100, 101]; empty tokens are skipped instead of failing in int('').
        if isinstance(cells, str):
            return [int(token) for token in cells.split(',') if token.strip()]
        return [int(cell) for cell in cells] if cells else []

    # Same spec the original built with d.update(modesAggSum), without touching the global d.
    aggSpec = dict(d)
    aggSpec.update(modesAggSum)
    allHours = pd.Index(range(24), name='hour')

    def hourlySums(rows):
        # Sum over flowIDs and directions for each hour.
        summed = rows.groupby(['hour']).agg(aggSpec)
        summed.columns = summed.columns.get_level_values(0)
        # Hours without any row count as zero, so each frame always holds all
        # 24 hours in order, as the plotting functions expect.
        return summed.reindex(allHours, fill_value=0).reset_index()

    def frameMax(frame):
        return max([frame[mode+'MDD'].max(), frame[mode].max()])

    # Parse first: a malformed filter should not leave an empty figure behind.
    fromIds = parseCells(fromCells)
    toIds = parseCells(toCells)

    # initialize plot
    fig = plt.figure(figsize=(16,8))

    if not fromIds and not toIds:
        # No filter: one frame with all flows.
        plotABdf = hourlySums(anomCycledf)
        maxAB = frameMax(plotABdf)
        if (circular):
            ax1 = fig.add_subplot(121, projection='polar')
            plotAnomCycle(plotABdf, ax1, mode, maxAB)
        else:
            ax = fig.add_subplot(211)
            plotAnomHorizontal(ax, plotABdf, plotABdf, mode, False)
        return

    origin = anomCycledf.from_cell
    destination = anomCycledf.to_cell
    if fromIds and toIds:
        # flows from fromCells to toCells and vice versa
        rowsA = anomCycledf[origin.isin(fromIds) & destination.isin(toIds)]
        rowsB = anomCycledf[origin.isin(toIds) & destination.isin(fromIds)]
    elif fromIds:
        # flows from fromCells to everywhere and vice versa
        rowsA = anomCycledf[origin.isin(fromIds)]
        rowsB = anomCycledf[destination.isin(fromIds)]
    else:
        # flows to toCells from everywhere and vice versa
        rowsA = anomCycledf[destination.isin(toIds)]
        rowsB = anomCycledf[origin.isin(toIds)]

    if rowsA.empty and rowsB.empty:
        fig.text(0.5, 0.5, 'no flows match the selected cells', ha='center')
        return

    plotAdf = hourlySums(rowsA)
    plotBdf = hourlySums(rowsB)
    # Common scale for both directions.
    biggerMax = max(frameMax(plotAdf), frameMax(plotBdf))

    if (circular):
        ax1 = fig.add_subplot(121, projection='polar')
        plotAnomCycle(plotAdf, ax1, mode, biggerMax)
        ax2 = fig.add_subplot(122, projection='polar')
        plotAnomCycle(plotBdf, ax2, mode, biggerMax, 0.42)
    else:
        ax = fig.add_subplot(111)
        plotAnomHorizontal(ax, plotAdf, plotBdf, mode)
    return

In [8]:
# Build the widget UI for plotAnomaliesOnNormalsCycle: two text fields for the cell filters
# and a dropdown offering the entries of `modes`.
# NOTE(review): per the ipywidgets docs, interact_manual only calls the function when its button is clicked.
pyw.interact_manual(plotAnomaliesOnNormalsCycle, fromCells='', toCells='', mode=modes)

interactive(children=(Text(value='', description='fromCells'), Text(value='', description='toCells'), Dropdown…

<function __main__.plotAnomaliesOnNormalsCycle(fromCells='', toCells='', mode='moves', circular=True)>