# Animation

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from folium.plugins import HeatMap
import folium
from folium.features import DivIcon
import datetime
import warnings
import os
import shutil
import os
import time
import imageio
import branca
import urllib.request
from selenium import webdriver
from IPython.display import Image
from branca.utilities import split_six
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# Stream the CSV in 100k-row chunks, keeping only the columns this notebook uses.
# ZipCode is read as text: dtypes are otherwise inferred chunk by chunk, so the
# same ZIP could arrive as the number 12345 in one chunk and the string '12345'
# in another and would then be treated as two different groupby keys.
record_columns=['Type','StartTime(UTC)','City','LocalTimeZone','StartPoint_Lat','StartPoint_Lng','ZipCode','County','State']
records=pd.read_csv('random_sample_data_MQ.csv',iterator=True,chunksize=10**5,low_memory=False,
                    usecols=record_columns,dtype={'ZipCode':str})
mylist=[]
for chunk in records:
    mylist.append(chunk[record_columns])
records=pd.concat(mylist)

# Show the time correctly.
# After this step 'StartTime(UTC)' holds LOCAL wall-clock time: the fixed UTC
# offset of each row's time-zone label is added to the parsed timestamp.
records['StartTime(UTC)']=pd.to_datetime(records['StartTime(UTC)'])
# .copy() so the column assignments below act on an independent frame, not a view.
records=records[records['LocalTimeZone'].notnull()].copy()
dic={'EDT':datetime.timedelta(hours=-4), 'EST':datetime.timedelta(hours=-5),
     'CDT':datetime.timedelta(hours=-5), 'CST':datetime.timedelta(hours=-6), 
     'MDT':datetime.timedelta(hours=-6), 'MST':datetime.timedelta(hours=-7),
     'PDT':datetime.timedelta(hours=-7), 'PST':datetime.timedelta(hours=-8)}
offsets=records['LocalTimeZone'].map(dic)
# Fail loudly (same exception type as a plain dict lookup) and name the labels at fault.
unknown_zones=records.loc[offsets.isnull(),'LocalTimeZone'].unique().tolist()
if unknown_zones:
    raise KeyError('no UTC offset defined for time zone(s): {}'.format(unknown_zones))
records['StartTime(UTC)']=records['StartTime(UTC)']+offsets
start=datetime.datetime(2016,2,1)
records=records[(records['StartTime(UTC)']>=start)].copy()

# Transfer the form of zipcode.
# ZIP+4 values such as '12345-6789' are cut down to their first part; missing values stay missing.
records['ZipCode']=records['ZipCode'].str.split('-').str[0]

# Mean latitude/longitude of all events; used as the centre of every map below.
location=records[['StartPoint_Lat','StartPoint_Lng']].mean().values.tolist()

In [3]:
# Distinct values of the Type column; the per-type loops below iterate over them.
kinds=records['Type'].unique()


## heatmap

In [78]:
# Example of the kind of animation this section builds.
heatmap_example_url='Traffic-Event-Distrubution/rst/heatmap/overall heatmap for diffrent monthes.gif'
Image(url=heatmap_example_url)

In [4]:
def generate_gif(title,group):
    """Combine the PNG frames in ``temp/`` into ``rst/<group>/<title>.gif``.

    Frames are ordered by the integer that ends each file name (the ``7`` in
    ``month-7.png``), so numeric order is used rather than alphabetical order.
    Nothing is written when ``temp/`` contains no PNG files.

    Parameters
    ----------
    title : name of the GIF file, without the extension.
    group : sub-directory of ``rst`` to write into; may be nested,
        e.g. ``'heatmap/regulated'``.

    Raises
    ------
    ValueError : if a PNG file name does not end in a number.
    """
    PATH_NAME='temp/'

    def frame_number(filename):
        # Trailing run of digits in the name, whatever its length.
        stem=filename[:-len('.png')]
        return int(stem[len(stem.rstrip('0123456789')):])

    filenames=sorted((fn for fn in os.listdir(PATH_NAME) if fn.endswith('.png')),key=frame_number)
    if not filenames:
        return
    images=[imageio.imread(PATH_NAME + filename) for filename in filenames]
    # makedirs also creates missing parents, which a nested group needs.
    os.makedirs('rst/{}'.format(group),exist_ok=True)
    # Write the animation once, after every frame has been read.
    imageio.mimsave('rst/{path}/{title}.gif'.format(path=group,title=title), images, duration=0.5,loop=5)

In [5]:

def heatmap(matrix,max_val,title):
    """Draw one heat-map frame and save it as ``temp/<title>.html``.

    ``matrix`` is handed to folium's HeatMap unchanged and ``max_val`` is
    forwarded as its ``max_val`` argument.  The map is centred on the
    module-level ``location`` and ``title`` is drawn as a text label
    offset from that centre.
    """
    frame=folium.Map(location=location,zoom_start = 4)
    heat_layer=HeatMap(matrix,min_opacity=0.45,max_val=max_val,radius=13,blur=15,max_zoom=2)
    heat_layer.add_to(frame)

    # The caption is a marker whose icon is plain HTML text.
    caption_position=[location[0]+15,location[1]+30]
    caption_icon=DivIcon(
        icon_size=(150,36),
        icon_anchor=(0,0),
        html='<div style="font-size: 24pt">{}</div>'.format(title),
    )
    folium.map.Marker(caption_position,icon=caption_icon).add_to(frame)

    frame.save('temp/{}.html'.format(title))
    
def serial_heatmap(df,ptr,title,driver_path=r'C:\Program Files\geckodriver.exe'):
    """Animate per-ZIP-code event counts over months, hours or weekdays.

    For every value of the chosen time unit the events are counted per ZIP
    code, heatmap() renders one frame, Firefox takes a screenshot of it, and
    generate_gif() joins the screenshots into ``rst/heatmap/<title>.gif``.
    The ``temp`` directory is recreated at the start and removed at the end.

    Parameters
    ----------
    df : DataFrame with 'StartTime(UTC)', 'ZipCode', 'Type', 'StartPoint_Lat'
        and 'StartPoint_Lng' columns.  It is not modified.
    ptr : 'month', 'hour' or 'day' (ISO weekday, 1-7).
    title : file name of the GIF, without the extension.
    driver_path : path of the geckodriver executable used to start Firefox.

    Raises
    ------
    ValueError : if ``ptr`` is not one of the supported values.
    """
    extractors={'month':lambda x:x.month,'hour':lambda x:x.hour,'day':lambda x:x.isoweekday()}
    if ptr not in extractors:
        raise ValueError("ptr must be 'month', 'hour' or 'day', got {!r}".format(ptr))
    # assign() works on a copy, so the caller's frame does not gain a 'group' column.
    groups=df.assign(group=df['StartTime(UTC)'].map(extractors[ptr])).groupby('group')
    matrixs=dict()
    max_val=0
    for name,group in groups:
        by_zip=group.groupby('ZipCode')
        # Columns are selected with a list; tuple selection is rejected by current pandas.
        zip_pos=by_zip[['StartPoint_Lat','StartPoint_Lng']].mean()
        zip_pos['count']=by_zip['Type'].count()
        matrixs[name]=zip_pos.values.tolist()
        # One shared maximum so that every frame uses the same intensity scale.
        max_val=max(max_val,zip_pos['count'].max())
    PATH_NAME='temp'
    try:
        shutil.rmtree(PATH_NAME)
    except FileNotFoundError:pass
    os.makedirs(PATH_NAME)
    for name in matrixs:
        heatmap(matrixs[name],max_val,'{type}-{order}'.format(type=ptr,order=name))
    browser = webdriver.Firefox(executable_path=driver_path)
    try:
        for name in matrixs:
            tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile='temp/{type}-{order}.html'.format(type=ptr,order=name))
            browser.get(tmpurl)
            # Short pause before the screenshot so the page has time to draw.
            time.sleep(0.2)
            browser.save_screenshot('temp/{type}-{order}.png'.format(type=ptr,order=name))
    finally:
        # Always close Firefox, even when a page load or screenshot fails.
        browser.quit()
    generate_gif(title,'heatmap')
    shutil.rmtree(PATH_NAME)



In [41]:
# One animation of all events for each time grouping.
for ptr,title in (('month','overall heatmap for diffrent monthes'),
                  ('hour','overall heatmap for diffrent hours'),
                  ('day','overall heatmap for diffrent day of a week')):
    serial_heatmap(records,ptr,title)

In [7]:
# Repeat the three animations for every event type separately.
kinds=records['Type'].unique()
for kind in kinds:
    kind_records=records[records['Type']==kind]
    # '/' cannot be part of a file name, so it is swapped for '&' in the title.
    kind_label=kind.replace('/','&')
    serial_heatmap(kind_records,'month','{}\'s heatmap for different monthes'.format(kind_label))
    serial_heatmap(kind_records,'hour','{}\'s heatmap for different hours'.format(kind_label))
    serial_heatmap(kind_records,'day','{}\'s heatmap for different day of a week'.format(kind_label))

### regulated heatmap

In [6]:
def serial_regulated_heatmap(df,ptr,kind,title,driver_path=r'C:\Program Files\geckodriver.exe'):
    """Animate the share of one event type per ZIP code over time.

    For every value of the chosen time unit and every ZIP code, the weight is
    the number of ``kind`` events divided by the number of all events there.
    Frames are rendered by heatmap() (which centres the map on the
    module-level ``location``), screenshotted with Firefox and joined by
    generate_gif() into ``rst/heatmap/regulated/<title>.gif``.

    Parameters
    ----------
    df : DataFrame with 'StartTime(UTC)', 'ZipCode', 'Type', 'StartPoint_Lat'
        and 'StartPoint_Lng' columns.  It is not modified.
    ptr : 'month', 'hour' or 'day' (ISO weekday, 1-7).
    kind : value of the 'Type' column whose share is shown.
    title : file name of the GIF, without the extension.
    driver_path : path of the geckodriver executable used to start Firefox.

    Raises
    ------
    ValueError : if ``ptr`` is not one of the supported values.
    """
    extractors={'month':lambda x:x.month,'hour':lambda x:x.hour,'day':lambda x:x.isoweekday()}
    if ptr not in extractors:
        raise ValueError("ptr must be 'month', 'hour' or 'day', got {!r}".format(ptr))
    # assign() works on a copy, so the caller's frame does not gain a 'group' column.
    groups=df.assign(group=df['StartTime(UTC)'].map(extractors[ptr])).groupby('group')
    matrixs=dict()
    max_val=0
    for name,group in groups:
        counts=pd.crosstab(group['ZipCode'],group['Type'])
        totals=counts.sum(axis=1)
        # A type that never occurs in this time bucket has a share of 0 everywhere.
        share=counts[kind]/totals if kind in counts.columns else totals*0.0
        # Columns are selected with a list; tuple selection is rejected by current pandas.
        zip_pos=group.groupby('ZipCode')[['StartPoint_Lat','StartPoint_Lng']].mean()
        zip_pos['ratio']=share.reindex(zip_pos.index,fill_value=0.0)
        matrixs[name]=zip_pos.values.tolist()
        # One shared maximum so that every frame uses the same intensity scale.
        max_val=max(max_val,zip_pos['ratio'].max())
    PATH_NAME='temp'
    try:
        shutil.rmtree(PATH_NAME)
    except FileNotFoundError:pass
    os.makedirs(PATH_NAME)
    for name in matrixs:
        heatmap(matrixs[name],max_val,'{type}-{order}'.format(type=ptr,order=name))
    browser = webdriver.Firefox(executable_path=driver_path)
    try:
        for name in matrixs:
            tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile='temp/{type}-{order}.html'.format(type=ptr,order=name))
            browser.get(tmpurl)
            time.sleep(0.2)
            browser.save_screenshot('temp/{type}-{order}.png'.format(type=ptr,order=name))
    finally:
        # Always close Firefox, even when a page load or screenshot fails.
        browser.quit()
    # The output group is nested; make sure its parent directories exist.
    os.makedirs('rst/heatmap/regulated',exist_ok=True)
    generate_gif(title,'heatmap/regulated')
    shutil.rmtree(PATH_NAME)

In [11]:
# Share-of-type ("regulated") heat-map animations for every event type.
for kind in kinds:
    # '/' cannot be part of a file name, so it is swapped for '&' in the title.
    kind_label=kind.replace('/','&')
    serial_regulated_heatmap(records,'month',kind,'{}\'s regulated heatmap for different monthes'.format(kind_label))
    serial_regulated_heatmap(records,'hour',kind,'{}\'s regulated heatmap for different hours'.format(kind_label))
    serial_regulated_heatmap(records,'day',kind,'{}\'s regulated heatmap for different day of a week'.format(kind_label))

## choropleth map for states

In [79]:
# Example of the kind of animation this section builds.
state_example_url='Traffic-Event-Distrubution/rst/state_choropleth/overall state choropleth for different monthes.gif'
Image(url=state_example_url)

In [19]:
def state_choropleth(df,legend_name,title,scale):
    """Draw one state-level choropleth frame and save it as ``temp/<title>.html``.

    Parameters
    ----------
    df : DataFrame whose columns are passed to folium as ``columns``; its
        data is matched against ``feature.id`` of the US-states GeoJSON.
    legend_name : caption of the colour legend.
    title : text drawn on the map and name of the HTML file.
    scale : class breaks, forwarded as ``threshold_scale`` and printed on the map.

    The map is centred on the module-level ``location``.
    """
    state_geo ='https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json'
    result = folium.Map(location=location, zoom_start=4)
    # NOTE(review): Map.choropleth / threshold_scale look like the legacy folium API;
    # confirm against the installed folium version before upgrading.
    result.choropleth(
        geo_data=state_geo,
        name='choropleth',
        data=df,
        columns=list(df.columns),
        key_on='feature.id',
        fill_color='YlGn',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=legend_name,
        threshold_scale=scale
    )
    # Caption marker; the heading tags are properly closed so that no empty
    # headings are inserted between the title and the scale.
    folium.map.Marker(
    [location[0]+15,location[1]+30],
    icon=DivIcon(
        icon_size=(150,36),
        icon_anchor=(0,0),
        html='<div style="font-size: 24pt"><h1>{title}</h1><h2>scale=</h2><h2>{scale}</h2></div>'.format(title=title,scale=str(scale)),
        )
    ).add_to(result)
    result.save('temp/{}.html'.format(title))

In [20]:
def serial_state(df,ptr,title,driver_path=r'C:\Program Files\geckodriver.exe'):
    """Animate per-state event counts over months, hours or weekdays.

    Events are counted per state for every value of the chosen time unit.
    All frames share one set of class breaks: the element-wise mean of each
    frame's split_six() breaks, with the first break forced to 0 and a final
    break of (largest count + 1).  Frames are rendered by state_choropleth(),
    screenshotted with Firefox and joined by generate_gif() into
    ``rst/state_choropleth/<title>.gif``.

    Parameters
    ----------
    df : DataFrame with 'StartTime(UTC)', 'State' and 'Type' columns.
        It is not modified.
    ptr : 'month', 'hour' or 'day' (ISO weekday, 1-7).
    title : file name of the GIF, without the extension.
    driver_path : path of the geckodriver executable used to start Firefox.

    Raises
    ------
    ValueError : if ``ptr`` is unsupported or ``df`` yields no groups.
    """
    extractors={'month':lambda x:x.month,'hour':lambda x:x.hour,'day':lambda x:x.isoweekday()}
    if ptr not in extractors:
        raise ValueError("ptr must be 'month', 'hour' or 'day', got {!r}".format(ptr))
    # assign() works on a copy, so the caller's frame does not gain a 'group' column.
    groups=df.assign(group=df['StartTime(UTC)'].map(extractors[ptr])).groupby('group')
    matrixs=dict()
    breaks=[]
    maxval=0
    for name,group in groups:
        matrixs[name]=group.groupby('State')['Type'].count().reset_index()
        breaks.append(np.asarray(split_six(matrixs[name]['Type']),dtype=float))
        maxval=max(maxval,max(matrixs[name]['Type']))
    if not matrixs:
        raise ValueError('no rows to plot')
    # Mean of the per-frame breaks, accumulated explicitly instead of probing locals().
    a=np.sum(breaks,axis=0)/len(breaks)
    a[0]=0
    scale=a.astype(int).tolist()+[maxval+1]
    PATH_NAME='temp'
    try:
        shutil.rmtree(PATH_NAME)
    except FileNotFoundError:pass
    os.makedirs(PATH_NAME)
    for name in matrixs:
        state_choropleth(matrixs[name],'the frequency of states','{type}-{order}'.format(type=ptr,order=name),scale)
    browser = webdriver.Firefox(executable_path=driver_path)
    try:
        for name in matrixs:
            tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile='temp/{type}-{order}.html'.format(type=ptr,order=name))
            browser.get(tmpurl)
            time.sleep(0.2)
            browser.save_screenshot('temp/{type}-{order}.png'.format(type=ptr,order=name))
            os.remove('temp/{type}-{order}.html'.format(type=ptr,order=name))
    finally:
        # Always close Firefox, even when a page load or screenshot fails.
        browser.quit()
    generate_gif(title,'state_choropleth')
    shutil.rmtree(PATH_NAME)

In [22]:
# One state-level animation of all events for each time grouping.
for ptr,title in (('month','overall state choropleth for different monthes'),
                  ('hour','overall state choropleth for different hours'),
                  ('day','overall state choropleth for different day of a week')):
    serial_state(records,ptr,title)

In [23]:
# Repeat the state-level animations for every event type separately.
for kind in kinds:
    kind_records=records[records['Type']==kind]
    # '/' cannot be part of a file name, so it is swapped for '&' in the title.
    kind_label=kind.replace('/','&')
    serial_state(kind_records,'month','{}\'s state choropleth for different monthes'.format(kind_label))
    serial_state(kind_records,'hour','{}\'s state choropleth for different hours'.format(kind_label))
    serial_state(kind_records,'day','{}\'s state choropleth for different day of a week'.format(kind_label))

### regulated state choropleth map

In [24]:
def serial_regulated_state(df,ptr,kind,title,driver_path=r'C:\Program Files\geckodriver.exe'):
    """Animate the share of one event type per state over time.

    For every value of the chosen time unit and every state, the plotted value
    is the number of ``kind`` events divided by the number of all events.
    All frames share one set of class breaks: the element-wise mean of each
    frame's split_six() breaks, rounded to 3 decimals, with the first break
    forced to 0 and a final break of 1.001.  Frames are rendered by
    state_choropleth(), screenshotted with Firefox and joined by
    generate_gif() into ``rst/state_choropleth/regulated/<title>.gif``.

    Parameters
    ----------
    df : DataFrame with 'StartTime(UTC)', 'State' and 'Type' columns.
        It is not modified.
    ptr : 'month', 'hour' or 'day' (ISO weekday, 1-7).
    kind : value of the 'Type' column whose share is shown.
    title : file name of the GIF, without the extension.
    driver_path : path of the geckodriver executable used to start Firefox.

    Raises
    ------
    ValueError : if ``ptr`` is unsupported or ``df`` yields no groups.
    """
    extractors={'month':lambda x:x.month,'hour':lambda x:x.hour,'day':lambda x:x.isoweekday()}
    if ptr not in extractors:
        raise ValueError("ptr must be 'month', 'hour' or 'day', got {!r}".format(ptr))
    # assign() works on a copy, so the caller's frame does not gain a 'group' column.
    groups=df.assign(group=df['StartTime(UTC)'].map(extractors[ptr])).groupby('group')
    matrixs=dict()
    breaks=[]
    for name,group in groups:
        # No margins: an 'All' total row is not a state and must not enter
        # the plotted data or the class breaks.
        counts=pd.crosstab(group['State'],group['Type'])
        totals=counts.sum(axis=1)
        # A type that never occurs in this time bucket has a share of 0 everywhere.
        share=counts[kind]/totals if kind in counts.columns else totals*0.0
        matrixs[name]=share.rename(kind).reset_index()
        breaks.append(np.asarray(split_six(matrixs[name][kind]),dtype=float))
    if not matrixs:
        raise ValueError('no rows to plot')
    # Mean of the per-frame breaks, accumulated explicitly instead of probing locals().
    a=np.sum(breaks,axis=0)/len(breaks)
    a[0]=0
    scale=a.round(3).tolist()+[1.001]

    PATH_NAME='temp'
    try:
        shutil.rmtree(PATH_NAME)
    except FileNotFoundError:pass
    os.makedirs(PATH_NAME)
    for name in matrixs:
        state_choropleth(matrixs[name],'the frequency of states','{type}-{order}'.format(type=ptr,order=name),scale)

    browser = webdriver.Firefox(executable_path=driver_path)
    try:
        for name in matrixs:
            tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile='temp/{type}-{order}.html'.format(type=ptr,order=name))
            browser.get(tmpurl)
            time.sleep(0.2)
            browser.save_screenshot('temp/{type}-{order}.png'.format(type=ptr,order=name))
            os.remove('temp/{type}-{order}.html'.format(type=ptr,order=name))
    finally:
        # Always close Firefox, even when a page load or screenshot fails.
        browser.quit()
    # The output group is nested; make sure its parent directories exist.
    os.makedirs('rst/state_choropleth/regulated',exist_ok=True)
    generate_gif(title,'state_choropleth/regulated')
    shutil.rmtree(PATH_NAME)

In [25]:
# Share-of-type ("regulated") state animations for every event type.
for kind in kinds:
    # '/' cannot be part of a file name, so it is swapped for '&' in the title.
    kind_label=kind.replace('/','&')
    serial_regulated_state(records,'month',kind,'{}\'s regulated state choropleth for different monthes'.format(kind_label))
    serial_regulated_state(records,'hour',kind,'{}\'s regulated state choropleth for different hours'.format(kind_label))
    serial_regulated_state(records,'day',kind,'{}\'s regulated state choropleth for different day of a week'.format(kind_label))

## choropleth map for counties

In [80]:
# Example of the kind of animation this section builds.
county_example_url='Traffic-Event-Distrubution/rst/county_choropleth/overall county choropleth for different monthes.gif'
Image(url=county_example_url)

In [51]:
# Default colour map for county frames; used when county_choropleth() is not given one.
colorscale = branca.colormap.linear.YlGnBu_09.scale(0, 30)
def county_choropleth(df,title,cmap=None):
    """Draw one county-level choropleth frame and save it as ``temp/<title>.html``.

    Parameters
    ----------
    df : Series-like object looked up with ``df.get(fips)``, where ``fips`` is
        the integer made from the last five characters of a feature id.
    title : text drawn on the map and name of the HTML file.
    cmap : callable mapping a value to a colour.  When omitted, the
        module-level ``colorscale`` is used.

    The map is centred on the module-level ``location``.
    """
    palette=colorscale if cmap is None else cmap
    def style_function(feature):
        value = df.get(int(feature['id'][-5:]), None)
        return {
            'fillOpacity': 0.5,
            'weight': 0,
            # Counties without data are drawn black ('black' is the valid colour name).
            'fillColor': 'black' if value is None else palette(value)
        }
    topo_url='https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us_counties_20m_topo.json'
    rst=folium.Map(location=location,tiles='cartodbpositron',zoom_start=3)
    # Close the HTTP response once folium has been handed the data.
    with urllib.request.urlopen(topo_url) as temp:
        folium.TopoJson(temp,'objects.us_counties_20m',style_function=style_function).add_to(rst)
    folium.map.Marker(
    [location[0]+15,location[1]+30],
    icon=DivIcon(
        icon_size=(150,36),
        icon_anchor=(0,0),
        html='<div style="font-size: 24pt">{}</div>'.format(title)
        )
    ).add_to(rst)
    rst.save('temp/{}.html'.format(title))
def serial_county(df,ptr,title,driver_path=r'C:\Program Files\geckodriver.exe'):
    """Animate per-county event counts over months, hours or weekdays.

    Events are counted per (County, State) for every value of the chosen time
    unit and joined onto the folium county table to obtain FIPS codes;
    counties without events get 0.  The colour map runs from 0 to the mean of
    the frames' 80th percentiles and is passed to county_choropleth() for
    every frame.  Frames are screenshotted with Firefox and joined by
    generate_gif() into ``rst/county_choropleth/<title>.gif``.

    Parameters
    ----------
    df : DataFrame with 'StartTime(UTC)', 'County', 'State' and 'Type'
        columns.  It is not modified.
    ptr : 'month', 'hour' or 'day' (ISO weekday, 1-7).
    title : file name of the GIF, without the extension.
    driver_path : path of the geckodriver executable used to start Firefox.

    Raises
    ------
    ValueError : if ``ptr`` is unsupported or ``df`` yields no groups.
    """
    extractors={'month':lambda x:x.month,'hour':lambda x:x.hour,'day':lambda x:x.isoweekday()}
    if ptr not in extractors:
        raise ValueError("ptr must be 'month', 'hour' or 'day', got {!r}".format(ptr))
    # assign() works on a copy, so the caller's frame does not gain a 'group' column.
    groups=df.assign(group=df['StartTime(UTC)'].map(extractors[ptr])).groupby('group')
    matrixs=dict()
    county_table=pd.read_csv('https://raw.githubusercontent.com/python-visualization/folium/master/tests/us_county_data.csv')
    #     edit the fips code
    # NOTE(review): only the first word of Area_name is kept as the county name, so
    # multi-word county names may fail to match df['County'] - confirm against the data.
    county_table['County']=county_table['Area_name'].map(lambda x:x.split(' ')[0])
    percentiles=[]
    for name,group in groups:
        counts=group.groupby(['County','State'])['Type'].count().reset_index()
        merged=pd.merge(county_table,counts,how='left',on=['County','State']).fillna(0)
        matrixs[name]=merged.set_index('FIPS_Code')['Type']
        percentiles.append(np.percentile(matrixs[name].values,80))
    if not matrixs:
        raise ValueError('no rows to plot')
    upper=float(np.mean(percentiles))
    if not upper>0:
        # Every frame has an 80th percentile of 0; fall back to the largest
        # count so that the colour map does not collapse to zero width.
        upper=max(max(float(series.max()) for series in matrixs.values()),1.0)
    # This scale is handed to county_choropleth() explicitly; a plain local
    # named `colorscale` would never be seen by that function.
    county_scale=branca.colormap.linear.YlGnBu_09.scale(0,upper)
    PATH_NAME='temp'
    try:
        shutil.rmtree(PATH_NAME)
    except FileNotFoundError:pass
    os.makedirs(PATH_NAME)
    for name in matrixs:
        county_choropleth(matrixs[name],'{type}-{order}'.format(type=ptr,order=name),cmap=county_scale)
    browser = webdriver.Firefox(executable_path=driver_path)
    try:
        for name in matrixs:
            tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile='temp/{type}-{order}.html'.format(type=ptr,order=name))
            browser.get(tmpurl)
            time.sleep(0.2)
            browser.save_screenshot('temp/{type}-{order}.png'.format(type=ptr,order=name))
            os.remove('temp/{type}-{order}.html'.format(type=ptr,order=name))
    finally:
        # Always close Firefox, even when a page load or screenshot fails.
        browser.quit()
    generate_gif(title,'county_choropleth')
    shutil.rmtree(PATH_NAME)

In [52]:
# One county-level animation of all events for each time grouping.
for ptr,title in (('month','overall county choropleth for different monthes'),
                  ('hour','overall county choropleth for different hours'),
                  ('day','overall county choropleth for different day of a week')):
    serial_county(records,ptr,title)

In [53]:
# Repeat the county-level animations for every event type separately.
for kind in kinds:
    kind_records=records[records['Type']==kind]
    # '/' cannot be part of a file name, so it is swapped for '&' in the title.
    kind_label=kind.replace('/','&')
    serial_county(kind_records,'month','{}\'s county choropleth for different monthes'.format(kind_label))
    serial_county(kind_records,'hour','{}\'s county choropleth for different hours'.format(kind_label))
    serial_county(kind_records,'day','{}\'s county choropleth for different day of a week'.format(kind_label))

### regulated county choropleth map

In [63]:
# Shares lie between 0 and 1, so the module-level colour map read by
# county_choropleth() is rebound to that range for the frames below.
colorscale = branca.colormap.linear.YlGnBu_09.scale(0, 1)

def serial_regulated_county(df,ptr,kind,title,driver_path=r'C:\Program Files\geckodriver.exe'):
    """Animate the share of one event type per county over time.

    For every value of the chosen time unit and every (County, State) pair,
    the plotted value is the number of ``kind`` events divided by the number
    of all events; the result is joined onto the folium county table to
    obtain FIPS codes, and counties without events get 0.  Frames are rendered
    by county_choropleth(), screenshotted with Firefox and joined by
    generate_gif() into ``rst/county_choropleth/regulated/<title>.gif``.

    Parameters
    ----------
    df : DataFrame with 'StartTime(UTC)', 'County', 'State' and 'Type'
        columns.  It is not modified.
    ptr : 'month', 'hour' or 'day' (ISO weekday, 1-7).
    kind : value of the 'Type' column whose share is shown.
    title : file name of the GIF, without the extension.
    driver_path : path of the geckodriver executable used to start Firefox.

    Raises
    ------
    ValueError : if ``ptr`` is not one of the supported values.
    """
    extractors={'month':lambda x:x.month,'hour':lambda x:x.hour,'day':lambda x:x.isoweekday()}
    if ptr not in extractors:
        raise ValueError("ptr must be 'month', 'hour' or 'day', got {!r}".format(ptr))
    # assign() works on a copy, so the caller's frame does not gain a 'group' column.
    groups=df.assign(group=df['StartTime(UTC)'].map(extractors[ptr])).groupby('group')
    matrixs=dict()
    county_table=pd.read_csv('https://raw.githubusercontent.com/python-visualization/folium/master/tests/us_county_data.csv')
    #     edit the fips code
    # NOTE(review): only the first word of Area_name is kept as the county name, so
    # multi-word county names may fail to match df['County'] - confirm against the data.
    county_table['County']=county_table['Area_name'].map(lambda x:x.split(' ')[0])
    for name,group in groups:
        counts=pd.crosstab([group['County'],group['State']],group['Type'])
        totals=counts.sum(axis=1)
        # A type that never occurs in this time bucket has a share of 0 everywhere.
        share=counts[kind]/totals if kind in counts.columns else totals*0.0
        merged=pd.merge(county_table,share.rename(kind).reset_index(),how='left',on=['County','State']).fillna(0)
        matrixs[name]=merged.set_index('FIPS_Code')[kind]
    PATH_NAME='temp'
    try:
        shutil.rmtree(PATH_NAME)
    except FileNotFoundError:pass
    os.makedirs(PATH_NAME)
    for name in matrixs:
        county_choropleth(matrixs[name],'{type}-{order}'.format(type=ptr,order=name))
    browser = webdriver.Firefox(executable_path=driver_path)
    try:
        for name in matrixs:
            tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile='temp/{type}-{order}.html'.format(type=ptr,order=name))
            browser.get(tmpurl)
            time.sleep(0.2)
            browser.save_screenshot('temp/{type}-{order}.png'.format(type=ptr,order=name))
            os.remove('temp/{type}-{order}.html'.format(type=ptr,order=name))
    finally:
        # Always close Firefox, even when a page load or screenshot fails.
        browser.quit()
    # The output group is nested; make sure its parent directories exist.
    os.makedirs('rst/county_choropleth/regulated',exist_ok=True)
    generate_gif(title,'county_choropleth/regulated')
    shutil.rmtree(PATH_NAME)

In [64]:
# Share-of-type ("regulated") county animations for every event type.
for kind in kinds:
    # '/' cannot be part of a file name, so it is swapped for '&' in the title.
    kind_label=kind.replace('/','&')
    serial_regulated_county(records,'month',kind,'{}\'s regulated county choropleth for different monthes'.format(kind_label))
    serial_regulated_county(records,'hour',kind,'{}\'s regulated county choropleth for different hours'.format(kind_label))
    serial_regulated_county(records,'day',kind,'{}\'s regulated county choropleth for different day of a week'.format(kind_label))

## bubble map

In [82]:
# example
# A GitHub "blob" URL returns the HTML viewer page rather than the file itself;
# '?raw=true' makes GitHub serve the GIF so that it can be shown as an image.
Image(url='https://github.com/mingxinlu/Traffic-Event-Distrubution/blob/master/overall%20bubblemap%20for%20different%20monthes.gif?raw=true')
# If the example does not render, download the GIF from the front page of the GitHub repository or open the URL directly.

Because folium takes too long to construct these images (and iplot is a limited module that does not provide enough quota), I will try to find a better module for building this kind of map in the future. The two pieces of code below worked in my tests, but I cannot provide the final GIFs: generating them takes too much time, and the browser module risks crashing because of the short delay allowed before each screenshot.

In [74]:
def bubblemap(df,value,title,scale):
    """Draw one bubble-map frame and save it as ``temp/<title>.html``.

    Parameters
    ----------
    df : sequence of circle centres; ``df[i]`` is used as a folium location.
    value : sequence of the same length; circle ``i`` gets the radius
        ``value[i]*scale``.
    title : text drawn on the map and name of the HTML file.
    scale : factor that turns a value into a circle radius.

    The colour of a circle depends on its position ``i`` in the sequences
    (first 5, next 15, next 30, next 50, all others).  The map is centred on
    the module-level ``location``.
    """
    rst = folium.Map(location=location, tiles="Mapbox Bright", zoom_start=5)
    def color(i):
        if i<5:return 'crimson'
        if i<20:return '#8B00FF'
        if i<50:return '#FFE5B4'
        if i<100:return '#81D8D0'
        return'#C0C0C0'

    # Circles are added from the last entry to the first, so entry 0 is drawn on top.
    for i in range(len(df)-1,-1,-1):
        folium.Circle(
        location=df[i],
        radius=value[i]*scale,
        color=color(i),
        fill=True,
        fill_color=color(i)).add_to(rst)
    # The caption is added once, after the loop, instead of once per circle.
    folium.map.Marker(
        [location[0]+15,location[1]+30],
        icon=DivIcon(
            icon_size=(150,36),
            icon_anchor=(0,0),
            html='<div style="font-size: 24pt">{}</div>'.format(title)
            )
        ).add_to(rst)
    rst.save('temp/{}.html'.format(title))
def serial_bubblemap(df,ptr,title,driver_path=r'C:\Program Files\geckodriver.exe'):
    """Animate per-city event counts as bubbles over months, hours or weekdays.

    For every value of the chosen time unit, events are counted per city and
    the cities are sorted from most to fewest events.  One radius factor is
    shared by all frames: 5000*70 divided by the largest count of any frame.
    Frames are rendered by bubblemap(), screenshotted with Firefox and joined
    by generate_gif() into ``rst/bubble_map/<title>.gif``.

    Parameters
    ----------
    df : DataFrame with 'StartTime(UTC)', 'City', 'Type', 'StartPoint_Lat'
        and 'StartPoint_Lng' columns.  It is not modified.
    ptr : 'month', 'hour' or 'day' (ISO weekday, 1-7).
    title : file name of the GIF, without the extension.
    driver_path : path of the geckodriver executable used to start Firefox.

    Raises
    ------
    ValueError : if ``ptr`` is unsupported or there are no events to plot.
    """
    extractors={'month':lambda x:x.month,'hour':lambda x:x.hour,'day':lambda x:x.isoweekday()}
    if ptr not in extractors:
        raise ValueError("ptr must be 'month', 'hour' or 'day', got {!r}".format(ptr))
    # assign() works on a copy, so the caller's frame does not gain a 'group' column.
    groups=df.assign(group=df['StartTime(UTC)'].map(extractors[ptr])).groupby('group')
    matrixs=dict()
    for name,group in groups:
        by_city=group.groupby('City')
        # Columns are selected with a list; tuple selection is rejected by current pandas.
        city_pos=by_city[['StartPoint_Lat','StartPoint_Lng']].mean()
        city_pos['count']=by_city['Type'].count()
        city_pos=city_pos.sort_values(by='count',ascending=False)
        matrixs[name]=[city_pos[['StartPoint_Lat','StartPoint_Lng']].values.tolist(),
                       city_pos['count'].values.tolist()]

    largest=max((max(frame[1]) for frame in matrixs.values() if frame[1]),default=0)
    if not largest>0:
        raise ValueError('no events to plot')
    scale=(5000*70)/largest
    PATH_NAME='temp'
    try:
        shutil.rmtree(PATH_NAME)
    except FileNotFoundError:pass
    os.makedirs(PATH_NAME)
    for name in matrixs:
        bubblemap(matrixs[name][0],matrixs[name][1],'{type}-{order}'.format(type=ptr,order=name),scale)

    browser = webdriver.Firefox(executable_path=driver_path)
    try:
        for name in matrixs:
            tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile='temp/{type}-{order}.html'.format(type=ptr,order=name))
            browser.get(tmpurl)
            time.sleep(0.2)
            browser.save_screenshot('temp/{type}-{order}.png'.format(type=ptr,order=name))
    finally:
        # Always close Firefox, even when a page load or screenshot fails.
        browser.quit()
    generate_gif(title,'bubble_map')
    # rmtree, not os.rmdir: the directory still holds the HTML and PNG frames.
    shutil.rmtree(PATH_NAME)

In [None]:
def regulated_bubblemap(df,ptr,kind,title,driver_path=r'C:\Program Files\geckodriver.exe'):
    """Animate the share of one event type per city as bubbles over time.

    For every value of the chosen time unit and every city, the value is the
    number of ``kind`` events divided by the number of all events; cities are
    sorted from the largest to the smallest share.  One radius factor is
    shared by all frames: 5000*70 divided by the largest share of any frame
    (0 when the type never occurs).  Frames are rendered by bubblemap(),
    screenshotted with Firefox and joined by generate_gif() into
    ``rst/bubble_map/regulated/<title>.gif``.

    Parameters
    ----------
    df : DataFrame with 'StartTime(UTC)', 'City', 'Type', 'StartPoint_Lat'
        and 'StartPoint_Lng' columns.  It is not modified.
    ptr : 'month', 'hour' or 'day' (ISO weekday, 1-7).
    kind : value of the 'Type' column whose share is shown.
    title : file name of the GIF, without the extension.
    driver_path : path of the geckodriver executable used to start Firefox.

    Raises
    ------
    ValueError : if ``ptr`` is not one of the supported values.
    """
    # 'month' groups by month and 'hour' by hour, matching the other serial_* functions.
    extractors={'month':lambda x:x.month,'hour':lambda x:x.hour,'day':lambda x:x.isoweekday()}
    if ptr not in extractors:
        raise ValueError("ptr must be 'month', 'hour' or 'day', got {!r}".format(ptr))
    # assign() works on a copy, so the caller's frame does not gain a 'group' column.
    groups=df.assign(group=df['StartTime(UTC)'].map(extractors[ptr])).groupby('group')
    matrixs=dict()
    for name,group in groups:
        # Columns are selected with a list; tuple selection is rejected by current pandas.
        city_pos=group.groupby('City')[['StartPoint_Lat','StartPoint_Lng']].mean()
        counts=pd.crosstab(group['City'],group['Type'])
        totals=counts.sum(axis=1)
        # A type that never occurs in this time bucket has a share of 0 everywhere.
        share=counts[kind]/totals if kind in counts.columns else totals*0.0
        city_pos['value']=share.reindex(city_pos.index,fill_value=0.0)
        # Keep the sorted frame: bubblemap() colours circles by their rank.
        city_pos=city_pos.sort_values(by='value',ascending=False)
        matrixs[name]=[city_pos[['StartPoint_Lat','StartPoint_Lng']].values.tolist(),
                       city_pos['value'].values.tolist()]
    largest=max((max(frame[1]) for frame in matrixs.values() if frame[1]),default=0)
    scale=(5000*70)/largest if largest>0 else 0
    PATH_NAME='temp'
    try:
        shutil.rmtree(PATH_NAME)
    except FileNotFoundError:pass
    os.makedirs(PATH_NAME)
    for name in matrixs:
        bubblemap(matrixs[name][0],matrixs[name][1],'{type}-{order}'.format(type=ptr,order=name),scale)
    browser = webdriver.Firefox(executable_path=driver_path)
    try:
        for name in matrixs:
            tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile='temp/{type}-{order}.html'.format(type=ptr,order=name))
            browser.get(tmpurl)
            time.sleep(0.2)
            browser.save_screenshot('temp/{type}-{order}.png'.format(type=ptr,order=name))
    finally:
        # Always close Firefox, even when a page load or screenshot fails.
        browser.quit()
    # The output group is nested; make sure its parent directories exist.
    os.makedirs('rst/bubble_map/regulated',exist_ok=True)
    generate_gif(title,'bubble_map/regulated')
    # rmtree, not os.rmdir: the directory still holds the HTML and PNG frames.
    shutil.rmtree(PATH_NAME)
