# ENSF 592 - FINAL PROJECT
## Calgary Accident Analysis
### By: Mike Lasby

# README
This notebook is the entry point of our project and functions as the View. The notebook depends on the Controller.py class to retrieve views of our data from the Model. 

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
from geojson import Point, MultiLineString
import geopandas as gpdnum_points
import re
import math
from folium_0_12 import folium #using pre relased 0.12 due to heatmap issues 
# SEE:  https://github.com/python-visualization/folium/issues/1271
import matplotlib.pyplot as plt
from controller import Controller

In [2]:
yyc_map = pd.read_csv('City_Boundary_layer.csv')
geom = yyc_map.the_geom[0]
print(geom)
print(type(geom))
lines = re.findall(r'\((.*?)\)', geom)
for line in lines:
    this_line = list(map(float, re.findall(r'[\-?\d\.?]+', line)))
    # print(this_line)


5 51.00864, -114.165544 51.00864, -114.170295 51.008645, -114.176042 51.00865, -114.183384 51.008659, -114.187861 51.008664, -114.189647 51.008665, -114.190674 51.008666, -114.199894 51.008672, -114.203709 51.008672, -114.210787 51.008674, -114.211122 51.008674, -114.222585 51.008697, -114.234373 51.008713, -114.234372 51.008879, -114.234354 51.012808, -114.234354 51.012872, -114.234353 51.013189, -114.234351 51.01344, -114.23434 51.016102, -114.234329 51.01812, -114.234329 51.01827, -114.234325 51.018946, -114.23432 51.019997, -114.234303 51.023322, -114.234305 51.023759, -114.234304 51.024874, -114.234305 51.025656, -114.234305 51.026172, -114.234306 51.026645, -114.234306 51.027106, -114.234306 51.027581, -114.234307 51.028088, -114.234307 51.02866, -114.234307 51.029035, -114.234307 51.029251, -114.234309 51.030558, -114.234383 51.030558, -114.234395 51.036843, -114.234397 51.037709, -114.234318 51.037709, -114.234321 51.037938, -114.234313 51.043473, -114.234309 51.045192, -114.23

In [2]:
ctrl = Controller()
ctrl.load_data()
ctrl.add_geo_cols()
ctrl.add_cell_col()

Loading Data...
...Data Loaded.
Adding geometry column to speeds from multiline. Flip coords? True
Adding geometry column to volumes from multilinestring. Flip coords? True
Adding geometry column to incidents from location. Flip coords? False
Adding geometry column to cameras from None. Flip coords? True
Adding geometry column to signals from Point. Flip coords? True
Adding geometry column to signs from POINT. Flip coords? True
Adding geometry column to cells from cell_bounds. Flip coords? False
Adding cell column to speeds
Adding cell column to volumes
Adding cell column to incidents
Adding cell column to cameras
Adding cell column to signals
Adding cell column to signs


In [3]:
    def generate_map():
        width, height = 960, 600
        ne, sw = ctrl.mdl.get_yyc_bounds()
        print(ne, sw)
        mapa = folium.Map(location=ne, width=width,
                               height=height, toFront=True)

        for cell in ctrl.get_frame('cells')['cells']:
            cell.add_to(mapa)

        rect = folium.Rectangle(bounds=[ne, sw], weight=2, dash_array=(
            "4"), color='red', tooltip='Analysis Boundary').add_to(mapa)

        mapa.save('index.html')

        return(mapa)






In [5]:
#Speed Polylines Map

#GET speed WEIGHTED POLYLINE

import branca.colormap as cm

def get_speed_list(geom, speed, street_name):
    print(geom)
    point_cloud = []
    tooltip = f'{street_name} speed limit: {speed}'
    for lines in geom['coordinates']: 
        for points in lines: 
            lat = float(points[0])
            lon = float(points[1])
            point_cloud.append([[lat, lon], speed, tooltip])
    return point_cloud

def draw_speed_map2(mapa):
    '''get speed map
    @args: speed_frame :pd.DataFrame to map
    @return folium map NOTE: front end will simply render map.html from assets
    '''
    df = ctrl.get_frame('speeds')
    calgary_map = mapa
    color_map = cm.LinearColormap(colors = ['yellow', 'red'], vmin = df['SPEED'].min(), vmax =df['SPEED'].max())

    df['speed_lines'] = df.apply(lambda row: get_speed_list(row['geometry'], row['SPEED'], row['STREET_NAME']), axis = 1)

    df = df.sort_values(by = 'SPEED', ascending = False)

    print(df['multiline'].iloc[0])
    print(df['geometry'].iloc[0])

    display(df)

    for points in df['speed_lines'].tolist():
        locations  = []
        colors = []
        tooltip = points[0][2]
        for point in points: 
            loc = point[0]
            color = point[1]
            tooltip = point[2]
            locations.append(loc)
            colors.append(color)
        # this_line = folium.ColorLine(positions = locations, colors = colors, colormap = color_map, weight = 3, tooltip = tooltip)
        this_line = folium.PolyLine(locations = locations, tooltip = tooltip, color = color_map(color))
        this_line.add_to(mapa)   
    mapa.save('index.html')
    print("map saved")

def rgb_to_hex(rgb):
    '''Method converts (r,g,b) to hex color, helper for map functons'''
    # '%' format operator, '02x' hexadecimal form.
    # This line says get hexidecimal by parsing three items from tuple
    return '#'+'%02x%02x%02x' % rgb

mapa=generate_map()

draw_speed_map2(mapa)

[51.212425, -113.859905] [50.842822, -114.315796]
{"coordinates": [[[50.920573, -114.074939], [50.920779, -114.074939]]], "type": "MultiLineString"}


TypeError: float() argument must be a string or a number, not 'list'

In [None]:
#Speed Polylines Map V1

#GET speed WEIGHTED Colorline
    
def rgb_to_hex(rgb):
    '''Method converts (r,g,b) to hex color, helper for map functons'''
    # '%' format operator, '02x' hexadecimal form.
    # This line says get hexidecimal by parsing three items from tuple
    return '#'+'%02x%02x%02x' % rgb


def draw_speed_map(mapa, speed_frame):
    '''get speed map
    @args: speed_frame :pd.DataFrame to map
    @return folium map NOTE: front end will simply render map.html from assets
    '''
    calgary_map = mapa
    color_increments = 256  # 0-255
    color_steps = len(speed_frame)  # number of data entries
    n_color = color_steps // color_increments  # change color every n_color steps
    step_counter = 0
    step = 256//color_steps
    if step == 0:
        step = 1
    r = 255  # since we sorted above, the largest volume will be red, green lightest volume
    g = 0
    b = 102

    speed_frame = speed_frame.sort_values(by = 'SPEED', ascending = False)

    display(speed_frame)

    for index, items in speed_frame.iterrows():
        secname = items['STREET_NAME']
        speed = items['SPEED']
        the_geom = items['multiline']
        year = 2018
        if step_counter > n_color:  # Check if we need to change color
            r -= step
            g += step
            step_counter = 0
        point_cloud = []
        color = rgb_to_hex((r, g, b))
        location = re.search(r'\(\((.*?)\)\)', the_geom)
        for loc in location.groups(1):
            loc = loc.split(',')  # split coordinate pairs
            for point in loc:
                point = point.strip().split(' ')
                lon = point[0].strip(')').strip(
                    '(')  # remove any remaining parenthesis
                lon = float(lon)  # convert from string
                lat = point[1].strip('(').strip(')')
                lat = float(lat)
                point_cloud.append([lat, lon])
        polyline = folium.PolyLine(
            locations=point_cloud, tooltip=f'{secname}\n {speed} number of cars in {year}', color=color).add_to(calgary_map)
        step_counter += 1
    print(f"Saving total speed {year} map")
    calgary_map.save('index.html', zoom_start=5)
    return calgary_map

mapa = generate_map()
mapa = draw_speed_map(mapa, ctrl.get_frame('speeds'))

In [9]:
 #HEATMAP
# ctrl.gen_heatmap()

mapa = generate_map()

from folium.plugins import HeatMap

def get_heat_points(geom, volume, vol_normalization):
    point_cloud = []
    vol_norm = volume/vol_normalization
    for points in geom['coordinates']: 
        lat = float(points[0])
        lon = float(points[1])
        point_cloud.append([lat, lon, vol_norm])
    return point_cloud


def gen_heatmap(mapa):
    # mapa = ctrl.get_map()
    df = ctrl.get_frame('volumes')
    # df = self.get_frame('volumes')
    data = []  # lat, lng, weight

    vol_median = df['VOLUME'].median()


    df['heat_map_points'] = df.apply(lambda row: get_heat_points(row['geometry'], row['VOLUME'], vol_median), axis = 1)
    # print(df)

    data = []
    for points in df['heat_map_points'].tolist():
        for point in points: 
            data.append(point)
    heat_map = HeatMap(data, radius = 10, blur= 15)
    heat_map.add_to(mapa)
    mapa.save('index.html')
    print("map saved")

 
gen_heatmap(mapa)



[51.212425, -113.859905] [50.842822, -114.315796]
map saved


In [10]:
#GET VOLUME WEIGHTED POLYLINE
    
def rgb_to_hex(rgb):
    '''Method converts (r,g,b) to hex color, helper for map functons'''
    # '%' format operator, '02x' hexadecimal form.
    # This line says get hexidecimal by parsing three items from tuple
    return '#'+'%02x%02x%02x' % rgb


def draw_vol_map(mapa, volume_frame):
    '''get volume map
    @args: volume_frame :pd.DataFrame to map
    @return folium map NOTE: front end will simply render map.html from assets
    '''
    calgary_map = mapa
    color_increments = 256  # 0-255
    color_steps = len(volume_frame)  # number of data entries
    n_color = color_steps // color_increments  # change color every n_color steps
    step_counter = 0
    step = 256//color_steps
    if step == 0:
        step = 1
    r = 255  # since we sorted above, the largest volume will be red, green lightest volume
    g = 0
    b = 0

    volume_frame = volume_frame.sort_values(by = 'VOLUME', ascending = False)

    display(volume_frame)

    for index, items in volume_frame.iterrows():
        secname = items['SECNAME']
        volume = items['VOLUME']
        the_geom = items['multilinestring']
        # print(the_geom)
        # print(type(the_geom))
        year = items['YEAR']
        if step_counter > n_color:  # Check if we need to change color
            r -= step
            g += step
            step_counter = 0
        point_cloud = []
        color = rgb_to_hex((r, g, b))
        # strip everything outside of (( ))
        location = re.search(r'\(\((.*?)\)\)', the_geom)
        for loc in location.groups(1):
            loc = loc.split(',')  # split coordinate pairs
            for point in loc:
                point = point.strip().split(' ')
                lon = point[0].strip(')').strip(
                    '(')  # remove any remaining parenthesis
                lon = float(lon)  # convert from string
                lat = point[1].strip('(').strip(')')
                lat = float(lat)
                point_cloud.append([lat, lon])
        polyline = folium.PolyLine(
            locations=point_cloud, tooltip=f'{secname}\n {volume} number of cars in {year}', color=color).add_to(calgary_map)
        step_counter += 1
    print(f"Saving total volume {year} map")
    calgary_map.save('index.html', zoom_start=5)
    return calgary_map

# mapa = generate_map()
mapa = draw_vol_map(mapa, ctrl.get_frame('volumes'))

Unnamed: 0,YEAR,SECNAME,Shape_Leng,VOLUME,multilinestring,geometry,cell,heat_map_points
961,2018,DEERFOOT7,2304.003389,185000,MULTILINESTRING ((-114.01711820840292 51.04795...,"{'type': 'MultiLineString', 'coordinates': [[5...","{56: 63, 66: 2}","[[51.047955, -114.017118, 15.416666666666666],..."
962,2018,DEERFOOT8,1552.906420,179000,MULTILINESTRING ((-114.0046744311178 51.037095...,"{'type': 'MultiLineString', 'coordinates': [[5...",{56: 66},"[[51.037096, -114.004674, 14.916666666666666],..."
959,2018,DEERFOOT6,1710.508011,177000,MULTILINESTRING ((-114.02674663146523 51.06744...,"{'type': 'MultiLineString', 'coordinates': [[5...",{66: 26},"[[51.067447, -114.026747, 14.75], [51.067638, ..."
956,2018,DEERFOOT3,892.418996,174000,MULTILINESTRING ((-114.04688211128679 51.11052...,"{'type': 'MultiLineString', 'coordinates': [[5...",{75: 18},"[[51.110524, -114.046882, 14.5], [51.110861, -..."
957,2018,DEERFOOT4,1687.817536,171000,MULTILINESTRING ((-114.04060796493901 51.09607...,"{'type': 'MultiLineString', 'coordinates': [[5...","{66: 10, 65: 12, 75: 9}","[[51.096076, -114.040608, 14.25], [51.096089, ..."
...,...,...,...,...,...,...,...,...
671,2018,8AVS2_A,95.250200,0,MULTILINESTRING ((-114.05319192006705 51.04529...,"{'type': 'MultiLineString', 'coordinates': [[5...",{55: 2},"[[51.045297, -114.053192, 0.0], [51.045297, -1..."
283,2018,210AVSE1,2488.775051,0,MULTILINESTRING ((-114.07089463539455 50.86309...,"{'type': 'MultiLineString', 'coordinates': [[5...","{5: 69, 6: 9}","[[50.863099, -114.070895, 0.0], [50.863096, -1..."
594,2018,68STSE13,1095.881700,0,MULTILINESTRING ((-113.93516741818058 50.98949...,"{'type': 'MultiLineString', 'coordinates': [[5...",{38: 2},"[[50.989498, -113.935167, 0.0], [50.979646, -1..."
124,2018,144AVNW7,1630.372422,0,MULTILINESTRING ((-114.09458656705522 51.18331...,"{'type': 'MultiLineString', 'coordinates': [[5...",{94: 51},"[[51.183317, -114.094587, 0.0], [51.183317, -1..."


Saving total volume 2018 map


In [64]:
%%html
<iframe src="index.html" frameborder = "0" width = "960" height = "600" allowfullscreen="true" mozallowfullscreen="true" webkitallowfullscreen="true"></iframe>

In [11]:
df = ctrl.get_frame('volumes')
df

Unnamed: 0,YEAR,SECNAME,Shape_Leng,VOLUME,multilinestring,geometry,cell
0,2018,5AVS12,172.570274,22000,MULTILINESTRING ((-114.06036700906716 51.04831...,"{'type': 'MultiLineString', 'coordinates': [[5...",{55: 2}
1,2018,94AVSEWBON,189.451491,5000,MULTILINESTRING ((-114.06876825342002 50.96863...,"{'type': 'MultiLineString', 'coordinates': [[5...",{35: 2}
2,2018,12STNE29,261.453939,5000,MULTILINESTRING ((-114.03372206187294 51.05323...,"{'type': 'MultiLineString', 'coordinates': [[5...",{56: 2}
3,2018,100AVNE1,804.353006,2000,MULTILINESTRING ((-114.01318801277267 51.14336...,"{'type': 'MultiLineString', 'coordinates': [[5...",{86: 7}
4,2018,106AVSE1,507.320869,14000,MULTILINESTRING ((-113.98128756128924 50.95784...,"{'type': 'MultiLineString', 'coordinates': [[5...",{37: 55}
...,...,...,...,...,...,...,...
1764,2018,CNTRYHILLS9DEERFT,715.577099,44000,MULTILINESTRING ((-114.02797586216911 51.15420...,"{'type': 'MultiLineString', 'coordinates': [[5...",{86: 35}
1765,2018,BARLOW13SGLEN,797.555381,27000,MULTILINESTRING ((-113.98198296908507 50.97234...,"{'type': 'MultiLineString', 'coordinates': [[5...",{37: 40}
1766,2018,19STNE1B2,184.577040,20000,MULTILINESTRING ((-114.01338212545333 51.06538...,"{'type': 'MultiLineString', 'coordinates': [[5...",{66: 13}
1767,2018,WALDNGASE,139.327583,7000,MULTILINESTRING ((-114.03567148851693 50.87574...,"{'type': 'MultiLineString', 'coordinates': [[5...",{6: 2}


In [5]:
speeds = ctrl.get_frame('speeds')
cells = ctrl.get_frame('cells')
# display(speeds)
# display(cells)


def get_avg_speed(cell_idx):
    # print(f'testing cell {cell_idx}')
    speed_sum = 0
    num_points = 0 
    for idx, row in speeds[['cell', 'SPEED']].iterrows():
        cell_dict = row['cell']
        speed = row['SPEED']
        if cell_idx in cell_dict:
            # print(f'{cell_idx} is in {cell_dict}')
            these_points = cell_dict[cell_idx]
            # print(f'these points = {these_points}')
            speed_sum += speed*these_points
            num_points += these_points
    if num_points == 0:
        return np.nan
        # return 0
    return speed_sum/num_points
    
i = 0 
avg_speed = []
while i < 100: 
    avg_speed.append(get_avg_speed(i))
    i+=1
print(avg_speed)
cells['avg_speed'] = avg_speed


[nan, nan, nan, nan, nan, 59.673202614379086, 66.20689655172414, 80.44554455445545, nan, nan, nan, nan, 70.0, 70.45454545454545, 62.04255319148936, 63.6741519350215, 71.49141630901288, 67.21440397350993, 70.76843198338526, 76.34146341463415, nan, nan, nan, 72.1853388658368, 64.78494623655914, 62.06577119036591, 66.55727155727156, 69.90340620233859, 64.29906542056075, 58.07531380753138, nan, nan, nan, 60.0, 62.34993614303959, 62.788844621513945, 68.22074566260613, 70.02633889376646, 72.98299845440495, nan, nan, 73.96825396825396, 65.70743405275779, 73.4090909090909, 59.4337899543379, 58.44332175560467, 70.82397003745318, 62.31227651966627, 72.13842975206612, 80.0, nan, 60.0, 61.02028639618138, 66.2549537648613, 57.83313325330132, 44.25901201602136, 74.37106056453823, 75.69165143205363, 69.5031055900621, 80.0, 110.0, 70.859375, 63.483365949119374, 65.62115621156211, 61.7911227154047, 61.484848484848484, 67.25333333333333, 59.10411622276029, 71.12003780718337, nan, nan, 61.75438596491228,

In [6]:
df=ctrl.get_frame('cells')
df

Unnamed: 0,cells,cell_bounds,avg_speed
0,<folium.vector_layers.Rectangle object at 0x7f...,"[[50.842822, -114.315796], [50.8797823, -114.2...",
1,<folium.vector_layers.Rectangle object at 0x7f...,"[[50.842822, -114.2702069], [50.8797823, -114....",
2,<folium.vector_layers.Rectangle object at 0x7f...,"[[50.842822, -114.2246178], [50.8797823, -114....",
3,<folium.vector_layers.Rectangle object at 0x7f...,"[[50.842822, -114.1790287], [50.8797823, -114....",
4,<folium.vector_layers.Rectangle object at 0x7f...,"[[50.842822, -114.1334396], [50.8797823, -114....",
...,...,...,...
95,<folium.vector_layers.Rectangle object at 0x7f...,"[[51.175464700000006, -114.0878505], [51.21242...",75.049505
96,<folium.vector_layers.Rectangle object at 0x7f...,"[[51.175464700000006, -114.0422614], [51.21242...",88.552381
97,<folium.vector_layers.Rectangle object at 0x7f...,"[[51.175464700000006, -113.9966723], [51.21242...",69.622980
98,<folium.vector_layers.Rectangle object at 0x7f...,"[[51.175464700000006, -113.9510832], [51.21242...",98.833333


In [7]:
df= ctrl.get_frame('volumes')

cells = ctrl.get_frame('cells')


def get_cell_vol(cell_idx):
    # print(f'testing cell {cell_idx}')
    volume_sum = 0
    num_points = 0 
    for idx, row in df[['cell', 'VOLUME']].iterrows():
        cell_dict = row['cell']
        volume = row['VOLUME']
        if cell_idx in cell_dict:
            # print(f'{cell_idx} is in {cell_dict}')
            these_points = cell_dict[cell_idx]
            # print(f'these points = {these_points}')
            volume_sum += volume
            num_points += these_points
    if num_points == 0:
        return np.nan
        # return 0
    return volume_sum
    
i = 0 
volume_sum = []
while i < 100: 
    volume_sum.append(get_cell_vol(i))
    i+=1
print(volume_sum)
cells['volume_sum'] = volume_sum



[nan, nan, nan, nan, 2000, 98000, 172000, 140000, nan, nan, nan, nan, 14000, 40000, 365000, 1025000, 226000, 930000, 394000, 9000, nan, nan, nan, 75000, 395000, 911000, 848000, 972000, 231000, nan, nan, nan, nan, 6000, 564000, 880000, 1423000, 473000, 202000, 34000, nan, 34000, 165000, 644000, 1143000, 1451000, 1047000, 385000, 196000, nan, nan, 8000, 456000, 824000, 1843000, 3802000, 1452000, 1066000, 437000, 16000, 44000, 249000, 352000, 856000, 1158000, 1267000, 2002000, 1301000, 680000, nan, nan, 427000, 868000, 804000, 316000, 1184000, 368000, 490000, 399000, nan, nan, 180000, 386000, 614000, 887000, 719000, 822000, 313000, 124000, nan, nan, 7000, 14000, 42000, 36000, 23000, 167000, 41000, 40000, nan]


In [8]:
def count_incidents(cell_idx):
    incidents = ctrl.get_frame('incidents')
    # print(f'testing cell {cell_idx}')
    counter = 0
    for _, cell in incidents['cell'].items(): 
        if cell_idx == cell:
            counter +=1
    return counter

i = 0 
inc_count = []

while i<100: 
    inc_count.append(count_incidents(i))
    i+=1
print(inc_count)
cells['incident_count'] = inc_count

display(cells.sort_values(by='incident_count', ascending=False))

[0, 0, 0, 0, 0, 7, 17, 13, 0, 0, 0, 0, 1, 6, 50, 111, 27, 104, 35, 2, 0, 0, 0, 9, 46, 154, 96, 93, 27, 0, 0, 0, 0, 0, 88, 225, 149, 104, 38, 0, 0, 1, 15, 69, 303, 307, 297, 63, 36, 0, 0, 3, 45, 107, 326, 465, 362, 259, 84, 5, 5, 14, 44, 123, 161, 238, 354, 290, 127, 0, 0, 26, 67, 93, 23, 161, 25, 80, 81, 0, 0, 10, 32, 84, 79, 94, 55, 63, 26, 0, 0, 0, 0, 11, 2, 5, 22, 20, 1, 0]


Unnamed: 0,cells,cell_bounds,avg_speed,volume_sum,incident_count
55,<folium.vector_layers.Rectangle object at 0x7f...,"[[51.027623500000004, -114.0878505], [51.06458...",44.259012,3802000.0,465
56,<folium.vector_layers.Rectangle object at 0x7f...,"[[51.027623500000004, -114.0422614], [51.06458...",74.371061,1452000.0,362
66,<folium.vector_layers.Rectangle object at 0x7f...,"[[51.0645838, -114.0422614], [51.1015441, -113...",67.253333,2002000.0,354
54,<folium.vector_layers.Rectangle object at 0x7f...,"[[51.027623500000004, -114.1334396], [51.06458...",57.833133,1843000.0,326
45,<folium.vector_layers.Rectangle object at 0x7f...,"[[50.9906632, -114.0878505], [51.0276235000000...",58.443322,1451000.0,307
...,...,...,...,...,...
39,<folium.vector_layers.Rectangle object at 0x7f...,"[[50.953702899999996, -113.9054941], [50.99066...",,34000.0,0
40,<folium.vector_layers.Rectangle object at 0x7f...,"[[50.9906632, -114.315796], [51.02762350000000...",,,0
49,<folium.vector_layers.Rectangle object at 0x7f...,"[[50.9906632, -113.9054941], [51.0276235000000...",80.000000,,0
1,<folium.vector_layers.Rectangle object at 0x7f...,"[[50.842822, -114.2702069], [50.8797823, -114....",,,0


In [9]:
df = ctrl.get_frame('temporal')
df
display(df['date'].iloc[0].hour)

df[(df['date'] > '2018-01-01 00:00:00') & (df['date'] < '2018-01-02 00:00:00')]

filt = (df['date'] > '2018-01-01 00:00:00') & (df['date'] < '2018-01-02 00:00:00')
print(filt)

6

0        True
1        True
2        True
3        True
4        True
        ...  
8746    False
8747    False
8748    False
8749    False
8750    False
Name: date, Length: 8751, dtype: bool


In [10]:
df = ctrl.get_frame('temporal')
# display(df)



# display(df['Temp (C)'])

def melt_freeze(temps, freeze_temp, target = 8):
    '''
    return pd.Series bool filter of  hours from 2018 such that Ti is an hour where the temperature fell below freeze_temp (C). Targets Ti are the ith hours after the freeze_temp occured. 

    :Params:    temps: pd.Series of hourly temperatures degrees C
                freeze_temp: temperature of interest 
                target: number of temperatures to capture after the freeze_temp. 
    :Returns:  pd.Series bool filter
    '''
    then = 0
    now = 1
    freeze = freeze_temp

    #list of target hours
    hours=[]

    #pivot + 8 hours
    target = target 

    #  Assume typical dangerous freeze near midnight and we wish to capture the morning     rush hour between 6-8am . 
    
    while then < temps.size: 
        target_hours = []
        try: 
            if temps[now] <= freeze and temps[then] > freeze:
                # print('hit')
                idx = 0
                while idx < target: 
                    target_hours.append(now+idx)
                    idx+=1
                hours.append(target_hours)
            then +=1
            now +=1
        except: 
            then +=1
            now +=1
            continue

    mask_indices =[]
    for targets in hours: 
        for target in targets: 
            if target in mask_indices: 
                continue
            else: 
                mask_indices.append(target)

    mask = []
    idx = 0
    while idx < temps.size:
        if idx in mask_indices:
            mask.append(True)
        else:
            mask.append(False)
        idx += 1
    mask = pd.Series(mask)
    print(f'There were {len(hours)} melt-freeze cycles in 2018!')
    return(mask)

mask = melt_freeze(df['Temp (C)'], 0, 8)


df['date'].loc[mask]







There were 143 melt-freeze cycles in 2018!


35     2018-01-02 17:00:00
36     2018-01-02 18:00:00
37     2018-01-02 19:00:00
38     2018-01-02 20:00:00
39     2018-01-02 21:00:00
               ...        
8712   2018-12-30 06:00:00
8713   2018-12-30 07:00:00
8714   2018-12-30 08:00:00
8715   2018-12-30 09:00:00
8716   2018-12-30 10:00:00
Name: date, Length: 1018, dtype: datetime64[ns]

In [11]:
df =ctrl.get_frame('temporal')

df['date'] = pd.to_datetime(df['Date/Time'])
# display(df)

df2 = ctrl.get_frame('incidents')

# display(df2)
df2['date'] = pd.to_datetime(df2['START_DT'])
# display(df2)

df2['date'] = pd.to_datetime(df2['START_DT'])
mask_2018 = df2['date'].dt.year == 2018
incidents_2018 = df2[mask_2018]
# display(incidents_2018)
# display(df2['date'])


incidents_2018 = incidents_2018.resample('H', on='date')['Count'].count()
display(incidents_2018)
print(incidents_2018.name)
incidents_2018.name = 'incidents'
print(incidents_2018.name)


merged = pd.merge(df, incidents_2018, on = 'date')

# display(merged)

print(merged['Visibility (km)'].unique())

merged = merged.sort_values(by='incidents', ascending = False)
# display(merged)

grouped_weather = merged.groupby(by='Weather')

# display(grouped_weather['Count'].count())

# display(cell_groups['BLADE_TYPE'].agg('count').sort_values(ascending=False).head(10))


# cell_groups = signs_test_df.groupby(by='cell', as_index=True, sort=True)





date
2018-01-01 06:00:00    2
2018-01-01 07:00:00    0
2018-01-01 08:00:00    0
2018-01-01 09:00:00    3
2018-01-01 10:00:00    1
                      ..
2018-12-31 16:00:00    1
2018-12-31 17:00:00    2
2018-12-31 18:00:00    1
2018-12-31 19:00:00    0
2018-12-31 20:00:00    2
Freq: H, Name: Count, Length: 8751, dtype: int64

Count
incidents
[24.1 64.4 32.2 19.3 48.3  9.7  2.4  0.4  0.2  0.8  3.2  6.4  8.1  4.8
  3.6  1.6 16.1 12.9 40.2 56.3 11.3  2.   4.   1.  80.5  nan  2.8  1.2
 14.5  0.6  0.  22.5 20.9 72.4 17.7]


KeyError: 'incidents'

In [12]:
df = ctrl.get_frame('incidents')

filt = df['Count'] != 1

df.loc[filt]

Unnamed: 0,INCIDENT INFO,DESCRIPTION,START_DT,MODIFIED_DT,QUADRANT,Longitude,Latitude,location,Count,id,date,geometry,cell
