# Red Light Camera Analysis

Linear regression analysis of factors affecting red light violations

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from modules.myfuncs import *
import warnings
import numpy as np
from geopy.geocoders import Nominatim
import matplotlib.pyplot as plt
import sqlite3
# import dask
# import dask.dataframe as dd
import gc

warnings.filterwarnings('ignore')

In [3]:
conn = create_connection('database/rlc2.db')  # function from myfuncs file
c = conn.cursor()

sqlite3 version: 2.6.0
connected to database/rlc2.db


In [4]:
print(sql_fetch_tables(c, conn))



def table_info(c, conn):
    '''
    prints out all of the columns of every table in db
    c : cursor object
    conn : database connection object
    '''
    tables = c.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
    for table_name in tables:
        table_name = table_name[0]
        table = pd.read_sql_query("SELECT * from {} LIMIT 0".format(table_name), conn)
        print(table_name)
        for col in table.columns:
            print('\t-' + col)
        print()

table_info(c, conn)


[('cam_locations',), ('cam_startend',), ('daily_violations',), ('intersction_locations',), ('all_crashes',), ('hourly_congestion',), ('hourly_weather',), ('region_data',), ('intersection_cams',), ('signal_crashes',)]
cam_locations
	-camera_id
	-intersection
	-address
	-latitude
	-longitude
	-day

cam_startend
	-camera_id
	-start
	-end

daily_violations
	-intersection
	-camera_id
	-address
	-violation_date
	-violations
	-latitude
	-longitude
	-month
	-day
	-weekday
	-year

intersction_locations
	-intersection
	-latitude
	-longitude

all_crashes
	-crash_record_id
	-rd_no
	-crash_date
	-posted_speed_limit
	-traffic_control_device
	-device_condition
	-weather_condition
	-lighting_condition
	-first_crash_type
	-trafficway_type
	-alignment
	-roadway_surface_cond
	-road_defect
	-report_type
	-crash_type
	-hit_and_run_i
	-damage
	-prim_contributory_cause
	-sec_contributory_cause
	-street_no
	-street_direction
	-street_name
	-beat_of_occurrence
	-num_units
	-most_severe_injury
	-injuries_total


## Query our data
I would like to get daily red light camera intersection data with the following columns:
- violations
- weekday
- month
- year
- region   (can add this post query with lambda function from intersection_cams and region_data tables, ugh!)
- congestion (try peak, rush hour, average, etc)
- precipitation
- max_temp



In [None]:
# LINK daily_violations to hourly_congestion through intersection_cams
int_df = pd.read_sql_query('''
                                    WITH v as
                                        (SELECT intersection,
                                                SUM(violations) as violations,
                                                month,
                                                day,
                                                year
                                        FROM daily_violations
                                        GROUP BY year, month, day, intersection
                                        ),
                                    
                                    ic as
                                        (SELECT region_id,
                                                intersection
                                         FROM intersection_cams
                                        ),
                                        
                                    c as
                                        (SELECT AVG(speed) as speed,
                                                region_id,
                                                month,
                                                day,
                                                weekday,
                                                year    
                                         FROM hourly_congestion
                                         GROUP BY year, month, day, region_id
                                        )
                                    SELECT  v.*,
                                            ic.region_id,
                                            c.speed
                                    FROM v
                                    LEFT JOIN ic
                                      ON v.intersection = ic.intersection
                                    LEFT JOIN c
                                      ON c.year = v.year
                                          AND c.month = v.month
                                          AND c.day = v.day
                                          AND c.region_id = ic.region_id
                                      
                                    WHERE v.year > 2015
                                    ORDER BY v.year, v.month, v.day, ic.region_id
                                    ''', conn)

In [39]:


# NOW LET'S ADD WX
# -temp
# -rain_1h
# -rain_3h
# -snow_1h
# -snow_3h
# -time
# -year
# -month
# -day
# -hour
# -weekday
int_df = pd.read_sql_query('''
                                    WITH v as
                                        (SELECT intersection,
                                                SUM(violations) as violations,
                                                month,
                                                day,
                                                year
                                        FROM daily_violations
                                        GROUP BY year, month, day, intersection
                                        ),
                                    
                                    ic as
                                        (SELECT region_id,
                                                intersection
                                         FROM intersection_cams
                                        ),
                                        
                                    c as
                                        (SELECT AVG(speed) as speed,
                                                MIN(speed) as min_speed,
                                                region_id,
                                                month,
                                                day,
                                                weekday,
                                                year    
                                         FROM hourly_congestion
                                         GROUP BY year, month, day, region_id
                                        ),
                                        
                                     wx as
                                        (SELECT temp,
                                                SUM(snow_1h)+SUM(rain_1h) as precip,
                                                month,
                                                day,
                                                weekday,
                                                year    
                                         FROM hourly_weather
                                         GROUP BY year, month, day
                                        )
                                    SELECT  v.*,
                                            ic.region_id,
                                            c.speed,
                                            c.min_speed,
                                            wx.weekday,
                                            wx.precip,
                                            wx.temp
                                    FROM v
                                    LEFT JOIN ic
                                      ON v.intersection = ic.intersection
                                    LEFT JOIN c
                                      ON c.year = v.year
                                          AND c.month = v.month
                                          AND c.day = v.day
                                          AND c.region_id = ic.region_id
                                    LEFT JOIN wx
                                      ON wx.year = v.year
                                          AND wx.month = v.month
                                          AND wx.day = v.day
                                    WHERE v.year > 2015
                                    ORDER BY v.year, v.month, v.day, ic.region_id
                                    ''', conn)

# LOOKS GOOD!!
print(int_df.info())
int_df.head(50)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 265314 entries, 0 to 265313
Data columns (total 11 columns):
intersection    265314 non-null object
violations      265314 non-null int64
month           265314 non-null int64
day             265314 non-null int64
year            265314 non-null int64
region_id       265314 non-null object
speed           262559 non-null float64
min_speed       262559 non-null float64
weekday         260952 non-null float64
precip          260952 non-null float64
temp            260952 non-null float64
dtypes: float64(5), int64(4), object(2)
memory usage: 22.3+ MB
None


Unnamed: 0,intersection,violations,month,day,year,region_id,speed,min_speed,weekday,precip,temp
0,WESTERN AND TOUHY,5,1,1,2016,1,23.812583,21.046667,4.0,0.0,272.15
1,CICERO AND CHICAGO,13,1,1,2016,10,23.846833,21.133333,4.0,0.0,272.15
2,CICERO AND HARRISON,8,1,1,2016,10,23.846833,21.133333,4.0,0.0,272.15
3,CICERO AND WASHINGTON,4,1,1,2016,10,23.846833,21.133333,4.0,0.0,272.15
4,HAMLIN AND LAKE,7,1,1,2016,10,23.846833,21.133333,4.0,0.0,272.15
5,HAMLIN AND MADISON,18,1,1,2016,10,23.846833,21.133333,4.0,0.0,272.15
6,PULASKI AND CHICAGO,1,1,1,2016,10,23.846833,21.133333,4.0,0.0,272.15
7,PULASKI AND DIVISION,5,1,1,2016,10,23.846833,21.133333,4.0,0.0,272.15
8,ROOSEVELT AND KOSTNER,8,1,1,2016,10,23.846833,21.133333,4.0,0.0,272.15
9,ROOSEVELT AND PULASKI,6,1,1,2016,10,23.846833,21.133333,4.0,0.0,272.15


In [62]:
# camera




int_df = pd.read_sql_query('''
                                    WITH v as
                                        (SELECT intersection,
                                                violations,
                                                month,
                                                dat as day,
                                                weekday,
                                                year
                                        FROM daily_violations
                                        GROUP BY intersection
                                        ),
                                    cr as
                                        (SELECT intersection,
                                                COUNT(crash_record_id) as n_crash,
                                                SUM(injuries_total) as injuries,
                                                SUM(injuries_fatal) as deaths
                                         FROM signal_crashes
                                         GROUP BY intersection
                                         ORDER BY intersection
                                         )
                                    SELECT v.intersection, v.violations, cr.n_crash, cr.injuries, cr.deaths
                                    FROM v
                                    LEFT JOIN cr
                                        ON cr.intersection=v.intersection
                                    GROUP BY v.intersection
                                         
                                    ''', conn)

DatabaseError: Execution failed on sql '
                                    WITH v as
                                        (SELECT intersection,
                                                violations,
                                                month,
                                                dat as day,
                                                weekday,
                                                year
                                        FROM daily_violations
                                        GROUP BY intersection
                                        ),
                                    cr as
                                        (SELECT intersection,
                                                COUNT(crash_record_id) as n_crash,
                                                SUM(injuries_total) as injuries,
                                                SUM(injuries_fatal) as deaths
                                         FROM signal_crashes
                                         GROUP BY intersection
                                         ORDER BY intersection
                                         )
                                    SELECT v.intersection, v.violations, cr.n_crash, cr.injuries, cr.deaths
                                    FROM v
                                    LEFT JOIN cr
                                        ON cr.intersection=v.intersection
                                    GROUP BY v.intersection
                                         
                                    ': no such column: dat