This notebook computes basic summary statistics (counts, mean, median, standard deviation, max and min) for the road and intersection data files in the `stats` folder.

### Import Statements

In [7]:
import json
import matplotlib.pyplot as plt
import math

### Basic Statistics for AADT by Road

In [76]:
# Load the road records: one JSON object per line. Only the 'road type' and
# 'AADT' keys of each record are read in this cell.
with open("stats/livingston_roads.txt", 'r') as f:
    roads = [json.loads(line) for line in f.read().splitlines()]

# split the AADT up by road type (an entry is None when the road has no AADT)
road_types = {}
for road in roads:
    road_types.setdefault(road['road type'], []).append(road['AADT'])


def print_aadt_row(label, total, present):
    """Print one row of the AADT table.

    label   -- text for the first column (a road type, or "Total")
    total   -- number of roads in the group, with or without an AADT value
    present -- the AADT values of the roads in the group that have one
    """
    # an empty group has no meaningful ratio; report 0 instead of dividing by zero
    ratio = len(present)/total if total else 0.0

    # nothing to average: print placeholders instead of raising ZeroDivisionError
    if not present:
        print("{:15s} | {:12}  {:12}  {:13.3f} {:>12} {:>12} {:>12} {:>12} {:>12}".format(
            label, total, len(present), ratio, '-', '-', '-', '-', '-'))
        return

    meanvalue = sum(present)/len(present)
    # middle element of the sorted values (the upper of the two middle values
    # when the count is even)
    median = sorted(present)[len(present)//2]
    # population standard deviation (divides by N, not N - 1)
    stdv = math.sqrt(sum((v - meanvalue)**2 for v in present)/len(present))
    print("{:15s} | {:12}  {:12}  {:13.3f} {:12.2f} {:12} {:12.2f} {:12} {:12}".format(
        label, total, len(present), ratio,
        meanvalue, median, stdv, max(present), min(present)))


# the header of each column; widths and gaps match the rows printed by print_aadt_row
print("{:15s} | {:>12}  {:>12}  {:>13} {:>12} {:>12} {:>12} {:>12} {:>12}".format(
    "Road Type", "Total Roads", "AADT Present", "Present/Total",
    "Mean AADT", "Median AADT", "AADT Stdv.", "Max AADT", "Min AADT"))
print("-"*126)
for key in road_types:

    # make a list for the roads with AADT data
    state = [AADT for AADT in road_types[key] if AADT is not None]
    print_aadt_row(key, len(road_types[key]), state)
print("-"*126)

# the total over all types
state = [road['AADT'] for road in roads if road['AADT'] is not None]
print_aadt_row("Total", len(roads), state)

Road Type       |  Total Roads  AADT Present  Present/Total    Mean AADT  Median AADT   AADT Stdv.     Max AADT     Min AADT
------------------------------------------------------------------------------------------------------------------------------
residential     |         2082            70          0.034      1577.63         1184       1711.20        10203          115
tertiary        |          288           128          0.444      1005.47          678       1087.35         8774          153
secondary       |          236           194          0.822      3592.76         2928       2171.66        10548          566
primary         |          150           146          0.973      5536.31         5260       2471.51        10548          900
motorway        |           75            59          0.787     14052.19        14300       5773.94        27881         3454
----------------------------------------------------------------------------------------------------------------------

### Basic Statistics for Speed Limit by Road

In [78]:
# Load the road records: one JSON object per line. Only the 'road type' and
# 'speed' keys of each record are read in this cell.
with open("stats/livingston_roads.txt", 'r') as f:
    roads = [json.loads(line) for line in f.read().splitlines()]

# split the speed limits up by road type (an entry is None when the road has no speed)
road_types = {}
for road in roads:
    road_types.setdefault(road['road type'], []).append(road['speed'])


def print_speed_row(label, total, present):
    """Print one row of the speed-limit table.

    label   -- text for the first column (a road type, or "Total")
    total   -- number of roads in the group, with or without a speed value
    present -- the speed values of the roads in the group that have one
    """
    # an empty group has no meaningful ratio; report 0 instead of dividing by zero
    ratio = len(present)/total if total else 0.0

    # no speeds at all: print placeholders instead of dividing by zero
    if not present:
        print("{:15s} | {:12}  {:14}  {:13.3f} {:>12} {:>12} {:>12} {:>12} {:>12}".format(
            label, total, len(present), ratio, '-', '-', '-', '-', '-'))
        return

    meanvalue = sum(present)/len(present)
    # middle element of the sorted values (the upper of the two middle values
    # when the count is even)
    median = sorted(present)[len(present)//2]
    # population standard deviation (divides by N, not N - 1)
    stdv = math.sqrt(sum((v - meanvalue)**2 for v in present)/len(present))
    print("{:15s} | {:12}  {:14}  {:13.3f} {:12.2f} {:12} {:12.2f} {:12} {:12}".format(
        label, total, len(present), ratio,
        meanvalue, median, stdv, max(present), min(present)))


# the header of each column; widths and gaps match the rows printed by print_speed_row
print("{:15s} | {:>12}  {:>14}  {:>13} {:>12} {:>12} {:>12} {:>12} {:>12}".format(
    "Road Type", "Total Roads", "Speeds Present", "Present/Total",
    "Mean Speed", "Median Speed", "Speed Stdv.", "Max Speed", "Min Speed"))
print("-"*126)
for key in road_types:

    # make a list for the roads with speed data
    speeds = [speed for speed in road_types[key] if speed is not None]
    print_speed_row(key, len(road_types[key]), speeds)
print("-"*126)

# the total over all types; "Total Roads" is the number of all roads, not
# only of those that have a speed value
speeds = [road['speed'] for road in roads if road['speed'] is not None]
print_speed_row("Total", len(roads), speeds)

Road Type       |  Total Roads  Speeds Present  Present/Total   Mean Speed Median Speed  Speed Stdv.    Max Speed    Min Speed
------------------------------------------------------------------------------------------------------------------------------
residential     |         2082               0            0.0            -            -            -            -            -
tertiary        |          288               4          0.014        43.75           55        12.44           55           25
secondary       |          236              42          0.178        39.40           35        10.98           55           30
primary         |          150              60          0.400        49.75           55         8.78           55           30
motorway        |           75              75          1.000        65.00           65         0.00           65           65
---------------------------------------------------------------------------------------------------------------

### Basic Statistics for Intersections

In [79]:
# make a list for each category
traffic_lights = []
num_state_directions = []
average_AADT = []
total_AADT = []
road_type_scores = []
speed_limits = []

# One JSON object per line. The file name carries no trailing '.', matching
# the name used by the last cell of this notebook.
with open("stats/livingston_intersections_plus.txt", 'r') as f:
    for line in f:
        int_plus = json.loads(line)
        traffic_lights.append(int_plus['traffic light'])
        num_state_directions.append(int_plus['num state directions'])
        average_AADT.append(int_plus['average AADT'])
        total_AADT.append(int_plus['total AADT'])
        road_type_scores.append(int_plus['average road score'])
        speed_limits.append(int_plus['average speed'])

all_data = {'Traffic Light': traffic_lights, 'Number of State Directions': num_state_directions,
            'Average AADT': average_AADT, 'Total AADT': total_AADT,
            'Average Road Score': road_type_scores, 'Speed Limit': speed_limits}

# Rows are separated with '&' (presumably so they can be pasted into a LaTeX
# table -- confirm); the header uses the same separators and widths so that
# its labels sit above the matching columns.
header = "{:26s} & {:>7} & {:>7}& {:>7}& {:>7} &{:>7} &{:>7} &{:>8}".format(
    "Category", "Mean", "Max", "Min", "Median", "Stdv.", "Present", "Missing")
print(header)
print("-"*len(header))
for key in all_data:
    values = all_data[key]

    # separate the missing values
    present = [value for value in values if value is not None]
    missing = len(values) - len(present)
    N = len(present)

    # no data for this category: print placeholders rather than letting
    # max()/min() raise ValueError on an empty list
    if N == 0:
        print("{:26s} & {:>7} & {:>7}& {:>7}& {:>7} &{:>7} &{:7} &{:8}".format(
            key, '-', '-', '-', '-', '-', N, missing))
        continue

    maxvalue = max(present)
    minvalue = min(present)
    meanvalue = sum(present)/N
    # middle element of the sorted values (upper middle when N is even)
    medianvalue = sorted(present)[N//2]
    # population standard deviation (divides by N, not N - 1)
    stdv = math.sqrt(sum((v - meanvalue)**2 for v in present)/N)
    # max and min are truncated to integers for display
    print("{:26s} & {:7.2f} & {:7}& {:7}& {:7.1f} &{:7.1f} &{:7} &{:8}".format(
        key, meanvalue, int(maxvalue), int(minvalue), medianvalue, stdv, N, missing))

Category                   |    Mean     Max     Min  Median   Stdv. Present  Missing
-------------------------------------------------------------------------------------
Traffic Light              &    0.01 &       1&       0&     0.0 &    0.1 &   3481 &       0
Number of State Directions &    0.91 &       4&       0&     0.0 &    1.1 &   3481 &       0
Average AADT               & 3755.29 &   27881&     115&  2940.0 & 3322.8 &   1528 &    1953
Total AADT                 & 7829.62 &   55762&     115&  6202.0 & 6943.7 &   1528 &    1953
Average Road Score         &    1.75 &       5&       1&     1.5 &    0.9 &   3481 &       0
Speed Limit                &   48.43 &      65&      25&    55.0 &   12.9 &    363 &    3118


### Statistics for Intersections with Multiple State Roads

In [59]:
# make a list for each category
traffic_lights = []
num_state_directions = []
average_AADT = []
total_AADT = []
road_type_scores = []
speed_limits = []

# One JSON object per line. The file name carries no trailing '.', matching
# the name used by the last cell of this notebook.
with open("stats/livingston_intersections_plus.txt", 'r') as f:
    for line in f:
        int_plus = json.loads(line)

        # keep only intersections with more than two state directions
        # (NOTE(review): taken to mean at least two state roads meet -- confirm)
        if int_plus['num state directions'] > 2:
            traffic_lights.append(int_plus['traffic light'])
            num_state_directions.append(int_plus['num state directions'])
            average_AADT.append(int_plus['average AADT'])
            total_AADT.append(int_plus['total AADT'])
            road_type_scores.append(int_plus['average road score'])
            speed_limits.append(int_plus['average speed'])

all_data = {'Traffic Light': traffic_lights, 'Number of State Directions': num_state_directions,
            'Average AADT': average_AADT, 'Total AADT': total_AADT,
            'Average Road Score': road_type_scores, 'Speed Limit': speed_limits}

# header labels are right-aligned in the same widths as the numeric columns below
print("{:26s} | {:>8} {:>7} {:>7} {:>7} {:>7} {:>7} {:>8}".format(
    "Category", "Mean", "Max", "Min", "Median", "Stdv.", "Present", "Missing"))
print("-"*85)
for key in all_data:
    values = all_data[key]

    # separate the missing values
    present = [value for value in values if value is not None]
    missing = len(values) - len(present)
    N = len(present)

    # no data for this category: print placeholders rather than letting
    # max()/min() raise ValueError on an empty list
    if N == 0:
        print("{:26s} | {:>8} {:>7} {:>7} {:>7} {:>7} {:7} {:8}".format(
            key, '-', '-', '-', '-', '-', N, missing))
        continue

    maxvalue = max(present)
    minvalue = min(present)
    meanvalue = sum(present)/N
    # middle element of the sorted values (upper middle when N is even)
    medianvalue = sorted(present)[N//2]
    # population standard deviation (divides by N, not N - 1)
    stdv = math.sqrt(sum((v - meanvalue)**2 for v in present)/N)
    # max and min are truncated to integers for display
    print("{:26s} | {:8.2f} {:7} {:7} {:7.1f} {:7.1f} {:7} {:8}".format(
        key, meanvalue, int(maxvalue), int(minvalue), medianvalue, stdv, N, missing))

Category                   |    Mean      Max     Min  Median   Stdv. Present  Missing
-------------------------------------------------------------------------------------
Traffic Light              |     0.12       1       0     0.0     0.3     167        0
Number of State Directions |     3.22       4       3     3.0     0.4     167        0
Average AADT               |  3438.07   10344     214  2939.0  2310.1     167        0
Total AADT                 | 10894.76   31032     642  9295.0  7105.7     167        0
Average Road Score         |     2.64       4       1     2.7     0.8     167        0
Speed Limit                |    43.55      55      30    45.0    10.8      39      128


### More Statistics on Intersections

In [70]:
# Group the intersection records by their 'num state directions' value,
# keeping only records that have an 'average AADT'.
by_num_state_directions = {}
with open("stats/livingston_intersections_plus.txt", 'r') as f:
    for line in f:
        int_plus = json.loads(line)
        n = int_plus['num state directions']
        if int_plus['average AADT'] is not None:
            by_num_state_directions.setdefault(n, []).append(int_plus)


def format_avg_AADT(intersections):
    """Return the mean 'average AADT' of the records as a 20-wide string.

    An empty list yields a right-aligned '-' so that a group with no
    intersections on one side does not raise ZeroDivisionError.
    """
    if not intersections:
        return '-'.rjust(20)
    mean = sum(int_plus['average AADT'] for int_plus in intersections)/len(intersections)
    return "{:20.3f}".format(mean)


# "Intersections" is 13 characters long, so that column is 13 wide in both
# the header and the rows
print("{:26s} | {:>13} {:>12} {:>12} {:>20} {:>20}".format(
    "Number of State Directions", "Intersections", "With Light", "Proportion",
    "Avg AADT with Light", "Avg AADT without"))
print("-"*112)

for n in sorted(by_num_state_directions.keys()):
    group = by_num_state_directions[n]
    # every group has at least one record, so the proportion below is safe
    num_ints = len(group)

    # put the intersections into separate lists
    with_light = [int_plus for int_plus in group if int_plus['traffic light'] == 1]
    without = [int_plus for int_plus in group if int_plus['traffic light'] != 1]

    print("{:26s} | {:13} {:12} {:12.3f} {} {}".format(
        str(n), num_ints, len(with_light), len(with_light)/num_ints,
        format_avg_AADT(with_light), format_avg_AADT(without)))

Number of State Directions | Intersections   With Light   Proportion  Avg AADT with Light     Avg AADT without
----------------------------------------------------------------------------------------------------------------
1                          |           86            0        0.000                    -             2135.140
2                          |         1275            9        0.007             5670.278             3893.578
3                          |          130           15        0.115             4674.444             3539.881
4                          |           37            5        0.135             5084.800             2235.312
