In [1]:
import pandas as pd
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject CSS that sets the `.container` element to 90% of the window width,
# so wide tables are not squeezed into a narrow page.
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
# Load the two raw road-segment tables; each row is one segment whose
# geometry is stored as "LINESTRING(...)" text in `road_geom`.
interstates, highways = (
    pd.read_csv(csv_path)
    for csv_path in (
        'resources/all_us_interstates.csv',
        'resources/all_us_highways.csv',
    )
)

In [3]:
interstates.head()

Unnamed: 0,full_name,state_name,road_geom
0,I- 10,Alabama,"LINESTRING(-87.412165 30.573765, -87.412276 30..."
1,I- 10,Alabama,"LINESTRING(-88.40047 30.474213, -88.399619 30...."
2,I- 10,Alabama,"LINESTRING(-88.007269 30.686598, -88.005754 30..."
3,I- 10,Alabama,"LINESTRING(-88.40049 30.474455, -88.399889 30...."
4,I- 10 Industrial Pkwy,Alabama,"LINESTRING(-88.174432 30.568621, -88.174352 30..."


In [4]:
highways.head()

Unnamed: 0,full_name,state_name,road_geom
0,US 98 Frontage Rd,Alabama,"LINESTRING(-87.912731 30.64539, -87.912816 30...."
1,US Forest Svc Rd,Alabama,"LINESTRING(-87.379328 34.097353, -87.379332 34..."
2,US Government Rd,Alabama,"LINESTRING(-85.106533 31.64339, -85.106493 31...."
3,US Government Rd,Alabama,"LINESTRING(-85.113503 31.643839, -85.113289 31..."
4,US Hwy 10,Alabama,"LINESTRING(-88.295371 32.170564, -88.295732 32..."


In [5]:
import math

In [6]:
def _parse_linestring(geom):
    """Convert one `road_geom` cell into a list of `[x, y]` float lists."""
    if isinstance(geom, (list, tuple)):
        # Already parsed (the frame went through get_distance before).
        return [list(point) for point in geom]
    if geom is None or (isinstance(geom, float) and math.isnan(geom)):
        # Missing geometry: treat it as a road with no points.
        return []
    if not isinstance(geom, str):
        raise TypeError(f'unsupported road_geom value: {geom!r}')

    start, end = geom.find('('), geom.rfind(')')
    if start == -1 or end < start:
        if geom.strip().upper().endswith('EMPTY'):
            return []
        raise ValueError(f'cannot parse road_geom value: {geom!r}')

    # Coordinates are comma-separated "x y" pairs between the parentheses.
    return [
        [float(value) for value in pair.split()]
        for pair in geom[start + 1:end].split(',')
        if pair.strip()
    ]


def _path_length(points):
    """Sum the straight-line lengths between consecutive points."""
    return sum(
        math.hypot(nxt[0] - prev[0], nxt[1] - prev[1])
        for prev, nxt in zip(points, points[1:])
    )


def get_distance(dataset):
    """Parse road geometries and measure each road's polyline length.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a `road_geom` column holding either
        "LINESTRING(x y, x y, ...)" text or already-parsed lists of
        `[x, y]` pairs.

    Returns
    -------
    pandas.DataFrame
        A copy of `dataset` (the input is left untouched) in which
        `road_geom` holds lists of `[x, y]` float pairs, plus two columns:
        `geo_points` (number of points per road) and `distance`.

    Notes
    -----
    `distance` is the planar Euclidean length in the units of the
    coordinates themselves. If the coordinates are longitude/latitude, the
    result is in degrees, not miles or kilometres.
    Roads with zero or one point get a distance of 0.
    """
    # Work on a copy so the caller's frame keeps its original text geometry
    # and the function can safely be run again on the same input.
    result = dataset.copy()

    parsed = [_parse_linestring(geom) for geom in result['road_geom'].to_list()]

    result['road_geom'] = parsed
    result['geo_points'] = [len(points) for points in parsed]
    result['distance'] = [_path_length(points) for points in parsed]
    return result

In [7]:
# Parse each interstate segment's geometry and add the `geo_points` and `distance` columns.
interstates = get_distance(interstates)

In [8]:
interstates.head()

Unnamed: 0,full_name,state_name,road_geom,geo_points,distance
0,I- 10,Alabama,"[[-87.412165, 30.573765], [-87.412276, 30.5737...",385,0.619295
1,I- 10,Alabama,"[[-88.40047, 30.474213], [-88.399619, 30.47454...",386,0.466148
2,I- 10,Alabama,"[[-88.007269, 30.686598], [-88.005754, 30.6861...",373,0.619205
3,I- 10,Alabama,"[[-88.40049, 30.474455], [-88.399889, 30.47469...",363,0.466335
4,I- 10 Industrial Pkwy,Alabama,"[[-88.174432, 30.568621], [-88.174352, 30.5685...",9,0.00269


In [9]:
# Parse each highway segment's geometry and add the `geo_points` and `distance` columns.
highways = get_distance(highways)

In [10]:
highways.head()

Unnamed: 0,full_name,state_name,road_geom,geo_points,distance
0,US 98 Frontage Rd,Alabama,"[[-87.912731, 30.64539], [-87.912816, 30.64543...",21,0.006994
1,US Forest Svc Rd,Alabama,"[[-87.379328, 34.097353], [-87.379332, 34.0978...",11,0.010345
2,US Government Rd,Alabama,"[[-85.106533, 31.64339], [-85.106493, 31.64318...",7,0.002469
3,US Government Rd,Alabama,"[[-85.113503, 31.643839], [-85.113289, 31.6438...",17,0.007466
4,US Hwy 10,Alabama,"[[-88.295371, 32.170564], [-88.295732, 32.1706...",5,0.001346


In [11]:
# Total interstate segment length per state.
# Select `distance` before aggregating so the text and list-valued columns
# (`full_name`, `road_geom`) are never summed and then thrown away.
interstate_distance = (
    interstates
    .groupby('state_name')['distance']
    .sum()
    .reset_index(name='sum_interstate_distance')
)
interstate_distance.head()

Unnamed: 0,state_name,sum_interstate_distance
0,Alabama,36.236478
1,Arizona,39.706072
2,Arkansas,24.131938
3,California,81.544651
4,Colorado,34.757985


In [12]:
# Total highway segment length per state.
# Select `distance` before aggregating so the text and list-valued columns
# (`full_name`, `road_geom`) are never summed and then thrown away.
highway_distance = (
    highways
    .groupby('state_name')['distance']
    .sum()
    .reset_index(name='sum_highway_distance')
)
highway_distance.head()

Unnamed: 0,state_name,sum_highway_distance
0,Alabama,84.315171
1,Alaska,0.002441
2,Arizona,37.897351
3,Arkansas,86.258683
4,California,46.537062


In [13]:
# Put both per-state totals side by side. The outer join keeps states that
# appear in only one of the two tables; the missing total becomes NaN.
distance_by_state = (
    pd.merge(interstate_distance, highway_distance, how='outer', on='state_name')
    .sort_values('state_name')
    .reset_index(drop=True)
)
distance_by_state

Unnamed: 0,state_name,sum_interstate_distance,sum_highway_distance
0,Alabama,36.236478,84.315171
1,Alaska,,0.002441
2,Arizona,39.706072,37.897351
3,Arkansas,24.131938,86.258683
4,California,81.544651,46.537062
5,Colorado,34.757985,105.259867
6,Connecticut,11.909521,11.266595
7,Delaware,1.390199,7.388483
8,District of Columbia,0.396204,0.821816
9,Florida,46.610764,111.016011


In [14]:
# Rebind both names to 0 so they no longer reference the large road tables
# (presumably to free memory before the per-state files are loaded -- confirm).
interstates = 0
highways = 0

In [15]:
import os

In [21]:
# Sum the segment lengths in every per-state CSV of non-freeway roads.
# os.listdir returns entries in arbitrary order and includes anything else
# that lives in the folder, so keep only the .csv files and sort them to
# make the run deterministic.
state_list = sorted(
    entry
    for entry in os.listdir('resources/non_freeway_roads/')
    if entry.endswith('.csv')
)
output_roads = {}

for file_name in state_list:
    state_df = pd.read_csv(f'resources/non_freeway_roads/{file_name}')
    state_roads = get_distance(state_df)
    # Keys keep the file name (e.g. 'Alabama.csv').
    output_roads[file_name] = sum(state_roads['distance'].to_list())

In [22]:
output_roads

{'Alabama.csv': 2055.135931194679,
 'Arizona.csv': 1519.7984156635687,
 'Arkansas.csv': 2121.400731771309,
 'California.csv': 4664.484636791149,
 'Colorado.csv': 2022.606169492029,
 'Connecticut.csv': 471.5694659920096,
 'Delaware.csv': 207.43820151965093,
 'District of Columbia.csv': 20.552629329131445,
 'Florida.csv': 3012.1234482669915,
 'Georgia.csv': 2834.4342016233795,
 'Idaho.csv': 1357.0589522259213,
 'Illinois.csv': 3681.51476596439,
 'Indiana.csv': 2334.126738551404,
 'Iowa.csv': 2584.541829049201,
 'Kansas.csv': 2679.244407619301,
 'Kentucky.csv': 1836.979655093898,
 'Louisiana.csv': 1500.6627517453533,
 'Maine.csv': 801.0856432381745,
 'Maryland.csv': 766.1900308220053,
 'Massachusetts.csv': 792.1449440759625,
 'Michigan.csv': 2920.103706889421,
 'Minnesota.csv': 3573.569155748913,
 'Mississippi.csv': 1485.9789789526515,
 'Missouri.csv': 3062.896296461078,
 'Montana.csv': 1719.021094631166,
 'Nebraska.csv': 1821.2372447477753,
 'Nevada.csv': 615.1291338014304,
 'New Hampshi

In [30]:
# One row per state file: the state name is the file name without '.csv',
# paired with that file's summed distance.
all_other_roads = pd.DataFrame(
    [
        (file_name.replace('.csv',''), total)
        for file_name, total in output_roads.items()
    ],
    columns=['state_name', 'sum_other_distance'],
)
all_other_roads.head()

Unnamed: 0,state_name,sum_other_distance
0,Alabama,2055.135931
1,Arizona,1519.798416
2,Arkansas,2121.400732
3,California,4664.484637
4,Colorado,2022.606169


In [45]:
distance_by_state = (
    distance_by_state
    # Drop the column left by a previous run of this cell; otherwise a re-run
    # would produce 'sum_other_distance_x' / 'sum_other_distance_y'.
    .drop(columns='sum_other_distance', errors='ignore')
    .merge(all_other_roads, how='left', on='state_name')
    # Keep only rows that have a value in every column; states missing any
    # of the three totals are removed here.
    .dropna()
)
distance_by_state

Unnamed: 0,state_name,sum_interstate_distance,sum_highway_distance,sum_other_distance
0,Alabama,36.236478,84.315171,2055.135931
2,Arizona,39.706072,37.897351,1519.798416
3,Arkansas,24.131938,86.258683,2121.400732
4,California,81.544651,46.537062,4664.484637
5,Colorado,34.757985,105.259867,2022.606169
6,Connecticut,11.909521,11.266595,471.569466
7,Delaware,1.390199,7.388483,207.438202
8,District of Columbia,0.396204,0.821816,20.552629
9,Florida,46.610764,111.016011,3012.123448
10,Georgia,39.589953,124.396675,2834.434202


In [46]:
# Write the per-state totals to a CSV in the working directory, without the index column.
distance_by_state.to_csv('road_distance_by_state.csv', index=False)