In [1]:
import pandas as pd

The previous notebook presented the traffic data with each row corresponding to an hour-long reading of traffic volume from one station in one direction. The volume counts are split into lanes when that information is available. With a handful of exceptions, volume data from before 2019 lacks lane information. Lane information will be omitted for this project.

In [2]:
# Load the hourly one-way volume table from its pickle file.
# NOTE(review): presumably this file is written by the previous notebook — confirm.
# Pickle can execute arbitrary code on load, so only read files produced by this project.
volume_file_name = 'Data/volume_data.pkl'
one_way_volume_df = pd.read_pickle(volume_file_name)

In [3]:
# Lift the column cap so that displayed frames show every column
# (this setting stays in effect for the rest of the notebook).
pd.options.display.max_columns = None
one_way_volume_df

Unnamed: 0,station_id,dir_of_travel,datetime,lane_x,lane_1,lane_2,lane_3,lane_4,total
0,8,1,2012-07-02 00:00:00,0,0,0,0,0,0
1,8,1,2012-07-02 01:00:00,0,0,0,0,0,0
2,8,1,2012-07-02 02:00:00,0,0,0,0,0,0
3,8,1,2012-07-02 03:00:00,2,0,0,0,0,2
4,8,1,2012-07-02 04:00:00,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
25576843,6224,7,2021-02-27 19:00:00,0,235,7,0,0,242
25576844,6224,7,2021-02-27 20:00:00,0,224,6,0,0,230
25576845,6224,7,2021-02-27 21:00:00,0,165,3,0,0,168
25576846,6224,7,2021-02-27 22:00:00,0,130,1,0,0,131


In [4]:
# Column dtypes and memory footprint of the raw one-way table.
one_way_volume_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25576848 entries, 0 to 25576847
Data columns (total 9 columns):
 #   Column         Dtype         
---  ------         -----         
 0   station_id     category      
 1   dir_of_travel  category      
 2   datetime       datetime64[ns]
 3   lane_x         int16         
 4   lane_1         int16         
 5   lane_2         int16         
 6   lane_3         int16         
 7   lane_4         int16         
 8   total          int16         
dtypes: category(2), datetime64[ns](1), int16(6)
memory usage: 536.6 MB


In [5]:
# Summary statistics for the numeric (lane and total) count columns.
one_way_volume_df.describe()

Unnamed: 0,lane_x,lane_1,lane_2,lane_3,lane_4,total
count,25576850.0,25576850.0,25576850.0,25576850.0,25576850.0,25576850.0
mean,563.448,13.67444,6.562003,0.589202,0.2335735,584.2736
std,1014.878,75.89667,55.44583,15.46331,10.24269,1011.423
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,17.0,0.0,0.0,0.0,0.0,30.0
50%,124.0,0.0,0.0,0.0,0.0,155.0
75%,619.0,0.0,0.0,0.0,0.0,660.0
max,10290.0,5196.0,3841.0,1841.0,1636.0,10290.0


In [6]:
# Lane-level counts are not used in this project; keep only the per-direction hourly total.
# The frame is reassigned rather than modified in place, and errors='ignore' makes a
# second run of this cell a no-op instead of a KeyError on the already-removed columns.
lane_columns = ['lane_x', 'lane_1', 'lane_2', 'lane_3', 'lane_4']
one_way_volume_df = one_way_volume_df.drop(columns=lane_columns, errors='ignore')
one_way_volume_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25576848 entries, 0 to 25576847
Data columns (total 4 columns):
 #   Column         Dtype         
---  ------         -----         
 0   station_id     category      
 1   dir_of_travel  category      
 2   datetime       datetime64[ns]
 3   total          int16         
dtypes: category(2), datetime64[ns](1), int16(1)
memory usage: 292.7 MB


In addition to dropping the lane counts, we combine the two directions of travel by summing their hourly totals for each station.

In [7]:
# Combine both directions of travel into a single two-way count per station and hour.
# Only 'total' is aggregated explicitly: the other non-key column, dir_of_travel, is
# categorical and cannot be summed (older pandas silently drops it from the result,
# pandas 2.x raises TypeError instead).
# observed=True limits the groups to station ids that actually occur in the data;
# sort=False keeps the groups in order of first appearance.
two_way_volume_df = (
    one_way_volume_df
    .groupby(['station_id', 'datetime'], as_index=False, sort=False, observed=True)[['total']]
    .sum()
)
two_way_volume_df

Unnamed: 0,station_id,datetime,total
0,8,2012-07-02 00:00:00,4
1,8,2012-07-02 01:00:00,0
2,8,2012-07-02 02:00:00,0
3,8,2012-07-02 03:00:00,6
4,8,2012-07-02 04:00:00,0
...,...,...,...
12791851,6224,2021-02-27 19:00:00,505
12791852,6224,2021-02-27 20:00:00,480
12791853,6224,2021-02-27 21:00:00,358
12791854,6224,2021-02-27 22:00:00,231


Next, we drop any station that isn't currently active.

In [8]:
# Spreadsheet listing the current stations, one row per station.
current_stations_file = 'Data/Current_CC_StationList.xlsx'
current_df = pd.read_excel(current_stations_file)

# The 'Continuous Number' column holds the station ids used to filter the volume data.
current_stations = current_df.loc[:, 'Continuous Number'].to_numpy()

In [9]:
# Preview the station list (station numbers, route, lane counts, location text).
current_df

Unnamed: 0,Continuous Number,Sequence Number,Collection Type,Route,Pos Dir Dir,Pos Lanes,Neg Lanes,Urban/Rural,Functional Class,County Name,Location Text
0,26,5707,WIM,I-35,North,2,2,Rural,Interstate,Steele,"3.5 MI N OF TH30, N OF ELLENDALE"
1,27,9830,WIM,TH 60,East,2,2,Rural,Principal Arterial - Other,Watonwan,"0.7 MI W OF W JCT OF TH4, SW OF ST JAMES"
2,28,22993,"ATR Volume, Speed, Class",MSAS 114,East,1,1,Urban,Minor Arterial,Stearns,W OF 20TH AVE N IN ST CLOUD
3,29,69377,WIM,TH 53,North,2,2,Rural,Principal Arterial - Other,Saint Louis,"3 MI S OF CSAH59 (MELRUDE RD), S OF EVELETH"
4,30,6757,WIM,TH 61,North,2,2,Rural,Principal Arterial - Other,Lake,"SW OF CSAH25, SW OF TWO HARBORS"
...,...,...,...,...,...,...,...,...,...,...,...
99,4820,4820,"ATR Volume, Speed, Length",I-90,East,2,2,Rural,Interstate,Rock,"W OF CSAH 4, W OF BEAVER CREEK"
100,4910,4910,"ATR Volume, Speed, Length",TH 53,North,2,2,Rural,Principal Arterial - Other,Saint Louis,"N OF CSAH8 (DULUTH SAINT VINCENT RD), NW OF HE..."
101,5984,5984,"ATR Volume, Speed, Length",TH 61,North,2,2,Rural,Principal Arterial - Other,Wabasha,".9 MI SE OF CSAH18, S OF KELLOGG"
102,6067,6067,"ATR Volume, Speed, Length",TH 10,East,2,2,Rural,Principal Arterial - Other,Wadena,".3 MI E OF 141ST AVE, W OF VERNDALE"


In [10]:
# Column dtypes and non-null counts of the station list.
current_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Continuous Number  104 non-null    int64 
 1   Sequence Number    104 non-null    int64 
 2   Collection Type    104 non-null    object
 3   Route              104 non-null    object
 4   Pos Dir Dir        104 non-null    object
 5   Pos Lanes          104 non-null    int64 
 6   Neg Lanes          104 non-null    int64 
 7   Urban/Rural        104 non-null    object
 8   Functional Class   104 non-null    object
 9   County Name        104 non-null    object
 10  Location Text      104 non-null    object
dtypes: int64(4), object(7)
memory usage: 9.1+ KB


In [11]:
# Boolean row mask: True where the row's station id appears in the current-station list.
active_stations = two_way_volume_df['station_id'].isin(current_stations)

# Keep only the rows of current stations; the surviving rows retain their old labels.
two_way_volume_df = two_way_volume_df.loc[active_stations]
two_way_volume_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11267376 entries, 135888 to 12791855
Data columns (total 3 columns):
 #   Column      Dtype         
---  ------      -----         
 0   station_id  category      
 1   datetime    datetime64[ns]
 2   total       int16         
dtypes: category(1), datetime64[ns](1), int16(1)
memory usage: 204.2 MB


In [12]:
# Filtering left gaps in the row labels; renumber the rows 0..n-1 and discard the old labels.
two_way_volume_df = two_way_volume_df.reset_index(drop=True)
two_way_volume_df

Unnamed: 0,station_id,datetime,total
0,26,2007-01-01 00:00:00,92
1,26,2007-01-01 01:00:00,69
2,26,2007-01-01 02:00:00,70
3,26,2007-01-01 03:00:00,55
4,26,2007-01-01 04:00:00,93
...,...,...,...
11267371,6224,2021-02-27 19:00:00,505
11267372,6224,2021-02-27 20:00:00,480
11267373,6224,2021-02-27 21:00:00,358
11267374,6224,2021-02-27 22:00:00,231


In [13]:
# Check the frame after the index reset (row labels should now be a plain RangeIndex).
two_way_volume_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11267376 entries, 0 to 11267375
Data columns (total 3 columns):
 #   Column      Dtype         
---  ------      -----         
 0   station_id  category      
 1   datetime    datetime64[ns]
 2   total       int16         
dtypes: category(1), datetime64[ns](1), int16(1)
memory usage: 118.2 MB


Finally, the two-way frame is pivoted so that each column is the hourly time series for one station, indexed by datetime.

In [14]:
# Reshape from long to wide: one row per timestamp, one column per station id,
# each cell holding that station's two-way total for the hour.
# Hours for which a station has no reading become NaN, which is why the counts
# come out as float64 rather than int16.
# NOTE: this rebinds two_way_volume_df from the long to the wide layout, so the cell
# cannot be re-run on its own without first re-running the cells that build the long frame.
two_way_volume_df = two_way_volume_df.pivot(index='datetime', columns='station_id', values='total')
two_way_volume_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 167904 entries, 2002-01-01 00:00:00 to 2021-02-28 23:00:00
Columns: 101 entries, 26 to 6224
dtypes: float64(101)
memory usage: 130.7 MB


In [15]:
# Preview the wide frame; NaN marks hours with no reading for that station.
two_way_volume_df

station_id,26,27,28,29,30,31,32,33,34,35,37,38,39,40,41,42,43,44,45,46,47,48,49,51,103,110,164,175,179,187,191,198,199,200,204,208,209,210,211,212,214,218,219,220,221,222,223,225,227,228,229,230,231,232,233,301,303,305,309,315,321,326,329,335,336,341,342,351,352,353,354,365,381,382,384,386,388,389,390,400,402,405,407,410,420,425,458,460,464,495,949,1335,1604,1940,3467,3790,3801,4820,4910,5984,6224
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2002-01-01 00:00:00,,,166.0,,,,,,,,,,,,,,,,,,,,,13.0,822.0,116.0,66.0,120.0,35.0,152.0,405.0,32.0,36.0,287.0,177.0,261.0,8.0,12.0,22.0,306.0,3.0,5.0,36.0,17.0,63.0,11.0,73.0,32.0,72.0,,,,,,,4144.0,2869.0,1489.0,2554.0,1824.0,2798.0,3251.0,2351.0,486.0,1405.0,1056.0,,136.0,495.0,198.0,813.0,397.0,73.0,190.0,410.0,86.0,231.0,646.0,170.0,2.0,126.0,1874.0,358.0,73.0,541.0,507.0,129.0,128.0,138.0,,,,,,,,,,,,
2002-01-01 01:00:00,,,157.0,,,,,,,,,,,,,,,,,,,,,11.0,877.0,58.0,68.0,157.0,46.0,157.0,568.0,23.0,18.0,305.0,173.0,258.0,14.0,16.0,18.0,332.0,10.0,9.0,28.0,17.0,52.0,10.0,61.0,31.0,63.0,,,,,,,4761.0,3165.0,1453.0,3173.0,2230.0,3157.0,4074.0,2947.0,545.0,1549.0,1292.0,,178.0,526.0,210.0,912.0,367.0,112.0,283.0,411.0,111.0,259.0,654.0,168.0,2.0,121.0,2322.0,378.0,56.0,523.0,440.0,111.0,105.0,103.0,,,,,,,,,,,,
2002-01-01 02:00:00,,,97.0,,,,,,,,,,,,,,,,,,,,,5.0,472.0,49.0,57.0,83.0,19.0,92.0,395.0,20.0,17.0,242.0,131.0,179.0,1.0,16.0,19.0,130.0,0.0,7.0,16.0,8.0,30.0,3.0,34.0,13.0,44.0,,,,,,,2849.0,1773.0,773.0,2207.0,1362.0,1773.0,2385.0,1852.0,332.0,865.0,704.0,,110.0,290.0,141.0,517.0,218.0,67.0,164.0,205.0,58.0,148.0,419.0,72.0,1.0,58.0,1444.0,212.0,35.0,326.0,235.0,77.0,68.0,78.0,,,,,,,,,,,,
2002-01-01 03:00:00,,,57.0,,,,,,,,,,,,,,,,,,,,,1.0,353.0,48.0,39.0,81.0,10.0,61.0,255.0,14.0,2.0,160.0,73.0,135.0,2.0,5.0,12.0,90.0,3.0,11.0,11.0,8.0,31.0,2.0,28.0,9.0,29.0,,,,,,,1736.0,1069.0,461.0,1320.0,825.0,1069.0,1255.0,1146.0,206.0,509.0,405.0,,38.0,173.0,85.0,310.0,173.0,32.0,94.0,134.0,25.0,88.0,240.0,49.0,3.0,41.0,870.0,131.0,12.0,213.0,122.0,29.0,40.0,35.0,,,,,,,,,,,,
2002-01-01 04:00:00,,,39.0,,,,,,,,,,,,,,,,,,,,,2.0,296.0,59.0,28.0,75.0,13.0,43.0,168.0,4.0,2.0,142.0,74.0,101.0,1.0,2.0,8.0,82.0,0.0,3.0,5.0,1.0,17.0,2.0,30.0,9.0,35.0,,,,,,,1121.0,785.0,324.0,916.0,579.0,908.0,822.0,734.0,133.0,319.0,209.0,,29.0,122.0,85.0,219.0,216.0,31.0,93.0,77.0,18.0,72.0,182.0,25.0,4.0,38.0,583.0,60.0,18.0,120.0,63.0,21.0,29.0,33.0,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-28 19:00:00,712.0,,,,,162.0,1234.0,129.0,,,,858.0,219.0,,,1415.0,584.0,23.0,396.0,,,,,,,723.0,,,195.0,719.0,,74.0,34.0,,309.0,,21.0,29.0,,389.0,8.0,27.0,,61.0,,61.0,,75.0,262.0,26.0,31.0,80.0,10.0,,48.0,3598.0,3953.0,2383.0,,,,,,,2205.0,,,,,,,1126.0,,2254.0,976.0,164.0,541.0,876.0,193.0,,,2550.0,,81.0,1032.0,566.0,292.0,119.0,365.0,807.0,1542.0,471.0,,138.0,,935.0,713.0,,,,
2021-02-28 20:00:00,549.0,,,,,119.0,763.0,51.0,,,,629.0,173.0,,,1090.0,384.0,7.0,276.0,,,,,,,499.0,,,67.0,465.0,,57.0,13.0,,228.0,,16.0,23.0,,232.0,3.0,13.0,,37.0,,27.0,,41.0,204.0,11.0,17.0,52.0,3.0,,34.0,3014.0,3108.0,1750.0,,,,,,,1797.0,,,,,,,955.0,,1947.0,722.0,127.0,432.0,724.0,145.0,,,2093.0,,68.0,737.0,404.0,188.0,75.0,226.0,601.0,1176.0,350.0,,86.0,,692.0,581.0,,,,
2021-02-28 21:00:00,386.0,,,,,79.0,483.0,50.0,,,,531.0,115.0,,,857.0,249.0,6.0,207.0,,,,,,,306.0,,,159.0,299.0,,25.0,17.0,,124.0,,7.0,12.0,,132.0,5.0,12.0,,18.0,,16.0,,39.0,155.0,6.0,12.0,23.0,7.0,,23.0,2468.0,2475.0,1309.0,,,,,,,1373.0,,,,,,,647.0,,1801.0,555.0,64.0,278.0,570.0,87.0,,,1605.0,,45.0,557.0,273.0,135.0,47.0,160.0,408.0,851.0,230.0,,57.0,,456.0,388.0,,,,
2021-02-28 22:00:00,257.0,,,,,46.0,320.0,31.0,,,,379.0,93.0,,,663.0,140.0,4.0,140.0,,,,,,,209.0,,,116.0,192.0,,18.0,10.0,,104.0,,2.0,8.0,,109.0,0.0,5.0,,10.0,,10.0,,21.0,105.0,5.0,8.0,18.0,2.0,,26.0,1929.0,1940.0,827.0,,,,,,,1359.0,,,,,,,406.0,,1649.0,377.0,45.0,167.0,409.0,59.0,,,1179.0,,24.0,327.0,165.0,67.0,33.0,132.0,280.0,486.0,180.0,,34.0,,294.0,270.0,,,,


In [16]:
# Frequency attached to the DatetimeIndex right after the pivot
# (the cell shows no output, i.e. no frequency is set yet).
two_way_volume_df.index.freq

In [17]:
# Conform the index to a strict hourly grid; any hour missing from the data is
# inserted as a row of NaN. An explicit offset object is passed instead of the 'H'
# string alias, which newer pandas releases deprecate in favour of 'h'.
two_way_volume_df = two_way_volume_df.asfreq(pd.offsets.Hour())

In [18]:
# Confirm that the index now carries an hourly frequency.
two_way_volume_df.index.freq

<Hour>

In [27]:
# Per-station summary statistics; the 'count' row is the number of non-missing
# hourly readings each station has.
two_way_volume_df.describe()

station_id,26,27,28,29,30,31,32,33,34,35,37,38,39,40,41,42,43,44,45,46,47,48,49,51,103,110,164,175,179,187,191,198,199,200,204,208,209,210,211,212,214,218,219,220,221,222,223,225,227,228,229,230,231,232,233,301,303,305,309,315,321,326,329,335,336,341,342,351,352,353,354,365,381,382,384,386,388,389,390,400,402,405,407,410,420,425,458,460,464,495,949,1335,1604,1940,3467,3790,3801,4820,4910,5984,6224
count,104520.0,36648.0,149016.0,91176.0,20088.0,77520.0,61128.0,97056.0,95736.0,104568.0,4320.0,87360.0,87840.0,70128.0,66792.0,70752.0,68328.0,51240.0,53472.0,37680.0,5712.0,24456.0,24336.0,152640.0,127584.0,148152.0,165528.0,150504.0,151608.0,160464.0,156432.0,166128.0,164016.0,140904.0,154584.0,147792.0,166248.0,165792.0,147408.0,166512.0,155328.0,147408.0,153600.0,165912.0,149016.0,164376.0,115536.0,163248.0,149016.0,76032.0,75672.0,73968.0,74544.0,71208.0,65424.0,148944.0,148848.0,149760.0,148416.0,148416.0,148416.0,148416.0,148416.0,147552.0,149616.0,150384.0,130896.0,160056.0,162048.0,152808.0,138768.0,165360.0,97944.0,156168.0,167256.0,163752.0,162480.0,149784.0,167016.0,136728.0,143952.0,149784.0,144288.0,167520.0,166800.0,162216.0,167784.0,167424.0,159936.0,18048.0,16032.0,10680.0,10944.0,18216.0,8736.0,7752.0,12504.0,15864.0,15864.0,16080.0,16248.0
mean,823.911232,241.110702,381.588085,333.606278,338.690114,221.716164,1174.177496,203.219245,127.452735,232.416179,1250.282639,1235.480552,429.716314,2260.163758,18.738621,1293.280755,673.373932,39.733236,513.389194,17.003105,777.965511,15.519096,433.561391,20.231473,1875.224801,1060.105061,378.653756,933.931802,91.538118,866.575288,1797.113794,109.686001,60.910405,1753.402338,427.829523,1272.211426,25.919295,47.583116,155.13964,900.340564,16.618066,35.730415,144.886172,80.979531,124.687282,71.701812,179.366977,117.147003,383.140529,31.251289,44.392444,103.098137,12.69351,14.467167,55.091174,6563.187587,5552.757786,4215.634495,3802.802063,4583.960894,6619.515409,5563.775018,3788.897875,1832.631784,3269.762532,2849.902643,4751.685231,580.102189,1434.571121,922.475643,3454.225837,1641.238661,448.706005,1191.248053,1271.019204,275.205628,800.074095,1783.754587,416.289733,77.326963,845.647445,3716.793863,751.435636,124.896054,1194.162836,935.84541,367.780146,230.926235,840.977666,912.176031,1172.179516,303.415356,334.972588,161.825209,381.082761,781.865841,591.790147,468.458081,262.303328,161.137811,482.407742
std,559.375188,173.731342,297.114162,257.386606,278.292391,162.298123,798.815427,147.640079,94.548808,168.91895,864.506523,898.620162,303.422281,1496.455941,17.920011,843.115232,505.498468,33.92415,377.628136,13.115321,687.808357,12.811987,312.886913,25.444168,1254.541245,844.319479,279.572572,675.917013,72.547895,669.144061,1210.05328,77.363442,45.917,1142.668035,371.709436,819.336828,19.068106,35.324599,126.290995,679.794849,15.833076,28.483303,111.042497,71.996655,98.045274,82.863566,139.532882,90.078375,268.480025,26.660739,33.100503,76.261917,12.524174,13.566869,42.40245,3905.075709,3274.97249,2718.341657,2078.486155,2888.281694,3809.000746,3424.272999,2178.196086,1208.748019,2254.041004,1934.767326,2920.360088,401.53211,937.874072,602.250096,2213.196849,1039.967723,307.891991,799.976531,879.640645,194.501457,525.719478,1299.418985,319.044177,59.649373,709.174416,2396.984896,528.498666,92.614637,816.591108,645.254447,270.291585,173.44754,701.674081,658.026981,872.551437,213.149328,226.234683,121.7445,262.970807,570.748119,410.387708,338.220927,204.005255,129.198108,326.093422
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,127.0,0.0,54.0,6.0,133.0,0.0,0.0,98.0,0.0,65.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,22.0,0.0,0.0,0.0,0.0,0.0
25%,298.0,70.0,92.0,83.0,72.75,56.0,348.0,52.0,32.0,60.0,394.0,395.0,134.0,768.0,4.0,432.0,149.0,7.0,126.0,5.0,192.0,3.0,130.0,3.0,646.0,208.0,101.0,300.0,22.0,225.0,611.0,32.0,14.0,647.0,114.0,463.0,8.0,12.0,39.0,262.0,3.0,10.0,38.0,17.0,31.0,13.0,46.0,26.0,123.0,6.0,12.0,32.0,3.0,2.0,15.0,2762.0,2273.0,1316.0,1780.0,1564.75,2563.0,2186.0,1583.0,623.0,1068.0,877.0,1601.0,183.0,478.0,315.0,1167.75,580.0,155.0,383.0,371.0,80.0,258.0,564.0,84.0,19.0,125.0,1256.0,203.0,31.0,351.0,267.0,94.0,57.0,133.0,242.0,296.0,100.0,132.0,39.0,116.0,233.0,184.0,134.0,55.0,37.0,167.0
50%,789.0,233.0,348.0,326.0,319.0,227.0,1237.5,194.0,124.0,232.0,1229.0,1121.0,429.0,2329.0,15.0,1336.0,686.0,35.0,530.0,16.0,715.0,14.0,398.0,15.0,1962.0,1005.0,382.0,887.0,95.0,844.0,1844.5,109.0,59.0,1775.5,367.0,1298.0,25.0,48.0,146.0,880.0,13.0,33.0,140.0,71.0,117.0,50.0,170.0,112.0,370.0,28.0,45.0,99.0,11.0,12.0,53.0,6933.0,5950.0,4568.0,4291.0,4927.0,7474.0,5919.0,4196.0,1922.0,3330.0,2861.5,5249.0,607.0,1526.0,968.0,3561.5,1797.0,450.0,1225.0,1356.0,286.0,871.0,1571.0,435.0,77.0,778.0,3770.0,815.0,126.0,1278.0,1048.0,382.0,233.0,791.0,914.0,1142.5,285.0,296.0,155.0,366.0,743.0,575.0,450.0,250.0,145.0,451.0
75%,1207.0,368.0,602.0,503.0,521.0,348.0,1799.25,334.0,202.0,365.0,1889.5,1895.0,648.0,3292.0,28.0,1948.0,1057.0,64.0,783.0,27.0,1113.0,25.0,659.0,29.0,2806.0,1845.0,573.0,1371.0,141.0,1286.0,2656.0,173.0,99.0,2566.0,618.0,1881.0,41.0,75.0,231.0,1309.0,25.0,54.0,221.0,118.0,191.0,96.0,272.0,190.0,578.0,49.0,68.0,151.0,19.0,23.0,83.0,9776.0,8295.0,6430.0,5499.0,6953.0,10011.0,8569.25,5792.0,2745.0,4745.0,4299.0,7321.25,863.0,2181.0,1375.0,5233.0,2439.0,662.0,1809.0,1963.0,416.0,1211.0,2741.0,649.0,119.0,1485.0,5783.0,1168.0,189.0,1915.0,1395.0,546.0,361.0,1495.0,1453.0,1838.0,453.0,493.0,255.0,590.0,1209.0,912.0,723.0,431.0,257.0,714.0
max,4225.0,1098.0,1756.0,1774.0,1847.0,894.0,4005.0,665.0,577.0,936.0,3313.0,4579.0,1431.0,6849.0,177.0,3675.0,2422.0,270.0,1841.0,87.0,3948.0,77.0,1555.0,5231.0,7420.0,3335.0,1530.0,8759.0,1823.0,4727.0,7427.0,890.0,379.0,10956.0,2319.0,5681.0,182.0,266.0,3303.0,9408.0,155.0,2512.0,4346.0,947.0,609.0,901.0,964.0,874.0,1557.0,187.0,392.0,559.0,417.0,459.0,393.0,14113.0,12233.0,16512.0,8453.0,12686.0,14668.0,13373.0,10865.0,16901.0,9761.0,14654.0,10634.0,4276.0,4249.0,5776.0,10378.0,6495.0,1767.0,6138.0,4991.0,4049.0,2866.0,6468.0,1827.0,773.0,2963.0,9871.0,9843.0,1027.0,8457.0,2802.0,3906.0,3939.0,5243.0,2806.0,4533.0,1349.0,1553.0,542.0,1139.0,2607.0,2187.0,1628.0,912.0,740.0,1676.0


In [20]:
# Convert the column labels (station ids) to strings.
# NOTE(review): presumably so that downstream code can select stations by string label — confirm.
two_way_volume_df.columns = two_way_volume_df.columns.astype('str')

In [21]:
# Persist the wide, hourly-indexed two-way table (string column labels) as a pickle.
two_way_volume_file_name = 'Data/two_way_volume_data.pkl'
two_way_volume_df.to_pickle(two_way_volume_file_name)

In [23]:
# Number of hourly rows in the final frame (includes any rows inserted by asfreq).
len(two_way_volume_df.index)

167976

In [29]:
# Station(s) with the most non-missing hourly readings.
# The per-station counts are computed once and reused, rather than scanning the
# whole frame three times.
station_counts = two_way_volume_df.count()
station_counts[station_counts == station_counts.max()]

station_id
458    167784
dtype: int64

In [38]:
# Number of timestamps at which no station has a reading (rows that are entirely NaN).
# A row-wise isna().all() avoids transposing the full frame and building a filtered
# copy just to measure its length.
int(two_way_volume_df.isna().all(axis=1).sum())

72