In [27]:
import pandas as pd
import datetime
import matplotlib.pyplot as plt

In [5]:
# Read the Excel file into a dictionary of data frames, with sheet names as keys
xls = pd.read_excel('Data Collection/TurningMovementCounts.xlsx', sheet_name=None)
xls

{'7800':          Time  EBL  EBT  EBR  SBL  SBT
 0    00:00:00    2    5    2   12    7
 1    00:15:00    2    4    0    9    7
 2    00:30:00    3    1    1    0    3
 3    00:45:00    0    1    1    3    5
 4    01:00:00    4    6    0    9    5
 ..        ...  ...  ...  ...  ...  ...
 955  22:45:00    8    8    1   14   35
 956  23:00:00   12    5    3    8   10
 957  23:15:00    6    1    1   17   14
 958  23:30:00    5    6    2    9   13
 959  23:45:00    4    6    1    3   18
 
 [960 rows x 6 columns],
 '7801':          Time  EBL  EBT  EBR  WBL  WBT  WBR  NBL  NBT  NBR  SBL  SBT  SBR
 0    00:00:00    2    1    0    1    0    0    1    5    0    0   20    7
 1    00:15:00    2    0    0    0    0    0    2    5    0    0   22    5
 2    00:30:00    2    0    0    0    0    0    0    8    0    0   11   10
 3    00:45:00    1    0    0    2    1    0    0    4    0    0   16    3
 4    01:00:00    1    1    0    0    0    0    0    2    0    0   17    4
 ..        ...  ...  ...  .

In [7]:
# Initialize an empty list to store modified data frames
dfs = []

# Iterate through each sheet
for sheet_name, df in xls.items():
    # Add a new column "ID" with the sheet name
    df['ID'] = sheet_name
    # Add the data frame to the list
    dfs.append(df)
dfs

[         Time  EBL  EBT  EBR  SBL  SBT    ID
 0    00:00:00    2    5    2   12    7  7800
 1    00:15:00    2    4    0    9    7  7800
 2    00:30:00    3    1    1    0    3  7800
 3    00:45:00    0    1    1    3    5  7800
 4    01:00:00    4    6    0    9    5  7800
 ..        ...  ...  ...  ...  ...  ...   ...
 955  22:45:00    8    8    1   14   35  7800
 956  23:00:00   12    5    3    8   10  7800
 957  23:15:00    6    1    1   17   14  7800
 958  23:30:00    5    6    2    9   13  7800
 959  23:45:00    4    6    1    3   18  7800
 
 [960 rows x 7 columns],
          Time  EBL  EBT  EBR  WBL  WBT  WBR  NBL  NBT  NBR  SBL  SBT  SBR  \
 0    00:00:00    2    1    0    1    0    0    1    5    0    0   20    7   
 1    00:15:00    2    0    0    0    0    0    2    5    0    0   22    5   
 2    00:30:00    2    0    0    0    0    0    0    8    0    0   11   10   
 3    00:45:00    1    0    0    2    1    0    0    4    0    0   16    3   
 4    01:00:00    1    1    0  

In [36]:
# Concatenate all data frames along columns, filling missing values with NA
result = pd.concat(dfs, ignore_index=True)


In [40]:
longdata = result.drop_duplicates().\
    melt(id_vars=['ID', 'Time'], var_name='Movement', value_name='Volume')

In [39]:
# Define a function to determine the value of the "NBSB" column
def determine_direction(Movement	):
    if 'N' in Movement	:
        return 'northbound'
    elif 'S' in Movement	:
        return 'southbound'
    else:
        return 'other'

In [47]:
longdata['NBSB'] = longdata['Movement'].apply(determine_direction)
cleandata = longdata.groupby(['ID', 'Time', 'NBSB'])['Volume'].\
    mean().\
    reset_index().\
    dropna(subset=['Volume'])

In [48]:
am_data = cleandata[(cleandata['Time'] >= datetime.time(6, 0)) & (cleandata['Time'] <= datetime.time(9, 0))]
md_data = cleandata[(cleandata['Time'] >= datetime.time(9, 0)) & (cleandata['Time'] <= datetime.time(15, 0))]
pm_data = cleandata[(cleandata['Time'] >= datetime.time(15, 0)) & (cleandata['Time'] <= datetime.time(19, 0))]

In [59]:
am_data

Unnamed: 0,ID,Time,NBSB,Volume
73,7800,06:00:00,other,5.200000
74,7800,06:00:00,southbound,17.500000
76,7800,06:15:00,other,4.400000
77,7800,06:15:00,southbound,24.450000
79,7800,06:30:00,other,4.700000
...,...,...,...,...
3850,7819,08:45:00,other,21.888889
3851,7819,08:45:00,southbound,58.740741
3852,7819,09:00:00,northbound,43.740741
3853,7819,09:00:00,other,18.288889


In [65]:
# Assuming your DataFrame is named df
# Convert 'Time' column to datetime
# df['Time'] = pd.to_datetime(df['Time'])

# Plot volume over time
plt.figure(figsize=(10, 6))
for nbsb, data in am_data.groupby('NBSB'):
    plt.plot(data['Time'], data['Volume'], label=nbsb)

plt.xlabel('Time')
plt.ylabel('Volume')
plt.title('Volume Over Time')
plt.legend()
plt.show()


TypeError: <class 'datetime.time'> is not convertible to datetime, at position 0

In [56]:
am_data.groupby(['ID', 'NBSB'])['Volume'].\
    sum().\
    reset_index().\
    pivot(index='ID', columns='NBSB', values='Volume').\
    drop(columns='other')

NBSB,northbound,southbound
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
7800,,483.15
7801,1260.962963,618.148148
7802,2514.666667,669.777778
7803,873.083333,468.291667
7804,1283.296296,697.814815
7805,1356.074074,863.333333
7811,1221.518519,693.777778
7812,1293.222222,
7813,1297.740741,655.037037
7814,1077.555556,731.777778


In [57]:
md_data.groupby(['ID', 'NBSB'])['Volume'].\
    sum().\
    reset_index().\
    pivot(index='ID', columns='NBSB', values='Volume').\
    drop(columns='other')

NBSB,northbound,southbound
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
7800,,1336.1
7801,1318.740741,1453.62963
7802,2732.0,1422.851852
7803,1191.0,873.488095
7804,1622.703704,1442.777778
7805,1866.814815,1586.185185
7811,1748.592593,1601.259259
7812,2150.777778,
7813,2017.62963,1720.592593
7814,2107.111111,2162.62963


In [58]:
pm_data.groupby(['ID', 'NBSB'])['Volume'].\
    sum().\
    reset_index().\
    pivot(index='ID', columns='NBSB', values='Volume').\
    drop(columns='other')

NBSB,northbound,southbound
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
7800,,1439.3
7801,773.592593,2206.0
7802,2017.111111,2122.407407
7803,959.208333,1381.458333
7804,1216.148148,1823.925926
7805,1741.888889,1654.703704
7811,1288.481481,1609.333333
7812,1656.722222,
7813,1498.962963,1614.888889
7814,1455.074074,1887.925926
