In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the detection CSV: parse 'capture_ts' as ISO 8601 timestamps, use it as the
# index, and discard the 'detection_id' column.
df = (
    pd.read_csv(
        '2024-04-30T15-00_20-00_MononElmStreetNB.csv',
        index_col='capture_ts',
        parse_dates=['capture_ts'],
        date_format='ISO8601',
    )
    .drop(columns=['detection_id'])
)

### Find out how many different objects have been tracked and plot the distribution of data point counts per object

In [None]:
# Report the number of distinct object ids present in the data
n_objects = df['object_id'].nunique()
print(f'There are {n_objects} unique object_ids in the data')

# Count how many rows (data points) belong to each object id
object_hist = df['object_id'].value_counts()

# Summary statistics (count, mean, std, min, quartiles, max) of the per-object counts
print(object_hist.describe())

# Histogram of the per-object data point counts
object_hist.plot(kind='hist', bins=200)


In [None]:
# Only keep the objects whose data point count lies below the 95th percentile of
# object_hist, so the long tail of extreme counts does not dominate the histogram.
# The comparison is strict, so counts equal to the threshold are dropped as well.
p95_threshold = object_hist.quantile(0.95)
object_hist_95 = object_hist[object_hist < p95_threshold]

# Histogram of the remaining per-object data point counts
object_hist_95.plot.hist(bins=200)

In [None]:
# Same histogram of the counts below the 95th percentile, now with a logarithmic y-axis
object_hist_95.plot(kind='hist', bins=200, logy=True)

In [None]:
# Reproduce the log-scaled histogram by calling matplotlib directly instead of pandas
plt.hist(x=object_hist_95, bins=200, log=True)
plt.show()

### Visualize traffic over time

In [None]:
# Traffic over time: number of distinct object ids seen in each one-minute bin
object_count = df.resample('1min')['object_id'].nunique()
object_count.plot()


In [None]:
# Smooth the per-minute object count with a rolling mean over a window of 10 bins
object_count_smoothed = object_count.rolling(window=10).mean()
object_count_smoothed.plot()

In [None]:
# Plot the smoothed per-minute object count separately for every class_id, one
# labelled line per class (0: person, 1: bicycle, 2: car).
CLASSNAME_BY_ID = {0: 'person', 1: 'bicycle', 2: 'car'}

# Missing class ids are skipped: they can never match the equality filter below.
for class_id in df['class_id'].dropna().unique():
    # Distinct object ids of this class per one-minute bin
    class_count = df.loc[df['class_id'] == class_id, 'object_id'].resample('1min').nunique()
    # Fall back to a generic label instead of raising KeyError for a class id
    # that has no entry in CLASSNAME_BY_ID.
    class_label = CLASSNAME_BY_ID.get(class_id, f'class {class_id}')
    # Same rolling mean (window of 10 bins) as used for the overall count
    class_count.rolling(window=10).mean().plot(label=class_label)
plt.ylabel('avg object count per minute')
plt.legend()
