In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [33]:
object_data = pd.read_csv('./2024_Jul_ob_count_v3.csv')

In [34]:
# Create array of category names as they appear in the detections data. See paper for details of each category.
categories = ['car', 'person', 'trotro', 'stall', 'truck', 'stove', 'motorcycle', 'vendor', 'lorry', 'umbrella', 'bus', 'trash', 'taxi', 'van', 'debris', 'loudspeaker', 'bowl', 'food', 'animal', 'bicycle']

# Column names in the data frame for the number of counts of each category type in an image.
count_cols = [cat+'_counts' for cat in categories]

super_count_cols = ['people'+'_counts', 'small_vehicles'+'_counts', 'two_wheelers'+'_counts', 'large_vehicles'+'_counts', 'refuse'+'_counts', 'market'+'_counts']

all_count_cols = count_cols + super_count_cols

vehicle_categories = ['car', 'trotro', 'truck', 'motorcycle', 'lorry', 'bus', 'taxi', 'van', 'bicycle']

# Define super categories
super_categories = {
    'people': ['person', 'vendor'],
    'small_vehicles': ['car', 'taxi', 'truck'],
    'two_wheelers': ['bicycle', 'motorcycle'],
    'large_vehicles': ['trotro', 'van', 'lorry', 'bus'],
    'refuse': ['trash', 'debris'],
    'market': ['umbrella', 'stall', 'bowl', 'food']
}

In [35]:
# Ensure datetime is in datetime format
object_data['datetime'] = pd.to_datetime(object_data['datetime_rectified'], format='%Y-%m-%d %H:%M:%S')

# Create additional time-related columns
object_data['hour'] = object_data['datetime'].dt.hour
object_data['day'] = object_data['datetime'].dt.dayofweek + 1  # +1 to match R's 1-indexing
object_data['week'] = object_data['datetime'].dt.isocalendar().week
object_data['year'] = object_data['datetime'].dt.year

# Split 'site_id_cam_angle' into 'site_id' and 'camera' columns
object_data[['site_id', 'camera']] = object_data['site_id_cam_angle'].str.split('_', expand=True)

# Fill missing values in 'camera' with 'single'
object_data['camera'].fillna('single', inplace=True)

# Filter data between specified dates
start_date = pd.Timestamp('2019-04-01')
end_date = pd.Timestamp('2024-04-01')
fixed_object_data = object_data[(object_data['datetime'] >= start_date) & (object_data['datetime'] <= end_date)]
fixed_object_data = fixed_object_data[fixed_object_data['view'] == 'clear']
# Display the first few rows of the new dataframe to verify
fixed_object_data.head()


Unnamed: 0,directory_name_rectified,site_id_cam_angle,view,image_name,datetime_rectified,date_rectified,animal_counts,bicycle_counts,bowl_counts,bus_counts,...,vendor_counts,directory_name_original,datetime_original,datetime,hour,day,week,year,site_id,camera
0,AD_01_03_2024_C22_S15,AD_right,clear,MFDC3070.JPG,2024-03-01 08:32:02,2024-03-01,0,0,0,0,...,0,AD_01_03_2024_C22_S15,2024-03-01 08:32:02,2024-03-01 08:32:02,8,5,9,2024,AD,right
1,AD_01_03_2024_C22_S15,AD_right,clear,MFDC3071.JPG,2024-03-01 08:37:02,2024-03-01,0,1,0,0,...,0,AD_01_03_2024_C22_S15,2024-03-01 08:37:02,2024-03-01 08:37:02,8,5,9,2024,AD,right
2,AD_01_03_2024_C22_S15,AD_right,clear,MFDC3072.JPG,2024-03-01 08:42:02,2024-03-01,0,0,0,0,...,0,AD_01_03_2024_C22_S15,2024-03-01 08:42:02,2024-03-01 08:42:02,8,5,9,2024,AD,right
3,AD_01_03_2024_C22_S15,AD_right,clear,MFDC3073.JPG,2024-03-01 08:47:02,2024-03-01,0,0,0,0,...,0,AD_01_03_2024_C22_S15,2024-03-01 08:47:02,2024-03-01 08:47:02,8,5,9,2024,AD,right
4,AD_01_03_2024_C22_S15,AD_right,clear,MFDC3074.JPG,2024-03-01 08:52:02,2024-03-01,0,0,0,0,...,0,AD_01_03_2024_C22_S15,2024-03-01 08:52:02,2024-03-01 08:52:02,8,5,9,2024,AD,right


In [36]:
# Sum counts for each super category
for super_cat, categories in super_categories.items():
    # Create a column for each supercategory by summing its categories
    fixed_object_data[super_cat + '_counts'] = fixed_object_data[[cat + '_counts' for cat in categories]].sum(axis=1)

In [22]:
# import pandas as pd

# # Ensure 'datetime_hour' is created properly
# fixed_object_data['datetime_hour'] = fixed_object_data['datetime'].dt.round('H')
# assert 'datetime_hour' in fixed_object_data.columns, "'datetime_hour' column is missing"

# # Print the first few rows of fixed_object_data to verify 'datetime_hour'
# print("fixed_object_data:")
# print(fixed_object_data.head())

# # Sum the counts within each hour for each camera at each site and include directory_name_rectified and camera info
# hourly_counts = fixed_object_data.groupby(['site_id', 'camera', 'datetime_hour', 'directory_name_rectified'])[all_count_cols].sum().reset_index()
# assert 'datetime_hour' in hourly_counts.columns, "'datetime_hour' column is missing after groupby"

# # Print the first few rows of hourly_counts to verify the groupby operation
# print("\nhourly_counts:")
# print(hourly_counts.head())

# # Define the aggregation functions for each column
# def aggregate_directories(x):
#     print("Aggregating directories:", x)
#     return '|'.join(sorted(set(x)))

# def aggregate_cameras(x):
#     print("Aggregating cameras:", x)
#     return ','.join(sorted(set(x)))

# # Group by the rounded 'datetime', 'site_id', then calculate the mean for each object category
# agg_dict = {col: 'mean' for col in all_count_cols}
# agg_dict['directory_name_rectified'] = aggregate_directories
# agg_dict['camera'] = aggregate_cameras

# print("Aggregation dictionary:")
# print(agg_dict)

# # Perform the aggregation step by step
# grouped = hourly_counts.groupby(['site_id', 'datetime_hour'])

# # Aggregate numeric columns
# hourly_averages = grouped.agg({col: 'mean' for col in all_count_cols}).reset_index()

# # Aggregate directory_name_rectified
# hourly_averages['directory_name_rectified'] = grouped['directory_name_rectified'].apply(aggregate_directories).values

# # Aggregate camera
# hourly_averages['camera'] = grouped['camera'].apply(aggregate_cameras).values

# assert 'datetime_hour' in hourly_averages.columns, "'datetime_hour' column is missing after aggregation"

# # Print the first few rows of hourly_averages to verify the aggregation
# print("\nhourly_averages:")
# print(hourly_averages.head())

# # Add the date column from the rounded 'datetime_hour'
# hourly_averages['date'] = hourly_averages['datetime_hour'].dt.date

# # Create additional time-related columns
# hourly_averages['hour'] = hourly_averages['datetime_hour'].dt.hour
# hourly_averages['day'] = hourly_averages['datetime_hour'].dt.dayofweek + 1  # +1 to match R's 1-indexing
# hourly_averages['week'] = hourly_averages['datetime_hour'].dt.isocalendar().week
# hourly_averages['year'] = hourly_averages['datetime_hour'].dt.year

# # Add the left and right camera indicator columns
# hourly_averages['left_cam'] = hourly_averages['camera'].apply(lambda x: 1 if 'left' in x else 0)
# hourly_averages['right_cam'] = hourly_averages['camera'].apply(lambda x: 1 if 'right' in x else 0)

# # Reorder the columns to match the requested format
# final_columns = ['datetime_hour', 'date', 'site_id', 'hour', 'day', 'week', 'year', 'directory_name_rectified', 'left_cam', 'right_cam'] + all_count_cols
# hourly_averages = hourly_averages[final_columns]

# # Rename 'datetime_hour' to 'datetime' to match the final requested column name
# hourly_averages.rename(columns={'datetime_hour': 'datetime', 'directory_name_rectified': 'directory_pair'}, inplace=True)

# # Display the first few rows of the new dataframe to verify
# print("\nfinal hourly_averages:")
# print(hourly_averages.head())


fixed_object_data:
  directory_name_rectified site_id_cam_angle   view    image_name  \
0    AD_01_03_2024_C22_S15          AD_right  clear  MFDC3070.JPG   
1    AD_01_03_2024_C22_S15          AD_right  clear  MFDC3071.JPG   
2    AD_01_03_2024_C22_S15          AD_right  clear  MFDC3072.JPG   
3    AD_01_03_2024_C22_S15          AD_right  clear  MFDC3073.JPG   
4    AD_01_03_2024_C22_S15          AD_right  clear  MFDC3074.JPG   

    datetime_rectified date_rectified  animal_counts  bicycle_counts  \
0  2024-03-01 08:32:02     2024-03-01              0               0   
1  2024-03-01 08:37:02     2024-03-01              0               1   
2  2024-03-01 08:42:02     2024-03-01              0               0   
3  2024-03-01 08:47:02     2024-03-01              0               0   
4  2024-03-01 08:52:02     2024-03-01              0               0   

   bowl_counts  bus_counts  ...  year  site_id  camera  people_counts  \
0            0           0  ...  2024       AD   right      

AttributeError: 'DataFrame' object has no attribute 'name'

In [37]:
# Step 7: Round the 'datetime' to the nearest hour
fixed_object_data['datetime_hour'] = fixed_object_data['datetime'].dt.round('H')

# Step 8: Sum the counts within each hour for each camera at each site
hourly_counts = fixed_object_data.groupby(['site_id', 'camera', 'datetime_hour'])[all_count_cols].sum().reset_index()

# Step 9: Group by the rounded 'datetime', 'date', and 'site_id', then calculate the mean for each object category
hourly_averages = hourly_counts.groupby(['site_id', 'datetime_hour'])[all_count_cols].mean().reset_index()

# Step 10: Add the date column from the rounded 'datetime_hour'
hourly_averages['date'] = hourly_averages['datetime_hour'].dt.date

# Create additional time-related columns
hourly_averages['hour'] = hourly_averages['datetime_hour'].dt.hour
hourly_averages['day'] = hourly_averages['datetime_hour'].dt.dayofweek + 1  # +1 to match R's 1-indexing
hourly_averages['week'] = hourly_averages['datetime_hour'].dt.isocalendar().week
hourly_averages['year'] = hourly_averages['datetime_hour'].dt.year


# Step 11: Reorder the columns to match the requested format
final_columns = ['datetime_hour', 'date', 'site_id', 'hour', 'day', 'week', 'year'] + all_count_cols
hourly_averages = hourly_averages[final_columns]

# Step 12: Rename 'datetime_hour' to 'datetime' to match the final requested column name
hourly_averages.rename(columns={'datetime_hour': 'datetime'}, inplace=True)

# Display the first few rows of the new dataframe to verify
hourly_averages.head()

Unnamed: 0,datetime,date,site_id,hour,day,week,year,car_counts,person_counts,trotro_counts,...,bowl_counts,food_counts,animal_counts,bicycle_counts,people_counts,small_vehicles_counts,two_wheelers_counts,large_vehicles_counts,refuse_counts,market_counts
0,2019-04-12 10:00:00,2019-04-12,AD,10,5,15,2019,9.0,32.0,3.0,...,0.0,0.0,0.0,0.0,32.0,15.0,3.0,4.0,0.0,0.0
1,2019-04-12 11:00:00,2019-04-12,AD,11,5,15,2019,228.0,219.0,30.5,...,0.5,0.0,0.0,0.5,219.5,290.0,16.5,36.0,0.0,23.0
2,2019-04-12 12:00:00,2019-04-12,AD,12,5,15,2019,314.0,230.5,49.5,...,0.5,0.0,0.0,2.5,232.0,400.0,17.5,58.0,1.0,26.5
3,2019-04-12 13:00:00,2019-04-12,AD,13,5,15,2019,371.5,192.5,43.5,...,1.0,0.0,0.0,0.5,195.0,482.0,13.0,58.5,2.0,24.5
4,2019-04-12 14:00:00,2019-04-12,AD,14,5,15,2019,351.5,205.0,60.5,...,0.5,0.0,0.0,1.0,206.5,432.0,21.0,67.0,0.0,20.0


In [13]:
from tqdm import tqdm
import pandas as pd
import numpy as np

# Create a function to determine the directory pair or single directory for each row
def get_directory_name(row):
    # Filter rows for the same site and datetime
    matching_rows = fixed_object_data[(fixed_object_data['site_id'] == row['site_id']) & 
                                      (fixed_object_data['datetime_hour'] == row['datetime'])]
    # Get unique directories and cameras
    unique_dirs = matching_rows['directory_name_rectified'].unique()
    unique_cameras = matching_rows['camera'].unique()
    
    if len(unique_dirs) == 1:
        return unique_dirs[0]  # Single camera or only one camera available
    else:
        return '|'.join(sorted(unique_dirs))  # Join directories with '|' to indicate pairs

# Initialize the tqdm progress bar
tqdm.pandas()

# Apply the function to create the new column with progress monitoring
hourly_averages['directory_pair'] = hourly_averages.progress_apply(get_directory_name, axis=1)

# Create indicator variables for left and right cameras
hourly_averages['left_cam'] = hourly_averages.progress_apply(
    lambda row: 1 if 'left' in fixed_object_data[(fixed_object_data['site_id'] == row['site_id']) & 
                                                 (fixed_object_data['datetime_hour'] == row['datetime'])]['camera'].unique() else 0,
    axis=1
)

hourly_averages['right_cam'] = hourly_averages.progress_apply(
    lambda row: 1 if 'right' in fixed_object_data[(fixed_object_data['site_id'] == row['site_id']) & 
                                                  (fixed_object_data['datetime_hour'] == row['datetime'])]['camera'].unique() else 0,
    axis=1
)


  0%|▏                                                                                                                                                             | 445/353738 [09:23<124:11:21,  1.27s/it]


KeyboardInterrupt: 

In [38]:
# Display the first few rows of the new dataframe to verify
hourly_averages_cleaned = hourly_averages.loc[:,~hourly_averages.columns.duplicated()].copy()
print(hourly_averages.columns)
print("\n")
print(hourly_averages_cleaned.columns)

Index(['datetime', 'date', 'site_id', 'hour', 'day', 'week', 'year',
       'car_counts', 'person_counts', 'trotro_counts', 'stall_counts',
       'truck_counts', 'stove_counts', 'motorcycle_counts', 'vendor_counts',
       'lorry_counts', 'umbrella_counts', 'bus_counts', 'trash_counts',
       'taxi_counts', 'van_counts', 'debris_counts', 'loudspeaker_counts',
       'bowl_counts', 'food_counts', 'animal_counts', 'bicycle_counts',
       'people_counts', 'small_vehicles_counts', 'two_wheelers_counts',
       'large_vehicles_counts', 'refuse_counts', 'market_counts'],
      dtype='object')


Index(['datetime', 'date', 'site_id', 'hour', 'day', 'week', 'year',
       'car_counts', 'person_counts', 'trotro_counts', 'stall_counts',
       'truck_counts', 'stove_counts', 'motorcycle_counts', 'vendor_counts',
       'lorry_counts', 'umbrella_counts', 'bus_counts', 'trash_counts',
       'taxi_counts', 'van_counts', 'debris_counts', 'loudspeaker_counts',
       'bowl_counts', 'food_counts', 

In [11]:
# hourly_averages.to_csv('hourly_averages_indicator.csv')

In [40]:
# hourly_averages.to_csv('hourly_averages_updated.csv')

hourly_averages = pd.read_csv('hourly_averages_updated.csv')

In [41]:
hourly_averages_old = pd.read_csv('dockerfiles/hourly_averages.csv')

In [42]:
hourly_averages_old.columns

Index(['Unnamed: 0', 'datetime', 'date', 'site_id', 'hour', 'day', 'week',
       'year', 'car_counts', 'person_counts', 'trotro_counts', 'stall_counts',
       'truck_counts', 'stove_counts', 'motorcycle_counts', 'vendor_counts',
       'lorry_counts', 'umbrella_counts', 'bus_counts', 'trash_counts',
       'taxi_counts', 'van_counts', 'debris_counts', 'loudspeaker_counts',
       'bowl_counts', 'food_counts', 'animal_counts', 'bicycle_counts',
       'people_counts', 'small_vehicles_counts', 'two_wheelers_counts',
       'large_vehicles_counts', 'refuse_counts', 'market_counts',
       'directory_pair'],
      dtype='object')

In [43]:
# Merge the dataframes
hourly_averages = hourly_averages.merge(
    hourly_averages_old[['date', 'site_id', 'hour', 'day', 'week', 'year', 'directory_pair']],
    on=['date', 'site_id', 'hour', 'day', 'week', 'year'],
    how='left'
)

In [46]:
hourly_averages.head(100)

Unnamed: 0.1,Unnamed: 0,datetime,date,site_id,hour,day,week,year,car_counts,person_counts,...,food_counts,animal_counts,bicycle_counts,people_counts,small_vehicles_counts,two_wheelers_counts,large_vehicles_counts,refuse_counts,market_counts,directory_pair
0,0,2019-04-12 10:00:00,2019-04-12,AD,10,5,15,2019,9.0,32.0,...,0.0,0.0,0.0,32.0,15.0,3.0,4.0,0.0,0.0,AD_12_04_2019_C14
1,1,2019-04-12 11:00:00,2019-04-12,AD,11,5,15,2019,228.0,219.0,...,0.0,0.0,0.5,219.5,290.0,16.5,36.0,0.0,23.0,AD_12_04_2019_C14|AD_12_04_2019_C17
2,2,2019-04-12 12:00:00,2019-04-12,AD,12,5,15,2019,314.0,230.5,...,0.0,0.0,2.5,232.0,400.0,17.5,58.0,1.0,26.5,AD_12_04_2019_C14|AD_12_04_2019_C17
3,3,2019-04-12 13:00:00,2019-04-12,AD,13,5,15,2019,371.5,192.5,...,0.0,0.0,0.5,195.0,482.0,13.0,58.5,2.0,24.5,AD_12_04_2019_C14|AD_12_04_2019_C17
4,4,2019-04-12 14:00:00,2019-04-12,AD,14,5,15,2019,351.5,205.0,...,0.0,0.0,1.0,206.5,432.0,21.0,67.0,0.0,20.0,AD_12_04_2019_C14|AD_12_04_2019_C17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,2019-04-16 09:00:00,2019-04-16,AD,9,2,16,2019,203.5,157.0,...,0.0,0.0,0.5,158.5,243.0,15.5,55.0,0.0,24.0,AD_12_04_2019_C14|AD_12_04_2019_C17
96,96,2019-04-16 10:00:00,2019-04-16,AD,10,2,16,2019,269.0,150.5,...,0.0,0.0,1.5,151.0,322.5,12.5,62.5,1.0,36.0,AD_12_04_2019_C14|AD_12_04_2019_C17
97,97,2019-04-16 11:00:00,2019-04-16,AD,11,2,16,2019,270.5,145.0,...,0.0,0.0,0.0,146.0,331.0,15.0,56.5,0.0,36.0,AD_12_04_2019_C14|AD_12_04_2019_C17
98,98,2019-04-16 12:00:00,2019-04-16,AD,12,2,16,2019,318.5,156.0,...,0.0,0.0,0.5,157.0,401.0,14.5,66.0,0.5,37.5,AD_12_04_2019_C14|AD_12_04_2019_C17


In [53]:
# Get the value counts of the 'directory_pair' column
directory_pair_counts = hourly_averages['directory_pair'].value_counts()

# Filter for directory_pair values that occur only once
single_occurrence_pairs = directory_pair_counts[directory_pair_counts == 1]

# Get the number of such unique directory_pair values
count_single_occurrences = single_occurrence_pairs.shape[0]

print(f"Number of 'directory_pair' values that occur only once: {count_single_occurrences}")


Number of 'directory_pair' values that occur only once: 1555


In [55]:
# Step 1: Get the value counts of the 'directory_pair' column
directory_pair_counts = hourly_averages['directory_pair'].value_counts()

# Step 2: Identify the 'directory_pair' values that occur only once
single_occurrence_pairs = directory_pair_counts[directory_pair_counts == 1].index

# Step 3: Filter the dataframe to exclude these 'directory_pair' values
hourly_averages_filtered = hourly_averages[~hourly_averages['directory_pair'].isin(single_occurrence_pairs)]

# If you want to update the original DataFrame, you can assign it back
hourly_averages = hourly_averages_filtered

# Display the resulting DataFrame
hourly_averages_filtered.head()

Unnamed: 0.1,Unnamed: 0,datetime,date,site_id,hour,day,week,year,car_counts,person_counts,...,food_counts,animal_counts,bicycle_counts,people_counts,small_vehicles_counts,two_wheelers_counts,large_vehicles_counts,refuse_counts,market_counts,directory_pair
1,1,2019-04-12 11:00:00,2019-04-12,AD,11,5,15,2019,228.0,219.0,...,0.0,0.0,0.5,219.5,290.0,16.5,36.0,0.0,23.0,AD_12_04_2019_C14|AD_12_04_2019_C17
2,2,2019-04-12 12:00:00,2019-04-12,AD,12,5,15,2019,314.0,230.5,...,0.0,0.0,2.5,232.0,400.0,17.5,58.0,1.0,26.5,AD_12_04_2019_C14|AD_12_04_2019_C17
3,3,2019-04-12 13:00:00,2019-04-12,AD,13,5,15,2019,371.5,192.5,...,0.0,0.0,0.5,195.0,482.0,13.0,58.5,2.0,24.5,AD_12_04_2019_C14|AD_12_04_2019_C17
4,4,2019-04-12 14:00:00,2019-04-12,AD,14,5,15,2019,351.5,205.0,...,0.0,0.0,1.0,206.5,432.0,21.0,67.0,0.0,20.0,AD_12_04_2019_C14|AD_12_04_2019_C17
5,5,2019-04-12 15:00:00,2019-04-12,AD,15,5,15,2019,354.5,249.5,...,0.5,0.0,0.5,251.0,433.0,23.5,54.0,0.5,24.0,AD_12_04_2019_C14|AD_12_04_2019_C17


In [56]:
len(hourly_averages_filtered)

354828

In [57]:
hourly_averages_filtered.to_csv('hourly_averages_v2.csv')

In [58]:
hourly_averages_filtered['directory_pair'].value_counts()

directory_pair
LA_27_03_2020_C16_S28                            597
EL_29_03_2020_C12_S30                            524
AD_24_04_2020_C12_S17|AD_24_04_2020_C4_S21       505
JT_24_04_2020_C31_S49                            505
NIMA_24_04_2020_C27_S45|NIMA_24_04_2020_C2_S2    504
                                                ... 
EL_15_06_2020_C12_S53|EL_15_06_2020_C32_S65        2
NIMA_15_11_2019_C2_S28                             2
JT_18_12_2020_C9_S5                                2
JT_03_01_2020_C31_S44                              2
TMW_06_09_2021_C19_S5|TMW_06_09_2021_C30_S44       2
Name: count, Length: 2612, dtype: int64

In [59]:
import pandas as pd
import numpy as np

# Assuming you have already read your data into a dataframe
# hourly_averages_filtered is the dataframe

# Randomly assign each row to one of five folds
np.random.seed(42)  # For reproducibility
hourly_averages_filtered['fold'] = np.random.randint(1, 6, size=len(hourly_averages_filtered))

year                2019  2020  2021  2022  2023  2024
fold hour day week                                    
1    0    1   1        2     0     3     0     3     3
              2        0     0     2     3     4     2
              3        0     3     2     5     1     1
              4        0     1     1     2     2     0
              5        0     0     0     1     1     0
...                  ...   ...   ...   ...   ...   ...
5    23   7   49       1     1     1     5     1     0
              50       1     0     5     0     2     0
              51       1     2     1     1     1     0
              52       2     2     0     1     4     0
              53       0     0     2     0     0     0

[44388 rows x 6 columns]


In [66]:
for col in ['hour', 'day', 'week', 'year']:
    print(hourly_averages_filtered[hourly_averages_filtered['fold'] == 1][[col]].value_counts())

hour
14      3069
10      3052
3       2985
18      2981
6       2975
20      2974
15      2973
16      2971
12      2962
9       2960
17      2953
22      2952
21      2939
13      2939
23      2935
11      2932
1       2928
2       2923
8       2911
19      2910
4       2908
0       2906
7       2902
5       2886
Name: count, dtype: int64
day
2      10292
6      10212
3      10153
1      10088
5      10083
7      10024
4       9974
Name: count, dtype: int64
week
34      1641
28      1594
26      1540
31      1530
32      1522
30      1494
29      1491
33      1485
37      1484
24      1480
42      1470
35      1469
41      1464
43      1461
39      1459
40      1457
36      1449
47      1435
27      1431
49      1422
45      1419
38      1417
50      1411
22      1408
51      1407
21      1406
44      1396
46      1395
23      1385
19      1382
18      1381
20      1379
2       1374
25      1366
52      1352
17      1344
1       1302
48      1297
16      1287
8       1282
7       124

In [67]:
hourly_averages_filtered.to_csv('./hourly_averages_v2.csv', index=False)