In [141]:
# import libraries that we need to process the data
import csv
import os
import zipfile

import numpy as np
import pandas as pd

In [142]:
# ---------------------------------------------------------------------------
# Aggregate the hourly "Approach Volume" CSVs found inside every ZIP export in
# one folder into a single CSV that is written back to that same folder.
# ---------------------------------------------------------------------------

# Folder where the ZIP files are sitting (the aggregated CSV is saved here too)
zip_directory = r'Z:\D4\Continuous Counts\Bicycle Location ID\Central & EB580\2024'

# Archive members whose name starts with this text are processed
TARGET_CSV_PREFIX = 'Approach Volume - 1 hour bins.csv'
# Number of rows above the column-header row; they hold the report metadata
METADATA_ROWS = 6
# Upper bound on the data rows read from each CSV. 6576 = 274 days x 24 hourly
# bins, which matches the "1 January-30 September, 2024" exports.
# NOTE(review): presumably this trims trailing non-data rows -- confirm, and
# adjust when the export period changes.
MAX_DATA_ROWS = 6576
# Characters Windows does not allow in a file name; each is replaced with "_"
ILLEGAL_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '<>:"/\\|?*'})


def extract_intersection_name(metadata_lines):
    """Return the value that follows the "Intersection Name" label, or None.

    Parameters
    ----------
    metadata_lines : iterable of str
        Already-decoded metadata lines taken from the top of a CSV.

    Returns
    -------
    str or None
        The field right after the label, or None when the label (or its value)
        is not present.

    The lines are parsed with the csv module so that a quoted name containing
    a comma (e.g. "Main St, North") is returned whole rather than cut at the
    comma.
    """
    for fields in csv.reader(metadata_lines, skipinitialspace=True):
        labels = [field.strip() for field in fields]
        if 'Intersection Name' in labels:
            value_pos = labels.index('Intersection Name') + 1
            return labels[value_pos] if value_pos < len(labels) else None
    return None


def load_approach_csv(archive, member_name):
    """Read one CSV member of an open ZipFile into a cleaned DataFrame.

    Parameters
    ----------
    archive : zipfile.ZipFile
        Archive that is already open for reading.
    member_name : str
        Name of the CSV inside the archive.

    Returns
    -------
    pandas.DataFrame
        The data rows with stripped column names, an added
        'Intersection Name' column and 'Date' converted to datetime.

    Raises
    ------
    Exception
        Anything raised while decoding or parsing (including KeyError when
        there is no 'Date' column) propagates to the caller.
    """
    with archive.open(member_name) as member:
        # utf-8-sig behaves like utf-8 but also drops a leading byte-order mark
        metadata = [
            member.readline().decode('utf-8-sig').strip()
            for _ in range(METADATA_ROWS)
        ]
        intersection_name = extract_intersection_name(metadata)
        print(f"Intersection Name: {intersection_name}")

        # Rewind so pandas reads from the top and skips the metadata rows itself
        member.seek(0)
        bdata = pd.read_csv(member, skiprows=METADATA_ROWS, nrows=MAX_DATA_ROWS, delimiter=',')

    # Clean column names and attach the metadata
    bdata.columns = bdata.columns.str.strip()
    bdata['Intersection Name'] = intersection_name

    # Convert Date to datetime; report values that could not be parsed instead
    # of letting them turn into NaT silently
    raw_dates = bdata['Date']
    bdata['Date'] = pd.to_datetime(raw_dates, errors='coerce')
    unparsed = int((bdata['Date'].isna() & raw_dates.notna()).sum())
    if unparsed:
        print(f"Warning: {unparsed} unparseable Date value(s) in {member_name} were set to NaT")
    return bdata


def collect_approach_volumes(zip_dir):
    """Load every matching CSV from every ZIP file in ``zip_dir``.

    Parameters
    ----------
    zip_dir : str
        Folder that contains the ZIP exports.

    Returns
    -------
    list of pandas.DataFrame
        One frame per CSV that was read successfully. Archives or members that
        fail are reported with a printed message and skipped (best effort).
    """
    frames = []
    # os.listdir returns names in arbitrary order; sorting keeps the row order
    # of the aggregated output reproducible between runs
    for zip_filename in sorted(os.listdir(zip_dir)):
        # Compare case-insensitively so ".ZIP" files are not skipped
        if not zip_filename.lower().endswith('.zip'):
            continue
        zip_path = os.path.join(zip_dir, zip_filename)

        try:
            with zipfile.ZipFile(zip_path, 'r') as archive:
                target_files = [
                    member for member in archive.namelist()
                    if member.startswith(TARGET_CSV_PREFIX)
                ]
                print(f"Processing {zip_filename}: {target_files}")

                for file_name in target_files:
                    try:
                        frames.append(load_approach_csv(archive, file_name))
                        print(f"Appended data from {file_name}")
                    except Exception as e:
                        # One bad CSV must not stop the remaining files
                        print(f"Error processing {file_name} in {zip_filename}: {str(e)}")

        except Exception as e:
            print(f"Error opening ZIP file {zip_filename}: {str(e)}")
    return frames


def build_output_path(directory, intersection_name):
    """Return ``<directory>/Approach_volume_agg_<name>.csv``.

    Characters that are illegal in Windows file names are replaced with "_" so
    that a name such as "Main St / 1st Ave" still yields a valid file name.
    """
    safe_name = str(intersection_name).translate(ILLEGAL_FILENAME_CHARS)
    return os.path.join(directory, f'Approach_volume_agg_{safe_name}.csv')


# Bucket where the processed data is dropped
aggregated_data = []
if os.path.isdir(zip_directory):
    aggregated_data = collect_approach_volumes(zip_directory)
else:
    # e.g. the network drive is not mapped
    print(f"Directory not found: {zip_directory}")

# Concatenate all processed data
if aggregated_data:
    result_df = pd.concat(aggregated_data, ignore_index=True)

    # Name the file after the first intersection name that was actually found;
    # the column always exists, so test its values rather than its presence
    known_names = result_df['Intersection Name'].dropna()
    intersection_name = known_names.iloc[0] if not known_names.empty else "Unknown"

    # Save next to the source ZIP files
    output_path = build_output_path(zip_directory, intersection_name)
    result_df.to_csv(output_path, index=False)
    print(f"Data saved to {output_path}")
else:
    print("No valid files were processed.")

Processing EB 1Hour Notraffic Traffic Counts 1 January-30 September,2024.zip: ['Approach Volume - 1 hour bins.csv']
Intersection Name: Central & EB580
Appended data from Approach Volume - 1 hour bins.csv
Processing NB 1Hour Notraffic Traffic Counts 1 January-30 September,2024.zip: ['Approach Volume - 1 hour bins.csv']
Intersection Name: Central & EB580
Appended data from Approach Volume - 1 hour bins.csv
Processing SB 1 Hour Notraffic Traffic Counts 1 January-30 September,2024.zip: ['Approach Volume - 1 hour bins.csv']
Intersection Name: Central & EB580
Appended data from Approach Volume - 1 hour bins.csv
Processing WB 1 Hour Notraffic Traffic Counts 1 January-30 September,2024.zip: ['Approach Volume - 1 hour bins.csv']
Intersection Name: Central & EB580
Appended data from Approach Volume - 1 hour bins.csv
Data saved to Z:/D4/Continuous Counts/Bicycle Location ID/Central & EB580/2024/Approach_volume_agg_Central & EB580.csv


In [None]:
# end of the Script