In [13]:
import os
import polars as pl
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
#import pyarrow as pa
#import datetime

# Define paths
wea_files = ['LC_2022Q1.csv', 'LC_2022Q2.csv', 'LC_2022Q3.csv', 'LC_2022Q4.csv']
id_list = ['439', '440', '441', '910', '442', '443', '444', '324'] # Same order as 1-9 in project.pdf

# Load with polars and remove noise stations 7 8 9
noise_loc = pl.read_csv('../noise_meta.csv')\
            .slice(0,6)
wea_loc = pl.read_csv('../01_Metadata_v2.csv')
wea_prep = pd.read_csv('out/wea_prep.csv')

In [25]:
# Preprocess noise level with pandas
# Using output_path fixes [Errno 13]
def noise_preprocess_csv(folder_path, output_path, names):
    for name in names:
        noise = pd.DataFrame(columns=['description', 'date', 'lamax'])  # Step 1
        
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.csv') and name in file_name:
                file_path = os.path.join(folder_path, file_name)
                add = pd.read_csv(file_path, delimiter=';')

                add['date'] = add['result_timestamp'].str[0:10]
                add = add.groupby(['description', 'date'])['lamax'].mean().reset_index()

                # Get the 'AD' value from the DataFrame's 'description' column
                address = add['description'].values[0]
                
                # Find the corresponding metadata row
                noise_loc_row = noise_loc[noise_loc['AD'] == address]
                
                add['LAT'] = noise_loc_row['LAT'][0]
                add['LON'] = noise_loc_row['LON'][0]

                # Step 3
                noise = pd.concat([noise, add], sort=False)

        # Step 4
        noise.to_csv(os.path.join(output_path, name + '-3.csv'), index=False)

    return 0

In [26]:
# Takes about 10 minutes. Requires about 4Gb of RAM.
noise_preprocess_csv('../noise level/', 'out/', id_list)

ValueError: Cannot __getitem__ on DataFrame with item: 'shape: (6,)
Series: 'AD' [bool]
[
	true
	false
	false
	false
	false
	false
]' of type: '<class 'polars.series.series.Series'>'.

In [10]:
import pandas as pd
import glob

# Find CSV files that match the partial names
matching_files = []
for partial_name in id_list:
    matching_files.extend(glob.glob('*{}-3.csv'.format(partial_name)))

# Loop through each matching CSV file
for file_name in matching_files:
    # Read the CSV file
    df = pd.read_csv(file_name)
    
    # Loop through each row in the DataFrame
    for index, row in df.iterrows():
        # Get the description from 'description' column
        description = row['description']
        
        # Find the matching row in the original DataFrame
        match_row = df.loc[df['AD'] == description]
        
        # Get the latitude and longitude values
        lat = match_row['LAT'].values[0]
        lon = match_row['LON'].values[0]
        
        # Update the 'LAT' and 'LON' columns
        df.at[index, 'LAT'] = lat
        df.at[index, 'LON'] = lon
    
    # Save the updated DataFrame to a new CSV file
    updated_file_name = file_name.replace('-3.csv', '-updated.csv')
    df.to_csv(updated_file_name, index=False)
