In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Column layout shared by every device CSV.
# base_cols: per-device readings; these get a device suffix later in the notebook.
base_cols = [
    'TEMP', 'HUM', 'CO2',
    'PM1', 'PM2.5', 'PM5', 'PM10',
    'HCHO', 'VOC', 'NO2', 'NOISE',
    'DOOR1', 'WINDOW1', 'WINDOW2',
    'VENT_TYPE', 'VENT_TIME',
]
# ext_cols: the EXT_-prefixed columns that follow the base columns in each file.
ext_cols = [
    'EXT_TEMP', 'EXT_HUM',
    'EXT_PM2.5', 'EXT_PM10',
    'EXT_NO2', 'EXT_CO',
]
# Full positional column order: timestamp bin first, then base, then external.
all_cols = ['date_bin', *base_cols, *ext_cols]

In [3]:
# Load device1.csv .. device5.csv into a dict keyed by device number (1-5).
# - header=0 combined with names= discards each file's own header row and
#   applies the shared all_cols labels instead.
# - Device 2 is parsed with '.' as the decimal mark; every other device uses ','.
#   NOTE(review): presumably device 2 was exported with a different locale -- confirm.
devices = dict(
    (
        device_id,
        pd.read_csv(
            f"device{device_id}.csv",
            names=all_cols,
            header=0,
            decimal=',' if device_id != 2 else '.',
        ),
    )
    for device_id in range(1, 6)
)

In [4]:
# Parse the 'date_bin' column of every device frame into datetimes, in place,
# so the frames can later be joined on a proper datetime key.
for device_df in devices.values():
    parsed_bins = pd.to_datetime(device_df['date_bin'])
    device_df['date_bin'] = parsed_bins

In [5]:
# Suffix each base column with its device number (e.g. TEMP -> TEMP_DEVICE_3) so
# the per-device readings stay distinguishable after the frames are joined.
# 'date_bin' and the external columns are not renamed.
for i, df in devices.items():
    df.rename(columns={col: f"{col}_DEVICE_{i}" for col in base_cols}, inplace=True)
    # Keep the external columns from device 1 only; they are dropped from every
    # other device so the merged frame carries a single copy of them.
    if i > 1:
        # errors='ignore' makes this cell safe to re-run: on a second execution
        # the external columns are already gone and a plain drop() would raise
        # KeyError. (rename() already skips labels that are missing.)
        df.drop(columns=ext_cols, inplace=True, errors='ignore')

In [6]:
# Combine the five device frames into one wide frame keyed on 'date_bin'.
# An outer join keeps every timestamp bin seen by any device; readings a device
# does not have for a given bin come out as NaN.
# NOTE(review): if a device has duplicate 'date_bin' values the join will
# multiply rows -- confirm the bins are unique per device.
merged_df = devices[1]
for device_id in (2, 3, 4, 5):
    merged_df = pd.merge(
        merged_df,
        devices[device_id],
        how='outer',
        on='date_bin',
    )

In [7]:
# Put the merged frame into its final column order:
#   date_bin, the external columns, then every device's base columns
#   grouped device by device (all of device 1, then device 2, ...).
device_cols = []
for device_id in range(1, 6):
    device_cols.extend(f"{col}_DEVICE_{device_id}" for col in base_cols)
merged_df = merged_df[['date_bin', *ext_cols, *device_cols]]

In [8]:
# Write the combined frame to disk; index=False leaves the row index out of the file.
merged_df.to_csv(
    path_or_buf="all_devices_merged.csv",
    index=False,
)