In [40]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
import json


ROOT_DIR = "DOO-RE"  
ACTIVITY_NAME = "Seminar"


def load_activity_data(activity_folder):
    sensor_dir = os.path.join(activity_folder, "sensor")
    metadata_dir = os.path.join(activity_folder, "metadata")
    all_data = []

    for sensor_file in glob(os.path.join(sensor_dir, "*.csv")):
        base_name = os.path.basename(sensor_file).replace(".csv", "")
        metadata_file = os.path.join(metadata_dir, base_name + ".json")

        df = pd.read_csv(sensor_file)
        df['episode'] = base_name

        if os.path.exists(metadata_file):
            with open(metadata_file) as f:
                meta = json.load(f)
            df['activity'] = meta.get("activity", os.path.basename(activity_folder))
            df['start_time'] = meta.get("start_time")
            df['end_time'] = meta.get("end_time")
            df['avg_participants'] = meta.get("avg_participants", None)
        else:
            df['activity'] = os.path.basename(activity_folder)

        all_data.append(df)

    return pd.concat(all_data, ignore_index=True)

#load the data
activity_path = os.path.join(ROOT_DIR, ACTIVITY_NAME)
df = load_activity_data(activity_path)
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
df.head()

Unnamed: 0,timestamp,sensor_name,value,episode,activity,start_time,end_time,avg_participants
0,1970-01-01 00:25:05.091265039,Temperature_1,25.89,Seminar_0,Seminar,,,
1,1970-01-01 00:25:05.091265042,Humidity_1,57.31114791205309,Seminar_0,Seminar,,,
2,1970-01-01 00:25:05.091265046,Brightness_1,48.0,Seminar_0,Seminar,,,
3,1970-01-01 00:25:05.091265049,Temperature_2,23.9,Seminar_0,Seminar,,,
4,1970-01-01 00:25:05.091265051,Humidity_2,59.40758588226711,Seminar_0,Seminar,,,


## Find the most active sensors

In [35]:
df['value'] = df['value'].astype(str).str.lower()
active_counts = df[df['value'].isin(['1', 'true', 'occupied'])]['sensor_name'].value_counts()
print("Most active sensors:")
print(active_counts.head(10))

Most active sensors:
Motion_1    937
Motion_5    791
Motion_3    748
Motion_7    711
Motion_8    710
Motion_2    698
Motion_4    463
Motion_6    449
Seat_9       70
Seat_2       43
Name: sensor_name, dtype: int64


## Find the most active seats

In [38]:
seat_df = df[df['sensor_name'].str.contains('Seat', na=False)]
seat_df['value'] = seat_df['value'].astype(str).str.lower()
seat_usage = seat_df[seat_df['value'].isin(['1', 'true'])]['sensor_name'].value_counts()
print("Most used seats:")
print(seat_usage)

Most used seats:
Seat_9     70
Seat_2     43
Seat_8     37
Seat_3     30
Seat_4     26
Seat_1     24
Seat_12    13
Seat_5     11
Seat_11    11
Seat_7      9
Seat_10     3
Name: sensor_name, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  seat_df['value'] = seat_df['value'].astype(str).str.lower()
