In [None]:
import pandas as pd
import numpy as np
import sys
sys.path.append("..")  
from openai import OpenAI
import re
from typing import Optional
import matplotlib.pyplot as plt
import os
from dotenv import load_dotenv
load_dotenv()
import csv

In [None]:
# Provider API keys are read from the process environment (load_dotenv() is called
# in the import cell); a variable that is not set resolves to None.
DEEPSEEK_API_KEY, OPENAI_API_KEY = (
    os.getenv(env_name) for env_name in ("DEEPSEEK_API_KEY", "OPENAI_API_KEY")
)

In [None]:
# String constants for table column names.
# NOTE(review): none of these are referenced by the visible cells — confirm where they are used.
ACTIVITY, DEVICE = 'activity', 'device'
START_TIME, END_TIME, TIME = 'start_time', 'end_time', 'time'
VALUE, NAME = 'value', 'name'

# Dataset label and experiment directory name (presumably used for output paths — TODO confirm).
DATASET, DATA_DIR = 'Aruba', 'LLM_segment_ARUBA'

# Alternative backend (DeepSeek): uncomment these four lines and comment out the
# OpenAI block below to switch providers.
# api_key = DEEPSEEK_API_KEY
# client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
# model_name = "deepseek-chat"
# MODEL = "deepseek"

# Active backend: OpenAI.
api_key = OPENAI_API_KEY
client = OpenAI(api_key=api_key)
# model_name is the id sent to the API; MODEL looks like a short label — TODO confirm its use.
model_name, MODEL = "gpt-4o-mini-2024-07-18", "gpt-4o-mini"
temperature = 0  # sampling temperature setting

In [3]:
import sys
from pathlib import Path

sys.path.append("..")  # lets the import below resolve data_load from the parent directory
from data_load.load_data import load_aruba_dataset

# Load the Aruba dataset, then preview both tables, the dict keys and the two label lists.
dataset_b = load_aruba_dataset(Path("../dataset/casas/Aruba"))
print(
    dataset_b['activities'].head(),
    dataset_b['devices'].head(),
    dataset_b.keys(),
    dataset_b['activity_list'],
    dataset_b['device_list'],
    sep="\n",
)

                   start_time                    end_time       activity
0  2010-11-04 00:03:50.209589  2010-11-04 05:40:43.642664          sleep
1  2010-11-04 05:40:51.303739  2010-11-04 05:43:30.279021  bed_to_toilet
2    2010-11-04 05:43:45.7324   2010-11-04 08:01:12.28297          sleep
3  2010-11-04 08:11:09.966157  2010-11-04 08:27:02.801314           cook
4  2010-11-04 08:33:52.929406  2010-11-04 08:35:45.822482           cook
                         time                     device  value
0  2010-11-04 00:03:50.209589          M003_Bedroom1_Bed   True
1  2010-11-04 00:03:57.399391          M003_Bedroom1_Bed  False
2  2010-11-04 00:15:08.984841         T002_Living_Corner   21.5
3  2010-11-04 00:30:19.185547       T003_Kitchen_Counter   21.0
4  2010-11-04 00:30:19.385336  T004_Aisle_near_Bathroom2   21.0
dict_keys(['activities', 'devices', 'activity_list', 'device_list'])
['sleep', 'bed_to_toilet', 'cook', 'relax', 'work', 'eat', 'leave_home', 'enter_home', 'other']
['M003_Bedroo

In [4]:
import pandas as pd

def _strip_code_fence(text):
    """Return ``text`` without surrounding whitespace and without a wrapping Markdown code fence."""
    stripped = text.strip()
    lines = stripped.splitlines()
    # Only treat the reply as fenced when the fence sits on its own first line.
    if len(lines) < 2 or not lines[0].startswith("```"):
        return stripped
    body = lines[1:]  # drop the opening fence (it may carry a language tag, e.g. ```csv)
    # The closing fence can be missing when the reply was cut off by the token limit.
    if body and body[-1].strip().startswith("```"):
        body = body[:-1]
    return "\n".join(body).strip()


def identify_activity_edges_from_raw_data(df, max_tokens=500, temperature=0.0):
    """
    Ask the chat model to segment one window of raw sensor events into activities.

    The DataFrame is rendered to plain text with ``df.to_string(index=False)`` and
    sent as the user message; this function does not require any particular columns.
    It relies on the notebook-level ``client`` and ``model_name`` being defined.

    Args:
        df: pandas DataFrame holding one window of sensor events.
        max_tokens: Completion token limit passed to the API. Replies that would
            exceed it are cut off, so raise it for large windows.
        temperature: Sampling temperature passed to the API.

    Returns:
        str: The model's reply with surrounding whitespace and any wrapping
        Markdown code fence removed. The prompt asks for rows of
        ``start_time,end_time,activity``, but the content is not validated here.
        If the request fails or the reply carries no text, a string starting with
        "Error identifying activity edges: " is returned instead of raising.
    """
    # System prompt: instructs the model to infer activities directly from the sensor events.
    system_prompt = """
    Analyze the provided raw sensor data and identify distinct indoor human activities.
    For each activity, determine the start and end times based on the sensor events.
    Please note that activities may include:
    'sleep', 'bed_to_toilet', 'cook', 'relax', 'work', 'eat', 'leave_home', 'enter_home', 'other'
    Return only the result in the following table format, without any additional explanations or summaries:
    start_time(YYYY-MM-DD HH:MM:SS),end_time(YYYY-MM-DD HH:MM:SS),activity
    """

    # Serialize the window as plain text so it can be embedded in the user message.
    data_str = df.to_string(index=False)

    try:
        response = client.chat.completions.create(
            model=model_name,  # notebook-level setting; must be defined before calling
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Raw sensor data:\n{data_str}"}
            ],
            max_tokens=max_tokens,
            temperature=temperature
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberately best-effort: one failed request is reported as text so the
        # caller can keep processing the remaining windows.
        return f"Error identifying activity edges: {str(e)}"

    if content is None:
        # The API may return a message without text; report it explicitly rather
        # than failing on None.strip().
        return "Error identifying activity edges: model returned no text content"

    # The model tends to wrap the table in ``` fences despite the prompt; remove them
    # so the saved output contains only the table rows.
    return _strip_code_fence(content)

def run_pipeline(df, window_size=50, activity_output_path="aruba/activity_edges.txt"):
    """
    Split ``df`` into consecutive row windows, ask the model to label each window,
    and save the replies to a text file.

    Each window is passed to ``identify_activity_edges_from_raw_data``; its reply is
    written under a ``Window N:`` header and also printed.

    Args:
        df: pandas DataFrame of raw sensor events, processed in row order.
        window_size: Number of rows per window; must be a positive integer.
        activity_output_path: Destination text file. It is overwritten, and missing
            parent directories are created.

    Raises:
        ValueError: If ``window_size`` is not positive. This is checked before the
            output file is touched, so an existing file is not truncated.
    """
    import os  # local import keeps this cell runnable on its own

    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    # open(..., "w") does not create directories, so make sure the parent exists.
    output_dir = os.path.dirname(activity_output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    num_rows = len(df)

    with open(activity_output_path, "w", encoding="utf-8") as activity_file:
        window_starts = range(0, num_rows, window_size)
        for window_number, start in enumerate(window_starts, start=1):
            # Every start is < num_rows, so the slice always holds at least one row.
            window = df.iloc[start:start + window_size]
            activity_edges = identify_activity_edges_from_raw_data(window)

            activity_file.write(f"Window {window_number}:\n{activity_edges}\n\n")
            # Flush per window so partial results are on disk during long runs.
            activity_file.flush()
            print(f"Window {window_number}:\n{activity_edges}\n")

    print(f"Activity edges saved to: {activity_output_path}")


# Label the device events in windows of 50 rows (one model request per window).
run_pipeline(df=dataset_b['devices'], window_size=50)

Window 1:
```
start_time,end_time,activity
2010-11-04 00:03:50,2010-11-04 00:03:57,sleep
2010-11-04 02:32:33,2010-11-04 02:32:38,sleep
2010-11-04 03:42:21,2010-11-04 03:42:25,sleep
2010-11-04 03:49:52,2010-11-04 03:49:57,sleep
2010-11-04 04:14:32,2010-11-04 04:14:39,sleep
2010-11-04 04:34:17,2010-11-04 04:34:21,sleep
2010-11-04 05:40:27,2010-11-04 05:40:34,sleep
2010-11-04 05:40:40,2010-11-04 05:40:43,sleep
```

Window 2:
```
start_time,end_time,activity
2010-11-04 05:40:45,2010-11-04 05:40:52,sleep
2010-11-04 05:43:24,2010-11-04 05:43:30,bed_to_toilet
2010-11-04 05:43:35,2010-11-04 05:44:23,relax
2010-11-04 05:53:50,2010-11-04 06:12:49,work
2010-11-04 06:37:45,2010-11-04 06:39:17,relax
2010-11-04 07:04:37,2010-11-04 07:09:29,work
```

Window 3:
```
start_time,end_time,activity
2010-11-04 07:52:53,2010-11-04 07:52:59,sleep
2010-11-04 07:52:59,2010-11-04 07:53:01,bed_to_toilet
2010-11-04 07:53:05,2010-11-04 07:53:07,bed_to_toilet
2010-11-04 07:53:15,2010-11-04 07:53:18,bed_to_toilet
201

KeyboardInterrupt: 