# CSV -> NPZ (PID)

## 실험 1
- 상태: T1, T2, TSP1, TSP2 
- 액션: Q1, Q2
- 보상: -‖T - TSP‖ (측정온도와 목표온도 차이의 L2 노름에 음수를 취한 값, 현재 시점 t 기준)

![image-2.png](attachment:image-2.png)

In [19]:
import pandas as pd
import numpy as np
from glob import glob
import os

csv_dir = "/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv"
csv_dir2 = "/home/minchanggi/code/TCLab/data/PID600-1200/csv"
npz_output_path = "/home/minchanggi/code/TCLab/data/PID2MPC/NPZ/offline1_T-Tsp.npz"

# Episode logs with fewer rows than this are skipped.
MIN_EPISODE_ROWS = 1000


def make_transitions_current_error(df):
    """Convert one episode log into transition arrays (experiment 1 reward).

    Row t supplies the observation and the action, row t+1 supplies the next
    observation, so a log of N rows yields N-1 transitions. The reward of
    transition t is the negative Euclidean norm of (T - TSP) taken at row t.

    Args:
        df: Episode log with columns T1, T2, TSP1, TSP2, Q1, Q2.

    Returns:
        Tuple ``(obs, next_obs, actions, rewards, terminals)`` of 2-D arrays
        with N-1 rows each and 4, 4, 2, 1, 1 columns respectively.

    Raises:
        KeyError: If one of the required columns is missing from ``df``.
    """
    state_cols = ['T1', 'T2', 'TSP1', 'TSP2']
    obs = df[state_cols].values[:-1]
    next_obs = df[state_cols].values[1:]
    actions = df[['Q1', 'Q2']].values[:-1]
    rewards = -np.linalg.norm(
        df[['T1', 'T2']].values[:-1] - df[['TSP1', 'TSP2']].values[:-1],
        axis=1,
        keepdims=True,
    )
    # Every flag stays 0, exactly as before (the old `terminals[-1] = 0.0`
    # assignment wrote 0 into an all-zero array and had no effect).
    # NOTE(review): if the consumer of this NPZ needs episode boundaries,
    # the last transition of each episode has to be marked here - confirm.
    terminals = np.zeros((len(obs), 1), dtype=np.float32)
    return obs, next_obs, actions, rewards, terminals


# A notebook cell (and `python file.py`) runs with __name__ == "__main__", so
# the conversion below still executes there; the guard only keeps the disk
# I/O from running when the helper above is imported from somewhere else.
if __name__ == "__main__":
    all_obs = []
    all_next_obs = []
    all_actions = []
    all_rewards = []
    all_terminals = []

    csv_files_1 = sorted(glob(os.path.join(csv_dir, 'PID_episode_*_data.csv')))
    csv_files_2 = sorted(glob(os.path.join(csv_dir2, "PID_episode_*_data.csv")))
    print(len(csv_files_1), len(csv_files_2))

    csv_files = csv_files_1 + csv_files_2

    for file_path in csv_files:
        df = pd.read_csv(file_path)
        if len(df) < MIN_EPISODE_ROWS:
            continue
        print(file_path)
        obs, next_obs, actions, rewards, terminals = (
            make_transitions_current_error(df)
        )

        all_obs.append(obs)
        all_next_obs.append(next_obs)
        all_actions.append(actions)
        all_rewards.append(rewards)
        all_terminals.append(terminals)

    # np.vstack([]) fails with an unhelpful message; say what actually went
    # wrong (bad directory, or every log shorter than the threshold).
    if not all_obs:
        raise ValueError(
            f"no episode with at least {MIN_EPISODE_ROWS} rows found in "
            f"{csv_dir} or {csv_dir2}"
        )

    dataset = {
        'observations': np.vstack(all_obs),
        'actions': np.vstack(all_actions),
        'rewards': np.vstack(all_rewards),
        'next_observations': np.vstack(all_next_obs),
        'terminals': np.vstack(all_terminals),
    }
    # np.savez does not create missing parent directories.
    os.makedirs(os.path.dirname(npz_output_path), exist_ok=True)
    np.savez(npz_output_path, **dataset)
    print(f"✅ 저장 완료: {dataset['observations'].shape[0]} transitions")
    print(f"→ {npz_output_path}")


50 46
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_10_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_11_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_12_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_13_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_14_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_15_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_16_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_17_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_18_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_19_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_1_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_20_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_21_data.csv
/home/minchanggi/code/TCLab/data/

## 실험 2
- 상태: T1, T2, TSP1, TSP2 
- 액션: Q1, Q2
- 보상: -‖T(t+1) - TSP(t+1)‖ (다음 시점의 측정온도와 목표온도 차이의 L2 노름에 음수를 취한 값)

t 시점의 Q는 t 시점의 T1, T2가 아니라 t+1 시점의 T1, T2에 영향을 준다.
그렇기에 다음 시점(t+1)의 목표온도와 측정온도의 차이를 보상함수로 지정함.

In [18]:
import pandas as pd
import numpy as np
from glob import glob
import os

csv_dir = "/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv"
csv_dir2 = "/home/minchanggi/code/TCLab/data/PID600-1200/csv"
npz_output_path = "/home/minchanggi/code/TCLab/data/PID2MPC/NPZ/offline2_T-Tsp_next.npz"

# Episode logs with fewer rows than this are skipped.
MIN_EPISODE_ROWS = 1000


def make_transitions_next_error(df):
    """Convert one episode log into transition arrays (experiment 2 reward).

    Row t supplies the observation and the action, row t+1 supplies the next
    observation, so a log of N rows yields N-1 transitions. The reward of
    transition t is the negative Euclidean norm of (T - TSP) taken at row
    t+1, i.e. it scores the state the action led to.

    Args:
        df: Episode log with columns T1, T2, TSP1, TSP2, Q1, Q2.

    Returns:
        Tuple ``(obs, next_obs, actions, rewards, terminals)`` of 2-D arrays
        with N-1 rows each and 4, 4, 2, 1, 1 columns respectively.

    Raises:
        KeyError: If one of the required columns is missing from ``df``.
    """
    state_cols = ['T1', 'T2', 'TSP1', 'TSP2']
    obs = df[state_cols].values[:-1]
    next_obs = df[state_cols].values[1:]
    actions = df[['Q1', 'Q2']].values[:-1]
    rewards = -np.linalg.norm(
        df[['T1', 'T2']].values[1:] - df[['TSP1', 'TSP2']].values[1:],
        axis=1,
        keepdims=True,
    )
    # Every flag stays 0, exactly as before (the old `terminals[-1] = 0.0`
    # assignment wrote 0 into an all-zero array and had no effect).
    # NOTE(review): if the consumer of this NPZ needs episode boundaries,
    # the last transition of each episode has to be marked here - confirm.
    terminals = np.zeros((len(obs), 1), dtype=np.float32)
    return obs, next_obs, actions, rewards, terminals


# A notebook cell (and `python file.py`) runs with __name__ == "__main__", so
# the conversion below still executes there; the guard only keeps the disk
# I/O from running when the helper above is imported from somewhere else.
if __name__ == "__main__":
    all_obs = []
    all_next_obs = []
    all_actions = []
    all_rewards = []
    all_terminals = []

    csv_files_1 = sorted(glob(os.path.join(csv_dir, 'PID_episode_*_data.csv')))
    csv_files_2 = sorted(glob(os.path.join(csv_dir2, "PID_episode_*_data.csv")))
    print(len(csv_files_1), len(csv_files_2))

    csv_files = csv_files_1 + csv_files_2

    for file_path in csv_files:
        df = pd.read_csv(file_path)
        if len(df) < MIN_EPISODE_ROWS:
            continue
        print(file_path)
        obs, next_obs, actions, rewards, terminals = (
            make_transitions_next_error(df)
        )

        all_obs.append(obs)
        all_next_obs.append(next_obs)
        all_actions.append(actions)
        all_rewards.append(rewards)
        all_terminals.append(terminals)

    # np.vstack([]) fails with an unhelpful message; say what actually went
    # wrong (bad directory, or every log shorter than the threshold).
    if not all_obs:
        raise ValueError(
            f"no episode with at least {MIN_EPISODE_ROWS} rows found in "
            f"{csv_dir} or {csv_dir2}"
        )

    dataset = {
        'observations': np.vstack(all_obs),
        'actions': np.vstack(all_actions),
        'rewards': np.vstack(all_rewards),
        'next_observations': np.vstack(all_next_obs),
        'terminals': np.vstack(all_terminals),
    }
    # np.savez does not create missing parent directories.
    os.makedirs(os.path.dirname(npz_output_path), exist_ok=True)
    np.savez(npz_output_path, **dataset)
    print(f"✅ 저장 완료: {dataset['observations'].shape[0]} transitions")
    print(f"→ {npz_output_path}")


50 46
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_10_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_11_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_12_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_13_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_14_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_15_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_16_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_17_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_18_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_19_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_1_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_20_data.csv
/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv/PID_episode_21_data.csv
/home/minchanggi/code/TCLab/data/

## 실험 3

- 상태: T1, T2, TSP1, TSP2 
- 액션: Q1, Q2

앞선 방식들은 올바른 액션을 취해도 현재 상태가 안좋으면 보상 또한 좋지 않았다.

ex) 
Tsp가 60일때
T가 20일 때 Q를 100으로 하여도 -40의 보상
T가 50일 때 Q를 100으로 하면 -10의 보상

즉, 둘 다 올바른 행동이었지만 상태에 따라 보상이 달라진다.

- 보상:  PBRS(Potential-Based Reward Shaping) 적용 

$$\phi_t = -\lVert T_t - \mathrm{TSP}_t \rVert_2$$

$$r_t = \gamma\,\phi_{t+1} - \phi_t \qquad (\gamma = 0.99)$$


현재의 행동으로 인해 다음 상태의 개선정도가 보상으로서 작용된다.

| 상황             | PBRS 보상     | 유도되는 행동  |
|------------------|---------------|----------------|
| 목표에 가까워짐  | 양의 보상 ↑   | 접근           |
| 그대로 유지      | 거의 0        | 정지 유지       |
| 목표에서 멀어짐  | 음의 보상 ↓   | 복귀 시도       |


In [None]:
import pandas as pd
import numpy as np
from glob import glob
import os

csv_dir = "/home/minchanggi/code/TCLab/data/PID2MPC/PID3/csv"
csv_dir2 = "/home/minchanggi/code/TCLab/data/PID600-1200/csv"
npz_output_path = "/home/minchanggi/code/TCLab/data/PID2MPC/NPZ/offline3_PBRS.npz"

# Episode logs with fewer rows than this are skipped.
MIN_EPISODE_ROWS = 1000

# Discount used in the shaping term; set once instead of on every file.
# NOTE(review): potential-based shaping is normally paired with the same
# discount factor the learner uses - confirm against the training config.
gamma = 0.99


def make_transitions_pbrs(df, gamma=0.99):
    """Convert one episode log into transition arrays with a PBRS reward.

    Row t supplies the observation and the action, row t+1 supplies the next
    observation, so a log of N rows yields N-1 transitions. With the
    potential ``phi_t = -||T_t - TSP_t||`` the reward of transition t is
    ``gamma * phi_{t+1} - phi_t``.

    Args:
        df: Episode log with columns T1, T2, TSP1, TSP2, Q1, Q2.
        gamma: Discount applied to the next-step potential.

    Returns:
        Tuple ``(obs, next_obs, actions, rewards, terminals)`` of 2-D arrays
        with N-1 rows each and 4, 4, 2, 1, 1 columns respectively.

    Raises:
        KeyError: If one of the required columns is missing from ``df``.
    """
    state_cols = ['T1', 'T2', 'TSP1', 'TSP2']
    obs = df[state_cols].values[:-1]
    next_obs = df[state_cols].values[1:]
    actions = df[['Q1', 'Q2']].values[:-1]

    # Potential for every row, then the shaped difference between
    # consecutive rows.
    T = df[['T1', 'T2']].values
    TSP = df[['TSP1', 'TSP2']].values
    phi = -np.linalg.norm(T - TSP, axis=1, keepdims=True)
    rewards = gamma * phi[1:] - phi[:-1]

    # Every flag stays 0, exactly as before (the old `terminals[-1] = 0.0`
    # assignment wrote 0 into an all-zero array and had no effect).
    # NOTE(review): if the consumer of this NPZ needs episode boundaries,
    # the last transition of each episode has to be marked here - confirm.
    terminals = np.zeros((len(obs), 1), dtype=np.float32)
    return obs, next_obs, actions, rewards, terminals


# A notebook cell (and `python file.py`) runs with __name__ == "__main__", so
# the conversion below still executes there; the guard only keeps the disk
# I/O from running when the helper above is imported from somewhere else.
if __name__ == "__main__":
    all_obs = []
    all_next_obs = []
    all_actions = []
    all_rewards = []
    all_terminals = []

    csv_files_1 = sorted(glob(os.path.join(csv_dir, 'PID_episode_*_data.csv')))
    csv_files_2 = sorted(glob(os.path.join(csv_dir2, "PID_episode_*_data.csv")))
    print(len(csv_files_1), len(csv_files_2))

    csv_files = csv_files_1 + csv_files_2

    for file_path in csv_files:
        df = pd.read_csv(file_path)
        if len(df) < MIN_EPISODE_ROWS:
            continue
        print(file_path)
        obs, next_obs, actions, rewards, terminals = make_transitions_pbrs(
            df, gamma=gamma
        )

        all_obs.append(obs)
        all_next_obs.append(next_obs)
        all_actions.append(actions)
        all_rewards.append(rewards)
        all_terminals.append(terminals)

    # np.vstack([]) fails with an unhelpful message; say what actually went
    # wrong (bad directory, or every log shorter than the threshold).
    if not all_obs:
        raise ValueError(
            f"no episode with at least {MIN_EPISODE_ROWS} rows found in "
            f"{csv_dir} or {csv_dir2}"
        )

    dataset = {
        'observations': np.vstack(all_obs),
        'actions': np.vstack(all_actions),
        'rewards': np.vstack(all_rewards),
        'next_observations': np.vstack(all_next_obs),
        'terminals': np.vstack(all_terminals),
    }
    # np.savez does not create missing parent directories.
    os.makedirs(os.path.dirname(npz_output_path), exist_ok=True)
    np.savez(npz_output_path, **dataset)
    print(f"✅ 저장 완료: {dataset['observations'].shape[0]} transitions")
    print(f"→ {npz_output_path}")
