In [47]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch.optim import Adam
import random
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import tensorflow as tf
import math
from torch.utils.data import TensorDataset, DataLoader
import seaborn as sns
import os

# 시드 고정
def set_seed(seed=42):
    np.random.seed(seed)
    tf.random.set_seed(seed)
    random.seed(seed)
    
set_seed(42)

In [3]:
data = pd.read_csv('/home/aibig25/hong_sj/trb/num.csv')
data = data.fillna(0)

unique_ids = data['sequence_ID'].unique()
train_ids, test_ids = train_test_split(unique_ids, test_size=41, random_state=42)
train_data = data[data['sequence_ID'].isin(train_ids)]
test_data = data[data['sequence_ID'].isin(test_ids)]

independent_vars = data.columns.difference(['center_x', 'center_y','center_x_ma','center_y_ma', 'ID', 'frame'])
dependent_vars = ['center_y_ma']

scaler = MinMaxScaler()

train_data[independent_vars] = scaler.fit_transform(train_data[independent_vars])
test_data[independent_vars] = scaler.transform(test_data[independent_vars])

X_train = train_data[independent_vars]
y_train = train_data[dependent_vars]

X_test = test_data[independent_vars]
y_test = test_data[dependent_vars]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data[independent_vars] = scaler.fit_transform(train_data[independent_vars])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data[independent_vars] = scaler.transform(test_data[independent_vars])


In [4]:
# 입력 및 예측 시퀀스 길이 정의
input_sequence_length = 120
output_sequence_length = 90

def create_sequences(data, input_sequence_length, output_sequence_length):
    X = []
    y = []

    for i in range(len(data) - input_sequence_length - output_sequence_length + 1):
        X.append(data.iloc[i:(i + input_sequence_length)][independent_vars].values)
        y.append(data.iloc[(i + input_sequence_length):(i + input_sequence_length + output_sequence_length)][dependent_vars].values)
    
    return np.array(X), np.array(y)

X_train, y_train = create_sequences(train_data, input_sequence_length, output_sequence_length)
X_test, y_test = create_sequences(test_data, input_sequence_length, output_sequence_length)

In [5]:
# 데이터셋을 텐서로 변환
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32))

# 데이터 로더 생성
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)

In [8]:
class PositionalEncoding(nn.Module):
    def __init__(self, model_dim, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.encoding = torch.zeros(max_len, model_dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, model_dim, 2).float() * (-math.log(10000.0) / model_dim))
        self.encoding[:, 0::2] = torch.sin(position * div_term)
        self.encoding[:, 1::2] = torch.cos(position * div_term)
        self.encoding = self.encoding.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', self.encoding)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return x

class TrajectoryTransformer(nn.Module):
    def __init__(self, input_dim, model_dim, num_heads, num_encoder_layers, num_decoder_layers, output_dim):
        super(TrajectoryTransformer, self).__init__()
        self.model_dim = model_dim
        
        self.encoder = nn.Linear(input_dim, model_dim)
        self.pos_encoder = PositionalEncoding(model_dim)
        self.tgt_linear = nn.Linear(1, model_dim)
        
        self.transformer = nn.Transformer(
            d_model=model_dim,
            nhead=num_heads,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=model_dim * 4,
            dropout=0.1
        )
        
        self.decoder = nn.Linear(model_dim, output_dim)
        self.attn_weights = None

    def forward(self, src, tgt):
        src = self.encoder(src)
        src = src * math.sqrt(self.model_dim)
        src = self.pos_encoder(src.permute(1, 0, 2))

        tgt = tgt.squeeze(-1)
        original_shape = tgt.shape
        tgt = tgt.reshape(-1, 1)
        tgt = self.tgt_linear(tgt)
        tgt = tgt.view(original_shape[0], original_shape[1], -1)
        tgt = tgt * math.sqrt(self.model_dim)
        tgt = self.pos_encoder(tgt.permute(1, 0, 2))

        output = self.transformer(src, tgt)
        output = self.decoder(output.permute(1, 0, 2))

        return output

    def get_attention_map(self, src, tgt):
        src = self.encoder(src)
        src = src * math.sqrt(self.model_dim)
        src = self.pos_encoder(src.permute(1, 0, 2))

        tgt = tgt.squeeze(-1)
        original_shape = tgt.shape
        tgt = tgt.reshape(-1, 1)
        tgt = self.tgt_linear(tgt)
        tgt = tgt.view(original_shape[0], original_shape[1], -1)
        tgt = tgt * math.sqrt(self.model_dim)
        tgt = self.pos_encoder(tgt.permute(1, 0, 2))

        src_key_padding_mask = None
        tgt_key_padding_mask = None
        memory_key_padding_mask = None
        memory = self.transformer.encoder(src, src_key_padding_mask=src_key_padding_mask)
        
        output = tgt
        self.attn_weights = []
        for layer in self.transformer.decoder.layers:
            output, attn = layer.multihead_attn(output, memory, memory, need_weights=True)
            self.attn_weights.append(attn)

        return self.attn_weights

In [10]:
def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    model.train()
    model.to(device)
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, targets)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

# 모델 초기화
input_dim = len(independent_vars)
output_dim = len(dependent_vars)
model_dim = 512
num_heads = 4
num_encoder_layers = 3
num_decoder_layers = 3

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TrajectoryTransformer(input_dim, model_dim, num_heads, num_encoder_layers, num_decoder_layers, output_dim)

In [97]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 50

train_model(model, train_loader, criterion, optimizer, num_epochs, device)

Epoch 1/50, Loss: 11701.169069997455
Epoch 2/50, Loss: 1231.854136720064
Epoch 3/50, Loss: 1231.697259487293
Epoch 4/50, Loss: 1229.8300892539937
Epoch 5/50, Loss: 1231.3954918296558
Epoch 6/50, Loss: 1231.0917903819695
Epoch 7/50, Loss: 1229.786757715557
Epoch 8/50, Loss: 1229.9088522570833
Epoch 9/50, Loss: 1230.050419339815
Epoch 10/50, Loss: 1229.6009766305478
Epoch 11/50, Loss: 1230.7381268050424
Epoch 12/50, Loss: 1230.6543213788989
Epoch 13/50, Loss: 1229.1251526448764
Epoch 14/50, Loss: 1229.4910906270434
Epoch 15/50, Loss: 1229.860212362923
Epoch 16/50, Loss: 1230.5493336897114
Epoch 17/50, Loss: 1228.5935417604362
Epoch 18/50, Loss: 1230.7904780208542
Epoch 19/50, Loss: 1229.5876862654995
Epoch 20/50, Loss: 1228.7818415294842
Epoch 21/50, Loss: 1229.5895692091206
Epoch 22/50, Loss: 1231.775401385262
Epoch 23/50, Loss: 1229.060552027188
Epoch 24/50, Loss: 1230.0225940697642
Epoch 25/50, Loss: 1229.3738969859842
Epoch 26/50, Loss: 1230.5811601883497
Epoch 27/50, Loss: 1229.9239

In [136]:
def evaluate_model(model, test_loader, device):
    model.eval()  # 평가 모드로 전환
    model.to(device)
    total_loss = 0.0
    total_rmse = 0.0
    total_mape = 0.0
    criterion = nn.MSELoss()
    
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs, targets)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
            
            rmse = torch.sqrt(loss)
            total_rmse += rmse.item()
            
            mape = torch.mean(torch.abs((targets - outputs) / targets)) * 100
            total_mape += mape.item()
    
    avg_loss = total_loss / len(test_loader)
    avg_rmse = total_rmse / len(test_loader)
    avg_mape = total_mape / len(test_loader)
    
    print(f"Test Loss: {avg_loss:.4f}")
    print(f"Test RMSE: {avg_rmse:.4f}")
    print(f"Test MAPE: {avg_mape:.4f}%")

In [137]:
evaluate_model(model, test_loader, device)

Test Loss: 1069.2389
Test RMSE: 29.3760
Test MAPE: 5.6550%


In [105]:
# 학습된 모델 저장
torch.save(model.state_dict(), '4 to 3.pt')

In [6]:
def plot_attention_heatmap(attention_weights, input_sequence_length, output_sequence_length, sequence_id, sequence_index):
    attention_weights = attention_weights[0][0].detach().cpu().numpy()
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(attention_weights, ax=ax, cmap='viridis')
    ax.set_xlabel('Input Sequence')
    ax.set_ylabel('Output Sequence')
    plt.title(f'Attention Heatmap / Vehicel ID : {sequence_id} , Sequence : {sequence_index}')
    plt.show()

def get_sequences_by_id(data, sequence_id):
    sequences = data[data['sequence_ID'] == sequence_id]
    total_sequences = (len(sequences) - input_sequence_length - output_sequence_length + 1)
    return sequences, total_sequences

def select_and_plot_heatmap(model, data, sequence_id, sequence_index, device):
    model.eval()  # 평가 모드로 전환
    sequences, total_sequences = get_sequences_by_id(data, sequence_id)
    if total_sequences <= 0:
        print("No valid sequences found for the given sequence_id.")
        return

    if sequence_index >= total_sequences:
        print(f"Sequence index out of range. There are only {total_sequences} sequences.")
        return
    
    X, y = create_sequences(sequences, input_sequence_length, output_sequence_length)
    sample_src = torch.tensor(X[sequence_index:sequence_index+1], dtype=torch.float32).to(device)
    sample_tgt = torch.tensor(y[sequence_index:sequence_index+1], dtype=torch.float32).to(device)

    attention_weights = model.get_attention_map(sample_src, sample_tgt)
    plot_attention_heatmap(attention_weights, input_sequence_length, output_sequence_length, sequence_id, sequence_index)
    print(f"Total sequences for sequence_ID {sequence_id}: {total_sequences}")

In [11]:
loaded_model = TrajectoryTransformer(input_dim, model_dim, num_heads, num_encoder_layers, num_decoder_layers, output_dim)
loaded_model.load_state_dict(torch.load('4 to 3.pt', map_location=device))
loaded_model.to(device)

TrajectoryTransformer(
  (encoder): Linear(in_features=26, out_features=512, bias=True)
  (pos_encoder): PositionalEncoding()
  (tgt_linear): Linear(in_features=1, out_features=512, bias=True)
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-2): 3 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
          )
          (linear1): Linear(in_features=512, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=512, bias=True)
          (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
      )
      (norm): LayerNorm((512,), eps=1e-05, e

In [51]:
# Example usage:
sequence_id = 100  # 원하는 sequence_ID를 지정
sequence_index = 0  # 원하는 시퀀스 인덱스를 지정
select_and_plot_heatmap(loaded_model, data, sequence_id, sequence_index, device)

TypeError: select_and_plot_heatmap() missing 1 required positional argument: 'save_dir'

In [53]:
def plot_attention_heatmap(attention_weights, input_sequence_length, output_sequence_length, sequence_id, sequence_index, save_path):
    attention_weights = attention_weights[0][0].detach().cpu().numpy()
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(attention_weights, ax=ax, cmap='viridis')
    ax.set_xlabel('Input Sequence')
    ax.set_ylabel('Output Sequence')
    plt.title(f'Attention Heatmap / Vehicle ID : {sequence_id} , Sequence : {sequence_index}')
    
    # Save the plot instead of showing it
    plt.savefig(save_path)
    plt.close()

def get_sequences_by_id(data, sequence_id):
    sequences = data[data['sequence_ID'] == sequence_id]
    total_sequences = (len(sequences) - input_sequence_length - output_sequence_length + 1)
    return sequences, total_sequences

def select_and_plot_heatmap(model, data, sequence_id, sequence_index, device, save_dir):
    model.eval()  # 평가 모드로 전환
    sequences, total_sequences = get_sequences_by_id(data, sequence_id)
    if total_sequences <= 0:
        print("No valid sequences found for the given sequence_id.")
        return

    if sequence_index >= total_sequences:
        print(f"Sequence index out of range. There are only {total_sequences} sequences.")
        return
    
    X, y = create_sequences(sequences, input_sequence_length, output_sequence_length)
    sample_src = torch.tensor(X[sequence_index:sequence_index+1], dtype=torch.float32).to(device)
    sample_tgt = torch.tensor(y[sequence_index:sequence_index+1], dtype=torch.float32).to(device)

    attention_weights = model.get_attention_map(sample_src, sample_tgt)
    
    # Save path for the heatmap
    save_path = os.path.join(save_dir, f'heatmap_sequence_{sequence_index}.png')
    plot_attention_heatmap(attention_weights, input_sequence_length, output_sequence_length, sequence_id, sequence_index, save_path)
    print(f"Total sequences for sequence_ID {sequence_id}: {total_sequences}")

def save_heatmaps(loaded_model, data, sequence_id, device, save_dir):
    # Create the directory if it doesn't exist
    os.makedirs(save_dir, exist_ok=True)
    
    for sequence_index in range(100):  # 0 to 71 inclusive
        select_and_plot_heatmap(loaded_model, data, sequence_id, sequence_index, device, save_dir)


In [66]:
sequence_id = 61  # 원하는 sequence_ID를 지정
sequence_index = 0  # 원하는 시퀀스 인덱스를 지정
save_dir = "/home/aibig25/hong_sj/trb/transformer/heatmap/ID173"
save_heatmaps(loaded_model, data, sequence_id, device, save_dir)

No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid sequences found for the given sequence_id.
No valid seq