In [1]:
# Yiming Guo 07/14/2024
"""
./bin/ycsb run basic -p recordcount=100 -p operationcount=500000 -p workload=site.ycsb.workloads.CoreWorkload -p requestdistribution=hotspot -p hotspotdatafraction=0.2 -p hotspotopnfraction=0.8 -p readproportion=1.0 -p insertorder=ordered -p updateproportion=0 -s > data/tracea_load.txt
"""
import os
import subprocess
import time
import csv
import pandas as pd
import numpy as np
from collections import Counter
import torch
import os
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

In [1]:
# import torch

# torch.cuda.empty_cache()
# print("CUDA 是否可用:", torch.cuda.is_available())
# print("当前 CUDA 设备数量:", torch.cuda.device_count())
# print("当前 CUDA 设备名称:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "无")
import subprocess

import torch

# Report whether this PyTorch build can see a CUDA device, and how many.
print("CUDA 是否可用:", torch.cuda.is_available())
print("当前 CUDA 设备数量:", torch.cuda.device_count())

if not torch.cuda.is_available():
    # CUDA is unavailable: print a short troubleshooting guide.
    print("\nCUDA 不可用的可能原因:")

    # The installed PyTorch version string is the first diagnostic shown.
    print("PyTorch 版本:", torch.__version__)

    # Probe the driver through nvidia-smi. Only OSError is caught: that is what
    # subprocess.run raises when the executable is missing or cannot be started.
    # A bare `except:` would also swallow KeyboardInterrupt and real bugs.
    try:
        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        print("1. 无法执行 nvidia-smi 命令")
    else:
        if result.returncode != 0:
            print("1. 系统中没有 NVIDIA GPU 或 NVIDIA 驱动程序未正确安装")
        else:
            print("1. 系统中有 NVIDIA GPU，但 PyTorch 可能未编译为 GPU 版本")

    # Second possible cause, printed unconditionally.
    print("2. 您可能安装了 CPU 版本的 PyTorch，而不是 GPU 版本")

    # Suggested remediation steps.
    print("\n解决方案:")
    print("a. 确保系统中有 NVIDIA GPU")
    print("b. 安装 NVIDIA 驱动程序 (使用 nvidia-smi 检查)")
    print("c. 安装 CUDA 工具包")
    print("d. 安装与您 CUDA 版本匹配的 GPU 版本 PyTorch:")
    print("   访问 https://pytorch.org/get-started/locally/ 获取正确的安装命令")

    # Example install command.
    print("\n例如，对于 CUDA 11.7:")
    print("pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117")
else:
    print("当前 CUDA 设备名称:", torch.cuda.get_device_name(0))

CUDA 是否可用: True
当前 CUDA 设备数量: 1
当前 CUDA 设备名称: NVIDIA GeForce GTX 1650 Ti


In [None]:


# ---- File locations -------------------------------------------------------
YCSB_PATH = "/home/ming/桌面/Lion/YCSB"  # YCSB installation directory
OUTPUT_FILE = "/home/ming/桌面/Lion/ycsb_hotspot_result.txt"  # raw benchmark output
PROCESSED_FILE = "/home/ming/桌面/Lion/lstm_dataset.csv"  # processed dataset
LOG_FILE = "/home/ming/桌面/Lion/query_log.txt"  # detailed per-query log
MODEL_PATH = "/home/ming/桌面/Lion/hotspot_predictor.pth"  # saved model checkpoint

# ---- Workload shape -------------------------------------------------------
RECORD_COUNT = 100  # number of records in the data set
OPERATION_COUNT = 500000  # number of query operations
HOTSPOT_FRACTION = 0.2  # 20% of the keys are hot
HOTSPOT_OPS_FRACTION = 0.8  # 80% of the operations hit the hot keys

# ---- Sequence-model windowing ---------------------------------------------
WINDOW_SIZE_MS = 100  # time-window size in milliseconds
SEQUENCE_LENGTH = 10  # input sequence length, counted in time windows
KEYS_PER_WINDOW = 20  # hot keys tracked per time window

# ---- Compute device -------------------------------------------------------
# Use the GPU when this PyTorch build can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"使用设备: {device}")


NameError: name 'torch' is not defined

In [1]:
import subprocess
import os

def _build_ycsb_command(phase, record_count, operation_count):
    """Return the argv list for one YCSB phase ("load" or "run") on the basic binding."""
    return [
        "./bin/ycsb", phase, "basic",
        "-p", f"recordcount={record_count}",
        "-p", f"operationcount={operation_count}",
        "-p", "workload=site.ycsb.workloads.CoreWorkload",
        "-p", "requestdistribution=zipfian",
        "-p", "readproportion=1.0",
        "-p", "insertorder=ordered",
        "-p", "updateproportion=0",
        "-p", "readallfields=true",
        "-s",
    ]


def run_ycsb_command(
    ycsb_dir="/home/ming/桌面/PLIN-N /PLIN-N/YCSB",
    output_dir="/home/ming/桌面/PLIN-N /PLIN-N/build",
    record_count=1000000,
    operation_count=25000000,
):
    """Run the YCSB load phase and then the run phase, saving each phase's stdout.

    The load phase's stdout is written to ``<output_dir>/tracea_load.txt`` and the
    run phase's stdout to ``<output_dir>/tracea_run.txt``. Both phases use the same
    zipfian, read-only CoreWorkload settings and differ only in the phase name.

    Args:
        ycsb_dir: Directory containing ``bin/ycsb``. The child process runs with
            this as its working directory; the caller's own working directory is
            never changed, so a failure cannot leave the kernel in another directory.
        output_dir: Existing directory that receives the two trace files. A
            relative path is resolved against the caller's working directory.
        record_count: Value passed as the ``recordcount`` property.
        operation_count: Value passed as the ``operationcount`` property.

    Raises:
        FileNotFoundError: If ``ycsb_dir`` is not an existing directory. The check
            happens before any trace file is opened, so existing traces are not
            truncated in that case.
    """
    if not os.path.isdir(ycsb_dir):
        raise FileNotFoundError(f"YCSB directory not found: {ycsb_dir}")

    for phase in ("load", "run"):
        trace_path = os.path.join(output_dir, f"tracea_{phase}.txt")
        with open(trace_path, "w") as trace_file:
            # cwd= scopes the directory change to the child process only.
            # stderr (which carries the "-s" status lines) is captured so it
            # stays out of the notebook output unless the phase fails.
            result = subprocess.run(
                _build_ycsb_command(phase, record_count, operation_count),
                cwd=ycsb_dir,
                stdout=trace_file,
                stderr=subprocess.PIPE,
                text=True,
            )
        if result.returncode != 0:
            # Report the failure instead of silently leaving an incomplete trace.
            stderr_tail = (result.stderr or "")[-2000:]
            print(f"ycsb {phase} exited with code {result.returncode}:\n{stderr_tail}")

run_ycsb_command()

In [None]:
import re
import sys

input_file = "/home/ming/桌面/PLIN-N /PLIN-N/build/tracea_run.txt"
output_file = "/home/ming/桌面/PLIN-N /PLIN-N/build/command_plus.txt"

def extract_keys_from_ycsb_log(input_file, output_file):
    """Convert a YCSB basic-binding trace into a file of ``find <key>`` commands.

    Every line containing ``READ``, ``UPDATE`` or ``DELETE`` followed by
    ``usertable <key>`` yields one output line ``find <key>``; all three
    operations are emitted as ``find``. A leading ``user`` prefix is removed from
    the key. Keys without that prefix are written unchanged, rather than losing
    their first four characters.

    The trace is streamed line by line, so memory use does not grow with trace
    size. The input is opened before the output, so a missing input file never
    creates or truncates ``output_file``.

    Args:
        input_file: Path of the YCSB trace to read (UTF-8).
        output_file: Path of the command file to write (UTF-8, overwritten).

    Returns:
        None. Progress and errors are reported with ``print``; exceptions are
        not propagated to the caller.
    """
    operation_pattern = re.compile(r'(?:READ|UPDATE|DELETE) usertable (\w+)')
    extracted = 0
    try:
        with open(input_file, 'r', encoding='utf-8') as trace, \
                open(output_file, 'w', encoding='utf-8') as commands:
            for line in trace:
                match = operation_pattern.search(line)
                if match is None:
                    continue
                key = match.group(1)
                # Strip the "user" prefix only when it is actually present.
                if key.startswith("user"):
                    key = key[4:]
                commands.write(f"find {key}\n")
                extracted += 1

        print(f"extract {extracted} keys to {output_file}")

    except FileNotFoundError:
        print(f"error: cannot find {input_file}")
    except Exception as e:
        # Best-effort notebook helper: report the problem instead of raising.
        print(f"something wrong when process the file: {e}")

extract_keys_from_ycsb_log(input_file, output_file)



extract 50000000 keys to /home/ming/桌面/PLIN-N /PLIN-N/build/command.txt.txt


In [None]:
import re
import sys
from collections import defaultdict

input_file = "/home/ming/桌面/PLIN-N /PLIN-N/build/tracea_run.txt"
output_file = "/home/ming/桌面/PLIN-N /PLIN-N/build/command_plus.txt"
output_file_summary = "/home/ming/桌面/PLIN-N /PLIN-N/build/processed_key_summary.txt"

def extract_keys_from_ycsb_log(input_file, output_file, summary_file=None):
    """Extract keys from a YCSB trace into a command file and an access-count summary.

    Every line containing ``READ``, ``UPDATE`` or ``DELETE`` followed by
    ``usertable <key>`` contributes one access. A leading ``user`` prefix is
    removed from the key. The same normalised key is used for both outputs, so
    the command file and the summary always agree.

    Outputs:
        * ``output_file``: a ``key`` header line, then one ``find <key>`` line
          per access in trace order.
        * summary file: total accesses, number of unique keys, then one
          ``<key>: <count>`` line per key in descending count order (ties keep
          first-seen order).

    The trace is streamed line by line, so only the per-key counters are held in
    memory. The input is opened before the output, so a missing input file never
    creates or truncates the outputs.

    Args:
        input_file: Path of the YCSB trace to read (UTF-8).
        output_file: Path of the command file to write (UTF-8, overwritten).
        summary_file: Path of the summary file to write. When ``None``, the
            module-level ``output_file_summary`` is used, as before.

    Returns:
        None. Progress and errors are reported with ``print``; exceptions are
        not propagated to the caller.
    """
    operation_pattern = re.compile(r'(?:READ|UPDATE|DELETE) usertable (\w+)')
    try:
        summary_path = output_file_summary if summary_file is None else summary_file

        key_counts = defaultdict(int)
        total_keys = 0

        with open(input_file, 'r', encoding='utf-8') as trace, \
                open(output_file, 'w', encoding='utf-8') as commands:
            commands.write("key\n")
            for line in trace:
                match = operation_pattern.search(line)
                if match is None:
                    continue
                key = match.group(1)
                # Strip the "user" prefix only when it is actually present.
                if key.startswith("user"):
                    key = key[4:]
                commands.write(f"find {key}\n")
                key_counts[key] += 1
                total_keys += 1

        print(f"extract {total_keys} keys to {output_file}")

        # Stable sort: keys with equal counts stay in first-seen order.
        sorted_keys = sorted(key_counts.items(), key=lambda item: item[1], reverse=True)

        with open(summary_path, 'w', encoding='utf-8') as summary:
            summary.write(f"总键访问次数: {total_keys}\n")
            summary.write(f"唯一键数量: {len(key_counts)}\n\n")
            summary.write("键访问统计 (按访问次数降序):\n")
            for key, count in sorted_keys:
                summary.write(f"{key}: {count}\n")

        print(f"成功处理 {total_keys} 次键访问，涉及 {len(key_counts)} 个唯一键")
        # The command-file path is already reported above; this line names the
        # summary file that was just written.
        print(f"结果已保存到: {summary_path}")

    except FileNotFoundError:
        print(f"错误: 找不到文件 {input_file}")
    except Exception as e:
        # Best-effort notebook helper: report the problem instead of raising.
        print(f"处理文件时出错: {e}")

extract_keys_from_ycsb_log(input_file, output_file)

In [None]:
# def process_for_lstm():
#     """处理日志为LSTM训练所需的格式"""
#     print("为LSTM处理数据...")
    
#     df = pd.read_csv(
#         LOG_FILE,
#         names=['timestamp', 'key'],
#         header=0 
#     )
    
#     df['time_window'] = (df['timestamp'] // WINDOW_SIZE_MS) * WINDOW_SIZE_MS
    
#     # create time windows and count keys
#     window_groups = df.groupby('time_window')['key'].apply(
#         lambda x: Counter(x).most_common(KEYS_PER_WINDOW)  # 每个窗口取Top热键
#     )
    
#     lstm_data = []
#     window_keys = {} #由hash <window(time), (hot_key,times)>
    
#     group = 0
#     for window, keys in window_groups.items():
#         # 确保每个窗口有KEYS_PER_WINDOW个键
#         if not group:
#             group += 1
#             continue
#         key_list = [key for key, _ in keys] #只关注提取热键本身，不关注其热键的次数
#         if len(key_list) < KEYS_PER_WINDOW:
#             key_list += ['0'] * (KEYS_PER_WINDOW - len(key_list))
#         window_keys[window] = key_list[:KEYS_PER_WINDOW]  # 确保长度一致
    
    
#     """
#     windows = sorted(window_keys.keys()) #a list sorted by time, is the windows name
#     windows_keys = {} as hash <window,hot_keys>
#     """
#     windows = sorted(window_keys.keys()) 
#     # for i in range(len(windows)):
#     #     print(f"窗口 {i}: {windows[i]} - 热键: {window_keys[windows[i]]}")
    
#     for i in range(len(windows) - SEQUENCE_LENGTH):
#         input_seq = []
#         for j in range(SEQUENCE_LENGTH):
#             win = windows[i + j]
#             input_seq.append(window_keys[win])
#         # print(f"round {i + SEQUENCE_LENGTH}")
#         target_win = windows[i + SEQUENCE_LENGTH]
#         target = window_keys[target_win]
        
#         lstm_data.append({
#             "input_sequence": input_seq,
#             "target": target,
#             "start_window": windows[i],
#             "end_window": windows[i + SEQUENCE_LENGTH - 1],
#             "target_window": target_win
#         })
    
#     # 保存处理后的数据
#     with open(PROCESSED_FILE, 'w') as f:
#         writer = csv.writer(f)
#         writer.writerow(["sequence", "target", "start_window", "end_window", "target_window"])
        
#         for item in lstm_data:
#             # 将序列转换为字符串表示，确保所有元素都是字符串类型
#             seq_str = ";".join([",".join(str(key) for key in keys) for keys in item["input_sequence"]])
#             target_str = ",".join(str(key) for key in item["target"])
            
#             writer.writerow([
#                 seq_str,
#                 target_str,
#                 item["start_window"],
#                 item["end_window"],
#                 item["target_window"]
#             ])
    
#     print(f"处理后的数据保存至: {PROCESSED_FILE}")
#     print(f"总序列数: {len(lstm_data)}")
    
#     return lstm_data

# # process_for_lstm()
# class HotspotPredictor(nn.Module):
#     """PyTorch LSTM模型用于热点键预测"""
#     def __init__(self, vocab_size, embedding_dim=64, hidden_dim=128, num_layers=2):
#         super().__init__()
#         self.embedding = nn.Embedding(vocab_size, embedding_dim)
#         self.lstm = nn.LSTM(
#             input_size=embedding_dim * KEYS_PER_WINDOW,
#             hidden_size=hidden_dim,
#             num_layers=num_layers,
#             batch_first=True,
#             bidirectional=True  # 添加双向LSTM
#         )
#         self.fc = nn.Sequential(
#             nn.Linear(2*hidden_dim if num_layers > 1 else hidden_dim, 256),
#             nn.ReLU(),
#             nn.Dropout(0.3),
#             nn.Linear(256, KEYS_PER_WINDOW * vocab_size)  # 预测多个键
#         )
#         # self.fc = nn.Sequential(
#         #     nn.Linear(2*hidden_dim, 256),
#         #     nn.ReLU(),
#         #     nn.Dropout(0.3),
#         #     nn.Linear(256, vocab_size)  # 只预测单个热键
#         # )

#     def forward(self, x):
#         batch_size, seq_len, keys = x.size()
#         x = self.embedding(x)  # (batch, seq, keys, emb)
#         x = x.view(batch_size, seq_len, -1)  # (batch, seq, keys*emb)
        
#         lstm_out, _ = self.lstm(x)  # (batch, seq, hidden)
#         lstm_out = lstm_out[:, -1, :]  # 取最后时间步

#         output = self.fc(lstm_out)
#         return output  # [batch, KEYS_PER_WINDOW * vocab_size]
        
#         # output = self.fc(lstm_out)
#         # return output.view(batch_size, KEYS_PER_WINDOW, -1)  # (batch, KEYS, vocab)

# # ===== PyTorch 数据加载器 =====
# class HotspotDataset(Dataset):
#     """PyTorch数据集加载器"""
#     def __init__(self, sequences, targets):
#         self.sequences = sequences
#         self.targets = targets
        
#     def __len__(self):
#         return len(self.sequences)
    
#     def __getitem__(self, idx):
#         return self.sequences[idx], self.targets[idx]
# # ===== PyTorch 训练和预测 =====
# def train_and_predict_hotkeys():
#     """使用PyTorch LSTM模型训练和预测热点键"""
#     plt.rcParams['axes.unicode_minus'] = False 
#     print("\n===== 开始训练PyTorch LSTM热点预测模型 =====")
    
#     # 1. 加载处理后的数据
#     if not os.path.exists(PROCESSED_FILE):
#         print(f"错误：处理后的数据文件 {PROCESSED_FILE} 不存在")
#         return
    
#     data = pd.read_csv(PROCESSED_FILE)
#     print(f"加载数据集: {len(data)} 条序列")
    
#     # 2. 数据预处理
#     all_keys = set()
#     for seq in data['sequence']:
#         windows = seq.split(';')
#         for window in windows:
#             keys = window.split(',')
#             all_keys.update(keys)
#     all_keys.add('0')
    
#     key_to_int = {key: i for i, key in enumerate(sorted(all_keys))}
#     int_to_key = {i: key for key, i in key_to_int.items()}
#     vocab_size = len(all_keys)
    
#     # 3. 准备训练数据
#     sequences = []
#     targets = []
    
#     for i, row in data.iterrows():
#         seq_str = row['sequence']
#         target_str = row['target']
#         seq_tensor = []
#         windows = seq_str.split(';')
#         for window in windows:
#             keys = window.split(',')
#             window_ints = [key_to_int.get(key, key_to_int['0']) for key in keys[:KEYS_PER_WINDOW]]
#             seq_tensor.append(window_ints)
        
#         # # only take the first key as target
#         # target_key = target_str.split(',')[0]
#         # target_int = key_to_int.get(target_key, key_to_int['0'])
#         target_keys = target_str.split(',')[:KEYS_PER_WINDOW]
#         target_ints = [key_to_int.get(k, key_to_int['0']) for k in target_keys]
#         targets.append(target_ints)
        
#         sequences.append(seq_tensor)
#         # targets.append(target_int)
    
#     # transform to PyTorch tensors
#     sequences_tensor = torch.tensor(sequences, dtype=torch.long)
#     targets_tensor = torch.tensor(targets, dtype=torch.long)
    
#     print(f"输入序列形状: {sequences_tensor.shape}")
    
#     # 4. classify the data into training and testing sets
#     X_train, X_test, y_train, y_test = train_test_split(
#         sequences_tensor, targets_tensor, test_size=0.2, random_state=42
#     )
    
#     # create DataLoader
#     train_dataset = HotspotDataset(X_train, y_train)
#     test_dataset = HotspotDataset(X_test, y_test)
    
#     batch_size = 64
#     train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
#     test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
#     print(f"训练集大小: {len(train_dataset)}, 批次: {len(train_loader)}")
#     print(f"测试集大小: {len(test_dataset)}, 批次: {len(test_loader)}")
    
#     # 5. initialize the model
#     model = HotspotPredictor(
#         vocab_size=vocab_size,
#         embedding_dim=64,
#         hidden_dim=128,
#         num_layers=2
#     ).to(device)
    
#     print("模型结构:")
#     print(model)
    
#     # loss function and optimizer
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
#     scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)
    
#     # 6. begin training the model
#     print("\n开始训练模型...")
#     num_epochs = 40
#     train_losses = []
#     val_losses = []
#     val_accuracies = []
#     top5_accuracies = []
    
#     for epoch in range(num_epochs):
#         model.train()
#         running_loss = 0.0
        
#         for inputs, labels in train_loader:
#             inputs, labels = inputs.to(device), labels.to(device)
            
#             # 前向传播
#             outputs = model(inputs)
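#             # labels are [batch, KEYS_PER_WINDOW]; CrossEntropyLoss needs a 1D target per
#             # logit slice, so sum the per-key losses (same scheme as the validation loop).
#             loss = sum(
#                 criterion(outputs[:, k * vocab_size:(k + 1) * vocab_size], labels[:, k])
#                 for k in range(KEYS_PER_WINDOW)
#             )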
            
#             # 反向传播和优化
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()
            
#             running_loss += loss.item() * inputs.size(0)
        
#         # 计算平均训练损失
#         epoch_loss = running_loss / len(train_dataset)
#         train_losses.append(epoch_loss)
        
#         # 验证阶段
#         model.eval()
#         val_loss = 0.0
#         correct = 0
#         total = 0
#         top5_correct = 0
        
#         with torch.no_grad():
#             for inputs, labels in test_loader:
#                 inputs, labels = inputs.to(device), labels.to(device)
#                 outputs = model(inputs)
#                 # loss = criterion(outputs, labels)
#                 loss = 0
#                 for i in range(KEYS_PER_WINDOW):
#                     loss += criterion(outputs[:, i* vocab_size : (i+1)*vocab_size], labels[:, i])
#                 val_loss += loss.item() * inputs.size(0)
                

#                 _, predicted = torch.max(outputs.data, 1)
#                 total += labels.size(0)
#                 correct += (predicted == labels).sum().item()
                
#                 _, top5_preds = torch.topk(outputs, 15, dim=1)
#                 top5_correct += sum([1 for i in range(labels.size(0)) if labels[i] in top5_preds[i]])
        
#         val_loss = val_loss / len(test_dataset)
#         val_losses.append(val_loss)
#         accuracy = correct / total
#         top5_accuracy = top5_correct / total
#         top5_accuracies.append(top5_accuracy)
#         val_accuracies.append(accuracy)
        
#         # 更新学习率
#         scheduler.step(val_loss)
        
#         print(f"Epoch [{epoch+1}/{num_epochs}] | "
#               f"训练损失: {epoch_loss:.4f} | 验证损失: {val_loss:.4f} | "
#               f"准确率: {accuracy:.4f} | Top-5准确率: {top5_accuracy:.4f}")
    
#     # 绘制训练曲线
#     plt.figure(figsize=(12, 5))
#     plt.subplot(1, 3, 1)
#     plt.plot(train_losses, label='train_loss')
#     plt.plot(val_losses, label='validation_loss')
#     plt.xlabel('Epoch')
#     plt.ylabel('loss')
#     plt.legend()
#     plt.title('training and validation loss')
    
#     plt.subplot(1, 3, 2)
#     plt.plot(val_accuracies, label='validation_accuracy')
#     plt.xlabel('Epoch')
#     plt.ylabel('accuracy')
#     plt.legend()
#     plt.title('validation accuracy')

#     plt.subplot(1, 3, 3)
#     plt.plot(top5_accuracies, label='Top-5 accuracy')
#     plt.xlabel('Epoch')
#     plt.ylabel('accuracy')
#     plt.legend() 
#     plt.title('Top-5 accuracy over epochs')    
    
#     # plt.tight_layout()
#     plt.show()
#     # plt.savefig("/home/ming/Lion/training_metrics.png")
#     # plt.close()
    
#     # 7. 保存模型
#     torch.save({
#         'model_state_dict': model.state_dict(),
#         'key_to_int': key_to_int,
#         'int_to_key': int_to_key,
#         'vocab_size': vocab_size
#     }, MODEL_PATH)
#     print(f"模型已保存至: {MODEL_PATH}")

#     # 8. 使用模型进行预测
#     def predict_next_hotkeys(model, sequence, key_to_int, top_k=20):
#         """预测下一个窗口最可能的热键"""
#         # 准备输入数据
#         seq_tensor = []
#         windows = sequence.split(';')
#         for window in windows:
#             keys = window.split(',')
#             window_ints = [key_to_int.get(key, key_to_int['0']) for key in keys[:KEYS_PER_WINDOW]]
#             seq_tensor.append(window_ints)
        
#         input_tensor = torch.tensor([seq_tensor], dtype=torch.long).to(device)
        
#         # 进行预测
#         model.eval()
#         with torch.no_grad():
#             output = model(input_tensor)
#             probabilities = torch.softmax(output[0], dim=0) #类别预测的概率
        
#         # 获取top_k预测
#         top_probs, top_indices = torch.topk(probabilities, top_k)
#         top_keys = [(int_to_key[i.item()], top_probs[j].item()) 
#                    for j, i in enumerate(top_indices)]
        
#         return top_keys
    
#     # 示例预测
#     if len(data) > 0:
#         sample_sequence = data.iloc[0]['sequence']
#         predicted = predict_next_hotkeys(model, sample_sequence, key_to_int)
        
#         actual_target = data.iloc[0]['target'].split(',')[0]
        
#         print("\n示例预测:")
#         print(f"输入序列: {sample_sequence}")
#         print(f"实际下一个热键: {actual_target}")
#         print("预测的下一个热键 (概率):")
#         for key, prob in predicted:
#             print(f"  {key}: {prob:.4f}")
    
#     return model, key_to_int, int_to_key
# # ===== 主执行流程 =====
# if __name__ == "__main__":
#     # 确保输出目录存在
#     # os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
    
#     # # 清空旧文件
#     for file_path in [OUTPUT_FILE, LOG_FILE, PROCESSED_FILE]:
#         if os.path.exists(file_path):
#             os.remove(file_path)
            
#     run_query_workload()
#     process_for_lstm()
    
#     # 训练和预测热点键
#     model, key_to_int, int_to_key = train_and_predict_hotkeys()
    
#     print("\n所有处理完成! PyTorch LSTM模型已训练并可用于热点键预测。")
    
    


Running query workload...
All Keys:100

Top 10 Hot Keys:
4: 20302 queries (4.06%)
10: 20213 queries (4.04%)
17: 20177 queries (4.04%)
18: 20168 queries (4.03%)
13: 20113 queries (4.02%)
19: 20102 queries (4.02%)
5: 20050 queries (4.01%)
1: 20046 queries (4.01%)
14: 20041 queries (4.01%)
11: 20014 queries (4.00%)
0: 19964 queries (3.99%)
16: 19944 queries (3.99%)
9: 19932 queries (3.99%)
12: 19931 queries (3.99%)
15: 19900 queries (3.98%)
7: 19896 queries (3.98%)
8: 19874 queries (3.97%)
3: 19870 queries (3.97%)
6: 19852 queries (3.97%)
2: 19790 queries (3.96%)
94: 1327 queries (0.27%)
22: 1325 queries (0.27%)
27: 1323 queries (0.26%)
75: 1321 queries (0.26%)
69: 1306 queries (0.26%)
99: 1298 queries (0.26%)
46: 1298 queries (0.26%)
52: 1293 queries (0.26%)
45: 1288 queries (0.26%)
74: 1287 queries (0.26%)
89: 1286 queries (0.26%)
21: 1284 queries (0.26%)
25: 1282 queries (0.26%)
60: 1279 queries (0.26%)
31: 1278 queries (0.26%)
32: 1276 queries (0.26%)
54: 1273 queries (0.25%)
90: 1273




开始训练模型...


RuntimeError: 0D or 1D target tensor expected, multi-target not supported