# 数据预处理

In [42]:
import numpy as np
import pandas as pd

In [43]:
# Load the lane-change (LC) and lane-keeping (LK) sample arrays.
dataLC = np.load('../data/x_all_lc.npy')
dataLK = np.load('../data/x_all_lk.npy')
print(dataLC.shape)
# Load the labels. NOTE: before running, delete the CSV header row
# ("sample id, risk flag (1: high risk; 0: low risk)"); np.loadtxt is called
# here with dtype=int and no skiprows, so the file must hold only numeric rows.
dataFlag = np.loadtxt('../data/risk_label.csv', delimiter=",", dtype=int)
print(dataFlag.shape)

(847, 50, 42)
(847, 2)


In [44]:
# 确定车辆位置关系 并且处理数据中的空值问题
# 排序方法为：本车 -> 左前车 -> 前车 -> 右前车 -> 左后车 -> 后车 -> 右后车
def DataSorting(sequence):
    """Reorder the vehicles of every sample into a fixed positional layout.

    Each sample is read as seven consecutive 6-column blocks: the ego vehicle
    in columns 0-5, followed by six surrounding vehicles. The output keeps the
    ego vehicle in columns 0-5 and writes each surrounding vehicle into the
    slot chosen by ``CarPosition``/``AddCar``:
    ego -> left-front -> front -> right-front -> left-rear -> rear -> right-rear.

    Slots start out as zeros. A vehicle that ``CarPosition`` cannot place
    (it returns ``False``) is skipped, so its slot stays zero. If two vehicles
    are classified into the same slot, the one appearing later in the input
    overwrites the earlier one.

    Args:
        sequence: array of shape (num_samples, num_steps, 42).

    Returns:
        A new float array of shape (num_samples, num_steps, 42). For an empty
        input (num_samples == 0) an empty array of that shape is returned.
    """
    numSamples, numSteps = sequence.shape[0], sequence.shape[1]
    # Allocate the result once; appending sample by sample would copy the
    # whole accumulated array on every iteration.
    sortedSeq = np.zeros((numSamples, numSteps, 42))
    for i in range(numSamples):
        data = sequence[i, :, :]
        sortedData = np.zeros((numSteps, 42))
        targetCar = data[:, 0:6]
        sortedData[:, 0:6] = targetCar
        for j in range(1, 7):
            otherCar = data[:, j*6:j*6+6]
            flag = CarPosition(targetCar, otherCar)
            sortedData = AddCar(sortedData, otherCar, flag)
        sortedSeq[i] = sortedData
    return sortedSeq

In [45]:
# 根据x,y坐标确定车辆的位置关系
def CarPosition(car1, car2):
    """Classify where ``car2`` is relative to ``car1`` at the last time step.

    Only the final row of each array is used: column 0 is compared as the
    longitudinal (x) coordinate and column 1 as the lateral (y) coordinate.

    Args:
        car1: 2-D array for the reference vehicle (rows are time steps).
        car2: 2-D array for the other vehicle, same layout.

    Returns:
        An int position code, or ``False`` when no position applies:
            1 left-front, 2 front, 3 right-front,
            4 left-rear,  5 rear,  6 right-rear.
        ``False`` is returned when any of the four coordinates is missing
        (``None`` or NaN) or when both vehicles have the same x coordinate.
    """
    # Lateral offset beyond which the other car counts as being in an
    # adjacent lane (same units as the y coordinate).
    laneWidth = 1.5

    x1, y1 = car1[-1, 0], car1[-1, 1]
    x2, y2 = car2[-1, 0], car2[-1, 1]

    # Missing data: a float array stores absent values as NaN, never as None,
    # so an ``is None`` test alone can never detect them.
    if any(v is None or np.isnan(v) for v in (x1, y1, x2, y2)):
        return False

    if x2 > x1:
        left, center, right = 1, 2, 3  # car2 is ahead of car1
    elif x2 < x1:
        left, center, right = 4, 5, 6  # car2 is behind car1
    else:
        return False  # identical x: neither ahead nor behind

    lateral = y2 - y1
    if lateral > laneWidth:
        return right
    if lateral < -laneWidth:
        return left
    if abs(lateral) <= laneWidth:
        return center
    return False

In [46]:
# 根据车辆位置关系，重构数据
def AddCar(sequence, carData, carType):
    """Write one vehicle's data into the slot matching its position code.

    Position codes and the columns they map to:
        1 left-front  -> 6:12     2 front -> 12:18    3 right-front -> 18:24
        4 left-rear   -> 24:30    5 rear  -> 30:36    6 right-rear  -> 36:42

    Args:
        sequence: 2-D array holding all vehicle slots; modified in place.
        carData: data assigned to the selected six-column slot.
        carType: position code; any value that matches none of 1-6
            (for example ``False``) leaves ``sequence`` untouched.

    Returns:
        The same ``sequence`` object that was passed in.
    """
    for code in range(1, 7):
        if carType == code:
            # Slot ``code`` starts at column 6 * code and is six columns wide.
            firstCol = code * 6
            sequence[:, firstCol:firstCol + 6] = carData
            break
    return sequence

## 数据重构，根据位置关系调整数据顺序：本车 -> 左前车 -> 前车 -> 右前车 -> 左后车 -> 后车 -> 右后车

In [47]:
# Reorder the lane-change samples into the fixed vehicle-position layout.
dataSortedLC = DataSorting(dataLC)
dataSortedLC.shape

(847, 50, 42)

In [48]:
# Reorder the lane-keeping samples into the fixed vehicle-position layout.
dataSortedLK = DataSorting(dataLK)
dataSortedLK.shape

(847, 50, 42)

## 筛选出换道高风险数据与低风险数据

In [49]:
# Split the reordered lane-change samples by the risk flag stored in the last
# column of dataFlag (1 = high risk, anything else = low risk).
# NOTE(review): row i of dataFlag is taken to label sample i of dataSortedLC;
# the sample-id column is not consulted -- confirm the two files share the
# same ordering.
# Boolean-mask indexing selects each group in one pass, keeps the original
# sample order, and yields an empty array (instead of None) when a group has
# no samples.
isHighRisk = dataFlag[:dataSortedLC.shape[0], -1] == 1
highRisk = dataSortedLC[isHighRisk]
lowRisk = dataSortedLC[~isHighRisk]
print("低风险数据维度：", lowRisk.shape)
print("高风险数据维度：", highRisk.shape)

低风险数据维度： (795, 50, 42)
高风险数据维度： (52, 50, 42)


In [50]:
## 提取x,y坐标以及x,y速度
def GetPosSpeed(sequence):
    """Keep the first four features of each of the seven vehicles.

    The last axis of ``sequence`` is read as seven consecutive 6-column
    vehicle blocks; columns 0-3 of every block are kept (per the notebook's
    note: x/y position and x/y speed) and the remaining two are dropped.

    Args:
        sequence: array of shape (num_samples, num_steps, 42).

    Returns:
        A new array of shape (num_samples, num_steps, 7, 4) with the same
        dtype as the input. An empty input (num_samples == 0) yields an empty
        array of that shape.
    """
    # One slice per vehicle, stacked along a new "vehicle" axis. This builds
    # the result in a single allocation, where repeated np.append would
    # recopy everything accumulated so far on each iteration.
    perCar = [sequence[:, :, j*6:j*6+4] for j in range(7)]
    return np.stack(perCar, axis=2)

## 划分训练集与测试集，划分比例train:test = 7:3
#### 各类数据的个数如下表所示
|     | 换道低风险 | 换道高风险 | 车道保持 |
|-----| ---- |-------| ---- |
| 训练集 | 557 | 36    | 593 |
| 测试集 | 238 | 16    | 254 |
| 合计 | 795 | 52    | 847 |

In [57]:
# Training set: the first 557 low-risk, first 36 high-risk and first 593
# lane-keeping samples, taken in stored order (no shuffling is done here).
train_lowRisk = GetPosSpeed(lowRisk[:557, :, :])
train_highRisk = GetPosSpeed(highRisk[:36, :, :])
train_laneKeep = GetPosSpeed(dataSortedLK[:593, :, :])
print("训练集，低风险、高风险、车道保持：", train_lowRisk.shape, train_highRisk.shape, train_laneKeep.shape)

训练集，低风险、高风险、车道保持： (557, 50, 7, 4) (36, 50, 7, 4) (593, 50, 7, 4)


In [58]:
# Test set: every sample after the training cut-offs (index 557 onward for
# low risk, 36 onward for high risk, 593 onward for lane keeping).
test_lowRisk = GetPosSpeed(lowRisk[557:, :, :])
test_highRisk = GetPosSpeed(highRisk[36:, :, :])
test_laneKeep = GetPosSpeed(dataSortedLK[593:, :, :])
print("测试集，低风险、高风险、车道保持：", test_lowRisk.shape, test_highRisk.shape, test_laneKeep.shape)

测试集，低风险、高风险、车道保持： (238, 50, 7, 4) (16, 50, 7, 4) (254, 50, 7, 4)


## 训练集测试集数据保存

In [59]:
# Write the six train/test arrays to .npy files under ../data/.
np.save("../data/train_lowRisk.npy", train_lowRisk)
np.save("../data/train_highRisk.npy", train_highRisk)
np.save("../data/train_laneKeep.npy", train_laneKeep)
np.save("../data/test_lowRisk.npy", test_lowRisk)
np.save("../data/test_highRisk.npy", test_highRisk)
np.save("../data/test_laneKeep.npy", test_laneKeep)

In [60]:
# Sanity check: report whether the lane-keeping training array contains any
# NaN value.
import torch
a = torch.tensor(train_laneKeep)
torch.any(torch.isnan(a))

tensor(False)