In [2]:
import pandas as pd
import os

In [3]:
# Directory the raw radar text dumps are read from.
DATA_PRE_DIR = './datasets/radar'
# Presumably where processed output is cached — not used in the visible cells; TODO confirm.
DATA_AFTER_DIR = './datasets/cache'

# Columns kept from the raw radar table (English glosses of the header names).
FEATURES_COLUMNS = [
    '航班号(I170)',    # flight number
    '经纬度(I130)',    # longitude/latitude, combined in one field
    '系统接收时间',    # system receive time
    '地速(I160)',      # ground speed
    '几何高度(I140)',  # geometric height
    '飞行高度(I145)',  # flight altitude
    '航向(I160)',      # heading
]

In [4]:
def get_longitude(value: str, **extra):
    """Extract the longitude from a combined ``"<longitude>,<latitude>"`` string.

    Args:
        value: Comma-separated coordinate text; the first field is parsed as
            the longitude. Missing markers (``None``, ``pd.NA``, float NaN)
            and blank strings are accepted.
        **extra: Accepted but unused by the body.

    Returns:
        The first field as a ``float``, or ``pd.NA`` when ``value`` is
        missing or blank.

    Raises:
        ValueError: If the first field is not a valid float literal.
    """
    # pd.isna covers None, pd.NA and float NaN; an identity check against
    # None / pd.NA would let NaN through to .strip() and raise AttributeError.
    if pd.isna(value) or value.strip() == '':
        return pd.NA
    return float(value.split(',')[0])


def get_latitude(value: str, **extra):
    """Extract the latitude from a combined ``"<longitude>,<latitude>"`` string.

    Args:
        value: Comma-separated coordinate text; the second field is parsed as
            the latitude. Missing markers (``None``, ``pd.NA``, float NaN)
            and blank strings are accepted.
        **extra: Accepted but unused by the body.

    Returns:
        The second field as a ``float``, or ``pd.NA`` when ``value`` is
        missing or blank.

    Raises:
        IndexError: If ``value`` contains no comma, so there is no second field.
        ValueError: If the second field is not a valid float literal.
    """
    # pd.isna covers None, pd.NA and float NaN; an identity check against
    # None / pd.NA would let NaN through to .strip() and raise AttributeError.
    if pd.isna(value) or value.strip() == '':
        return pd.NA
    return float(value.split(',')[1])

In [5]:
# Path of the single radar dump that the load cell below reads.
# NOTE(review): the file name looks like a date (2021-04-01) — confirm.
p = os.path.join(DATA_PRE_DIR, '20210401.txt')

In [6]:
# Load the raw tab-separated radar dump.
df: pd.DataFrame = pd.read_table(p, sep='\t', encoding='UTF-8')

# Empty / whitespace-only cells become NA.
df = df.replace(to_replace=r'^\s*$', value=pd.NA, regex=True)
# Zero-valued cells such as "0.00" become NA as well.
df = df.replace(to_replace=r'^\s*?0*?\.?0*?\s*?$', value=pd.NA, regex=True)

# Discard rows that lack a flight number or a receive time,
# then keep only the feature columns (as an independent copy).
df = df.dropna(axis=0, how='any', subset=['航班号(I170)', '系统接收时间'])
df = df[FEATURES_COLUMNS].copy(deep=True)

# Split the combined coordinate field into separate longitude / latitude columns.
df['经度'] = df['经纬度(I130)'].apply(get_longitude)
df['纬度'] = df['经纬度(I130)'].apply(get_latitude)
df = df.drop(columns=['经纬度(I130)'])

# TODO: interpolation of missing values.

# Dtype conversion — currently disabled:
# df[['航班号(I170)', '系统接收时间']] = df[['航班号(I170)', '系统接收时间']].astype(str)
# df[['经度', '纬度', '地速(I160)', '几何高度(I140)', '飞行高度(I145)', '航向(I160)']] = df[['经度', '纬度', '地速(I160)', '几何高度(I140)', '飞行高度(I145)', '航向(I160)']].astype(float)

# Distinct flight numbers present after cleaning.
pnames = set(df['航班号(I170)'])

# Drop the flight-altitude column and shorten the remaining column names.
df = df.drop(columns=['飞行高度(I145)'])
df = df.rename(columns={
    '航班号(I170)': '航班号',
    '地速(I160)': '速度',
    '几何高度(I140)': '高度',
    '航向(I160)': '航向',
})

# Spot-check two rows by index label (.loc slices include both endpoints).
df.loc[1564338:1564339, ['航班号', '经度']]

Unnamed: 0,航班号,经度
1564338,AKJC1234,
1564339,QTR895,97.268


In [7]:
# Preview the 航班号 and 高度 columns for index labels 19 through 30 (inclusive).
df.loc[19:30, ['航班号', '高度']]

Unnamed: 0,航班号,高度
19,OKA3044,10858.5
20,HXA4942,
21,CES2770,1249.68
22,HXA4884,541.02
23,HXA4884,541.02
24,HXA4884,541.02
25,CLX7441,10660.38
26,CLX7441,10660.38
27,CES2278,10332.72
28,CES2376,7056.12


In [9]:
# Same label range as nested Python lists; 高度 is selected twice on purpose,
# so each row is [高度, 航班号, 高度].
df.loc[19:30, ['高度', '航班号', '高度']].to_numpy().tolist()

[['10858.50', 'OKA3044 ', '10858.50'],
 [<NA>, 'HXA4942 ', <NA>],
 ['1249.68', 'CES2770 ', '1249.68'],
 ['541.02', 'HXA4884 ', '541.02'],
 ['541.02', 'HXA4884 ', '541.02'],
 ['541.02', 'HXA4884 ', '541.02'],
 ['10660.38', 'CLX7441 ', '10660.38'],
 ['10660.38', 'CLX7441 ', '10660.38'],
 ['10332.72', 'CES2278 ', '10332.72'],
 ['7056.12', 'CES2376 ', '7056.12'],
 ['9448.80', 'CSC8814 ', '9448.80'],
 ['9448.80', 'CSC8814 ', '9448.80']]

In [10]:
# Preview the 航班号 and 高度 columns for index labels 19 through 30 (inclusive).
df.loc[19:30, ['航班号', '高度']]

Unnamed: 0,航班号,高度
19,OKA3044,10858.5
20,HXA4942,
21,CES2770,1249.68
22,HXA4884,541.02
23,HXA4884,541.02
24,HXA4884,541.02
25,CLX7441,10660.38
26,CLX7441,10660.38
27,CES2278,10332.72
28,CES2376,7056.12


In [11]:
# Transposed view of the same slice: one row per column, one column per index label.
df.loc[19:30, ['航班号', '高度']].transpose()

Unnamed: 0,19,20,21,22,23,24,25,26,27,28,29,30
航班号,OKA3044,HXA4942,CES2770,HXA4884,HXA4884,HXA4884,CLX7441,CLX7441,CES2278,CES2376,CSC8814,CSC8814
高度,10858.50,,1249.68,541.02,541.02,541.02,10660.38,10660.38,10332.72,7056.12,9448.80,9448.80


In [12]:
# Transposed slice as nested lists: first all 航班号 values, then all 高度 values.
df.loc[19:30, ['航班号', '高度']].transpose().to_numpy().tolist()

[['OKA3044 ',
  'HXA4942 ',
  'CES2770 ',
  'HXA4884 ',
  'HXA4884 ',
  'HXA4884 ',
  'CLX7441 ',
  'CLX7441 ',
  'CES2278 ',
  'CES2376 ',
  'CSC8814 ',
  'CSC8814 '],
 ['10858.50',
  <NA>,
  '1249.68',
  '541.02',
  '541.02',
  '541.02',
  '10660.38',
  '10660.38',
  '10332.72',
  '7056.12',
  '9448.80',
  '9448.80']]