## Import packages and read the round 2 data

In [None]:
from scipy import interpolate
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN

In [None]:
path = 'round2/'
# Competition data set
training = pd.read_csv(f'{path}round2_competition.csv')
# Sensor table
sensors = pd.read_csv(f'{path}round2_sensors.csv')
# An existing submission file, used as the template of ids to predict
validation = pd.read_csv(f'{path}Submission_db_NN_huigui.csv')
# Test set: the rows whose id appears in the submission template
# (selected before the incomplete rows are dropped from `training`)
to_locate = training['id'].isin(validation['id'])
testing = training.loc[to_locate]
# Keep only the training rows that have no missing value in any column
training = training.dropna()


## Basic functions

In [None]:
# Coordinate transformation, WGS84 and ECEF
from math import radians, cos, sin, asin, sqrt, pi

# WGS84 to ECEF
def llh2ecef(llh):
    """Convert a WGS84 latitude/longitude/height triple to ECEF in kilometres.

    llh is indexable as (latitude in degrees, longitude in degrees, height).
    The height is added to radii expressed in metres.
    Returns [x, y, z] in km, each component rounded to 6 decimals.
    """
    deg2rad = pi / 180.0
    semi_major = 6378137.0
    flattening = 1.0/298.257223563
    semi_minor = semi_major * (1 - flattening)
    ecc_sq = 1 - semi_minor * semi_minor / (semi_major * semi_major)

    lat_rad = llh[0] * deg2rad
    lng_rad = llh[1] * deg2rad
    height = llh[2]
    sin_lat, cos_lat = sin(lat_rad), cos(lat_rad)
    sin_lng, cos_lng = sin(lng_rad), cos(lng_rad)

    # Radius of curvature in the prime vertical at this latitude
    prime_vertical = semi_major / sqrt(1 - (sin_lat * sin_lat * ecc_sq))

    ecef_metres = (
        (prime_vertical + height) * cos_lat * cos_lng,
        (prime_vertical + height) * cos_lat * sin_lng,
        (prime_vertical * (1 - ecc_sq) + height) * sin_lat,
    )
    # metres -> kilometres, rounded to 6 decimals
    return [round(component / 1000, 6) for component in ecef_metres]

# ECEF to WGS84
def ecef2lla(aa):
    """Convert ECEF coordinates in kilometres to WGS84 lat/lon/height.

    Uses the closed-form (non-iterative) solution commonly attributed to
    Heikkinen / Zhu.

    Parameters
    ----------
    aa : indexable of three numbers (x, y, z) in km.

    Returns
    -------
    list : [latitude in degrees, longitude in degrees, height in metres].
        If the components are vectors, only the first point is returned
        (same as the original implementation).
    """
    # The formulas below work in metres; shape every component as a column.
    x, y, z = (np.asarray(aa[k] * 1000, dtype=float).reshape(-1, 1)
               for k in range(3))

    a = 6378137                       # WGS84 semi-major axis [m]
    e_sq = 6.69437999014e-3           # first eccentricity squared
    flattening = 1/298.257223563
    b = a*(1 - flattening)            # semi-minor axis [m]

    # calculations:
    r = np.sqrt(x**2 + y**2)          # distance from the rotation axis
    ep_sq = (a**2 - b**2)/b**2        # second eccentricity squared
    ee = a**2 - b**2
    big_f = (54*b**2)*(z**2)
    # G = r^2 + (1 - e^2) z^2 - e^2 (a^2 - b^2).
    # The previous code multiplied the last term by 2.
    g = r**2 + (1 - e_sq)*(z**2) - e_sq*ee
    c = (e_sq**2)*big_f*r**2/(g**3)
    s = (1 + c + np.sqrt(c**2 + 2*c))**(1/3.)
    p = big_f/(3.*(g**2)*(s + (1./s) + 1)**2)
    q = np.sqrt(1 + 2*p*e_sq**2)
    r_0 = -(p*e_sq*r)/(1+q) + np.sqrt(0.5*(a**2)*(1+(1./q)) - p*(z**2)*(1-e_sq)/(q*(1+q)) - 0.5*p*(r**2))
    u = np.sqrt((r - e_sq*r_0)**2 + z**2)
    v = np.sqrt((r - e_sq*r_0)**2 + (1 - e_sq)*z**2)
    z_0 = (b**2)*z/(a*v)
    h = u*(1 - b**2/(a*v))
    phi = np.arctan((z + ep_sq*z_0)/r)
    lambd = np.arctan2(y, x)

    return [(phi*180/np.pi)[0][0], (lambd*180/np.pi)[0][0], h[0][0]]

In [None]:
# Removal of discrete points
def three_sigma(dataset, n= 1):
    """Return a copy of dataset without the rows whose column-1 value is an outlier.

    A row is an outlier when its value in column 1 lies more than
    n standard deviations away from the mean of column 1.
    dataset is a 2-D numpy array; it is not modified.
    """
    values = dataset[:, 1]
    deviation = abs(values - np.mean(values))
    is_outlier = deviation > n * np.std(values)
    # Boolean indexing returns a new array, like np.delete did
    return dataset[~is_outlier]

In [None]:
# Rearrange table
def getmeasurements(df):
    '''
    Expand the 'measurements' string column into one row per inner list.

    input:  df with columns 'id', 'timeAtServer' and 'measurements'
            (such as round2_competition.csv); each 'measurements' entry is
            a string of the form '[[a,b,c],[d,e,f]]'
    output: DataFrame with columns id, timeAtServer, serial, time, db;
            the three parsed values are floats
    '''
    message_ids = df['id'].tolist()
    server_times = df['timeAtServer'].tolist()
    raw_measurements = df['measurements'].tolist()

    rows = []
    for msg_id, server_time, raw in zip(message_ids, server_times, raw_measurements):
        # Strip the outer '[[' and ']]', then cut between the inner lists
        for chunk in raw[2:-2].split('],['):
            # Each inner list is a comma-separated run of numbers
            numbers = [float(token) for token in chunk.split(',')]
            rows.append([msg_id, server_time] + numbers)

    # Convert to a DataFrame and name the positional columns
    dff = pd.DataFrame(rows)
    column_names = {0:'id', 1:'timeAtServer', 2:'serial', 3:'time', 4:'db'}
    return dff.rename(columns=column_names)

# Get accurate sensor time difference

## Coordinate transformation of known data

In [None]:
# Coordinate transformation (WGS84 -> ECEF, km) of the known positions

# Known aircraft positions of the training data; baroAltitude is used as height
train_local_llh = training[['latitude','longitude','baroAltitude']].values
train_local_xyz = np.array([llh2ecef(row) for row in train_local_llh]).T
for axis, column in enumerate(('x', 'y', 'z')):
    training[column] = train_local_xyz[axis]

# Sensor positions, stored in S_x / S_y / S_z
S_local_llh = sensors[['latitude','longitude','height']].values
S_local_xyz = np.array([llh2ecef(row) for row in S_local_llh]).T
for axis, column in enumerate(('S_x', 'S_y', 'S_z')):
    sensors[column] = S_local_xyz[axis]

## Split information and make it into a usable DataFrame

### Split and restructure the training data

In [None]:
# Split the 'measurements' strings of the training data:
# getmeasurements returns one row per parsed measurement with the columns
# id, timeAtServer, serial, time, db
training_split = getmeasurements(training)

In [None]:
# Attach the position of the receiving sensor to every measurement row
training_split = training_split.merge(sensors, on=['serial'])
training_split = training_split.rename(
    columns={'latitude':'S_latitude','longitude':'S_longitude','height':'S_height'})
training_split = training_split.drop(columns=['type'])

# Attach the known position of the signal source (the aircraft) by message id
training_split = training_split.merge(
    training[['id','latitude','longitude','geoAltitude','baroAltitude','aircraft','x','y','z']],
    on=['id'])
# Order by message, then sensor, and renumber the rows
training_split = training_split.sort_values(['id','serial']).reset_index(drop=True)

In [None]:
# Theoretical time of flight from the aircraft to each receiving sensor:
# Euclidean distance between the two ECEF positions (km) scaled by 10000/2.9965
sensor_xyz = training_split[['S_x','S_y','S_z']].values
source_xyz = training_split[['x','y','z']].values
squared_diff = (sensor_xyz - source_xyz)**2
tof = (squared_diff[:, 0] + squared_diff[:, 1] + squared_diff[:, 2])**0.5 * 10000/2.9965
training_split['TOF'] = tof
training_split.head()

### Split and restructure the data to be predicted

In [None]:
# One row per parsed measurement of the messages that must be located
testing_list = getmeasurements(testing)

# Attach the position of the receiving sensor to every measurement row
test_data = testing_list.merge(sensors, on=['serial'])
test_data = test_data.rename(
    columns={'latitude':'S_latitude','longitude':'S_longitude','height':'S_height'})
test_data = test_data.drop(columns=['type'])

# Attach the barometric altitude and aircraft number of each message
test_data = test_data.merge(testing[['id','baroAltitude','aircraft']], on=['id'])
test_data = test_data.sort_values(['id','serial']).reset_index(drop=True)
test_data

## According to the sensor group, calculate the time difference between two sensors

In [None]:
# Load the sensor pair information from a NumPy file.
# Each entry is indexed below as pair[0] / pair[1] and matched against the
# 'serial' column, i.e. it holds the two sensor serials of one pair.
pair_set = np.load('pair_set.npy')

In [None]:
# Clock-corrected time differences of the test data, one row per (id, pair).
# The per-pair frames are collected in a list and concatenated once after the
# loop, which avoids re-copying the growing table for every pair.
test_without_shift = pd.DataFrame({'id':[],'time':[], 'S1':[], 'S2':[], 'tdoa':[]})
pair_frames = []

# The data is traversed pair by pair; i is the 1-based counter shown in the log
for i, pair in enumerate(pair_set, 1):
    try:
        pair_serials = [pair[0], pair[1]]

        # Training rows received by either sensor of the pair, reshaped to one
        # row per message id. The suffixes _1 / _2 follow the row order within
        # each id (training_split is sorted by id, then serial).
        # NOTE(review): S1/S2 below come from pair[0]/pair[1]; they line up
        # with _1/_2 only if pair[0] < pair[1] -- confirm for pair_set.
        training_split_2true = training_split[['id','serial','time','TOF','aircraft']].loc[(training_split.serial.isin(pair_serials))]
        df_out = training_split_2true.set_index(['id',training_split_2true.groupby('id').cumcount()+1]).unstack().sort_index(level=1, axis=1)
        df_out.columns = df_out.columns.map('{0[0]}_{0[1]}'.format)
        df_out.dropna(inplace = True)  # keep only ids that have both rows
        df_out.drop(['aircraft_2'], axis=1,inplace = True)

        # Same reshaping for the data to be located. The id stays in the index
        # on purpose: it is read back through test_out.index below.
        test_2true = test_data[['id','serial','time','aircraft']].loc[(test_data.serial.isin(pair_serials))]
        test_out = test_2true.set_index(['id',test_2true.groupby('id').cumcount()+1]).unstack().sort_index(level=1, axis=1)
        test_out.columns = test_out.columns.map('{0[0]}_{0[1]}'.format)
        test_out.dropna(inplace = True)
        test_out.drop(['aircraft_2'], axis=1,inplace = True)

        # xy:      [time_1, measured time difference - theoretical (TOF) difference]
        # test_xy: [time_1, measured time difference, id]
        xy = np.array([df_out.time_1,df_out.time_1.values - df_out.time_2.values -(df_out.TOF_1.values - df_out.TOF_2.values)]).T
        test_xy = np.array([test_out.time_1,test_out.time_1.values - test_out.time_2.values, test_out.index]).T

        # Pass 1: a linear fit models the clock error of the training data;
        # the fitted function is subtracted from training and test data
        xy = three_sigma(xy,n = 1)
        p1 = np.poly1d(np.polyfit(xy[:,0],xy[:,1], 1))
        xy[:,1] = xy[:,1] - p1(xy[:,0])
        test_xy[:,1] = test_xy[:,1] - p1(test_xy[:,0])

        # Keep the test rows strictly between the first and last training
        # timestamps held in xy
        test_xy = test_xy[np.where(xy[0,0] <test_xy[:,0])]
        test_xy = test_xy[np.where(xy[-1,0]>test_xy[:,0])]

        # Passes 2-4: polynomial fits of degree 7, 20 and 20 on the remaining
        # clock error, each subtracted from training and test data. Before the
        # last fit the test data is also cleaned of outliers.
        for degree, clean_test_first in ((7, False), (20, False), (20, True)):
            xy = three_sigma(xy,n = 3)
            if clean_test_first:
                test_xy = three_sigma(test_xy,n = 3)
            p1 = np.poly1d(np.polyfit(xy[:,0],xy[:,1], degree))
            xy[:,1] = xy[:,1] - p1(xy[:,0])
            test_xy[:,1] = test_xy[:,1] - p1(test_xy[:,0])

        # Peak-to-peak residual clock error of the training data
        spread = max(xy[:,1]) - min(xy[:,1])

        # Residual below 1000 ns: the time difference can be used directly
        if spread < 1000:
            test_xy = three_sigma(test_xy,n = 3)

        # Residual from 1000 up to (not including) 50000: the remaining error
        # is removed by linear interpolation of the training residual.
        # NOTE(review): the original comments said 5000 ns while the code
        # compares against 50000; the code value is kept -- confirm.
        # A spread of exactly 1000 used to fall through to "discard".
        elif spread < 50000:
            test_xy = test_xy[np.where(xy[0,0] <test_xy[:,0])]
            test_xy = test_xy[np.where(xy[-1,0]>test_xy[:,0])]
            test_xy = three_sigma(test_xy,n = 3)
            f=interpolate.interp1d(xy[:,0],xy[:,1],kind='slinear')
            test_xy[:,1]=test_xy[:,1] - f(test_xy[:,0])
            test_xy = three_sigma(test_xy,n = 7)

        # Larger residual: this pair is discarded
        else:
            continue

        # Store the corrected rows of this pair
        idd = test_xy[:,2]
        pair_frames.append(pd.DataFrame({'id':idd,
                                         'time':test_xy[:,0],
                                         'S1':[pair[0]]*len(idd),
                                         'S2':[pair[1]]*len(idd),
                                         'tdoa':test_xy[:,1]}))

        print(i,pair)
    except Exception as exc:
        # Best effort: a failing pair is reported and skipped. Only Exception
        # is caught so that an interrupt can still stop the loop.
        print(i, pair, 'error', repr(exc))

if pair_frames:
    test_without_shift = pd.concat(pair_frames)
test_without_shift
# original run: 5080185 rows × 5 columns

In [None]:
# Order the clock-corrected test data by message id and sensor pair,
# then write it to disk without the index column
test_without_shift = test_without_shift.sort_values(by=['id','S1','S2'])
test_without_shift.to_csv('test_without_shift.csv', index=False)

# The 'result_new.csv' was obtained by TDOA positioning with the sensor time difference

The TDOA code is in a separate file

# The location results were fitted according to the trajectory

## Read the required files

In [None]:
# Competition data, TDOA positioning output and the submission template
training = pd.read_csv('round2/round2_competition.csv')
result_new = pd.read_csv('round2/result_new.csv')
result = pd.read_csv('round2/Submission_db_NN_huigui.csv')

# Left-join the TDOA position and then the aircraft number / server time
# onto every row of the submission template, keyed by message id
tdoa_position = result_new[['id','lattitude','longtitude','height']]
message_info = training[['id','aircraft','timeAtServer' ]]
result = (result
          .merge(tdoa_position, on=['id'], how = 'left')
          .merge(message_info, on=['id'], how = 'left'))
# result.loc[(~result.lattitude.isna()), 'latitude'] = result.loc[(~result.lattitude.isna())]['lattitude']
# result.loc[(~result.lattitude.isna()), 'longitude'] = result.loc[(~result.lattitude.isna())]['longtitude']
result

In [None]:
# Calculate distances using latitude and longitude coordinates
from math import radians, cos, sin, asin, sqrt, pi, atan, pow
def geodistance(start, destination):
    """Haversine distance in km between two points, rounded to 3 decimals.

    Each argument is indexable as (longitude, latitude) in degrees:
    element 0 is read as the longitude and element 1 as the latitude.
    A mean earth radius of 6371 km is used.
    """
    # Degrees -> radians
    lng1, lat1, lng2, lat2 = (
        radians(float(angle))
        for angle in (start[0], start[1], destination[0], destination[1])
    )
    half_dlat = (lat2 - lat1) / 2
    half_dlon = (lng2 - lng1) / 2
    a = sin(half_dlat) ** 2 + cos(lat1) * cos(lat2) * sin(half_dlon) ** 2
    return round(2 * asin(sqrt(a)) * 6371, 3)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from pandas import Series
from pandas.plotting import autocorrelation_plot
from matplotlib import pyplot

# Use latitude and longitude to remove outliers
def three_sigma(dataset, n = 1):
    """Return a copy of dataset without outlier rows in column 0, then column 1.

    Rows whose column-0 value lies more than n standard deviations from the
    column-0 mean are removed first; the same test is then applied to
    column 1, with mean and deviation recomputed on the remaining rows.
    dataset is a 2-D numpy array; it is not modified.
    """
    for column in (0, 1):
        values = dataset[:, column]
        too_far = abs(values - np.mean(values)) > n * np.std(values)
        # Boolean indexing builds a new array, like np.delete did
        dataset = dataset[~too_far]
    return dataset

# Fitted positions of all aircraft. The per-aircraft frames are collected in
# a list and concatenated once after the loop.
# (The original initialised `location` but accumulated into `locations`,
# so every iteration failed with a NameError hidden by a bare except.)
locations = pd.DataFrame({'id':[], 'latitude':[], 'longitude':[]})
located_frames = []

for aircraft, group in result.groupby('aircraft'):
    # This aircraft has poor positioning results and is not to be used
    if aircraft == 2820:
        continue
    try:
        # Columns: 0 latitude, 1 longitude, 2 timeAtServer, 3 height, 4 geoAltitude
        # NOTE(review): `lattitude_x` / `longtitude_x` exist only if the merge
        # that built `result` had to suffix clashing columns -- confirm the
        # column names against the CSV files.
        groupdata = np.array([group.lattitude_x.tolist(),group.longtitude_x.tolist(),
                              group.timeAtServer.tolist(),group.height.tolist(),group.geoAltitude.tolist()]).T
        # Remove every row that holds a NaN in any column.
        # (np.where on the 2-D mask returns row AND column indices; passing
        # both to np.delete also removed the rows numbered like the columns.)
        groupdata = groupdata[~np.isnan(groupdata).any(axis=1)]
        # Remove the rows with a large height error
        groupdata = groupdata[~(np.abs(groupdata[:,4]-groupdata[:,3])>30000)]
        # Remove the outliers in latitude and longitude
        groupdata = three_sigma(groupdata, n = 3)

        # Cubic curves are fitted over time to latitude and longitude
        time = groupdata[:,2]
        p1 = np.poly1d(np.polyfit(time, groupdata[:,0], 3))
        p2 = np.poly1d(np.polyfit(time, groupdata[:,1], 3))

        # Subtract the fitted curves so that the trajectory is straightened
        groupdata[:,0] = groupdata[:,0] - p1(groupdata[:,2])
        groupdata[:,1] = groupdata[:,1] - p2(groupdata[:,2])

        # Remove error points by speed: distance to the previous point
        # divided by the time step must stay below 0.25.
        # NOTE(review): geodistance reads element 0 as longitude and element 1
        # as latitude, while columns 0:2 here are (latitude, longitude)
        # residuals -- confirm this is intended.
        d2 = np.abs(np.array([geodistance(groupdata[i+1,0:2], groupdata[i,0:2])/ (groupdata[i+1,2]-groupdata[i,2])
                              for i in range(0, groupdata.shape[0]-1)]))
        groupdata = groupdata[np.where(d2 < 0.25)[0]+1]

        # Remove error points by turn rate: detour through point i compared
        # with the direct distance between its two neighbours.
        # NOTE(review): the denominator uses groupdata[k,2] (a fixed row), not
        # groupdata[i-k,2]; it looks like a typo but the 0.02 threshold was
        # tuned with it, so it is left unchanged -- confirm.
        k = 1
        d2 = np.abs(np.array([(geodistance(groupdata[i+k,0:2], groupdata[i-k,0:2]) -
                              geodistance(groupdata[i+k,0:2], groupdata[i,0:2]) -
                              geodistance(groupdata[i,0:2], groupdata[i-k,0:2]))/
                               (groupdata[i+k,2]-groupdata[k,2])
                              for i in range(k, groupdata.shape[0]-k)]))
        groupdata = groupdata[np.where(d2 < 0.02)[0]+k]

        # Add the fitted curves back to restore the trajectory
        groupdata[:,0] = groupdata[:,0] + p1(groupdata[:,2])
        groupdata[:,1] = groupdata[:,1] + p2(groupdata[:,2])

        # With fewer than 50 remaining points the trajectory is abandoned
        # (checked before min()/max() below, which fail on empty data)
        if len(groupdata[:,2])<50:
            plt.close()
            continue

        # Messages to locate: those strictly inside the time range covered by
        # the screened points, so that the interpolation stays in range
        labeldata = np.array([group.id.tolist(),group.timeAtServer.tolist()]).T
        t_first, t_last = min(groupdata[:,2]), max(groupdata[:,2])
        labeldata = labeldata[(t_first < labeldata[:,1]) & (t_last > labeldata[:,1])]
        idd = labeldata[:,0]
        timeAtServer = labeldata[:,1]
        num_points = len(idd)
        if num_points == 0:
            continue

        # Linear interpolation of the screened points at the message times
        new_lat = interpolate.interp1d(groupdata[:,2],groupdata[:,0],kind='slinear')(timeAtServer)
        new_lon = interpolate.interp1d(groupdata[:,2],groupdata[:,1],kind='slinear')(timeAtServer)

        # Parameters of the piecewise curve fitting:
        # num_divide = points per section, cishu = polynomial degree
        if aircraft in [1020,1913,2758,2841]:
            num_divide, cishu = 10000, 20
        elif aircraft in [451,546,601,824,1008,1633]:
            num_divide, cishu = 300, 3
        else:
            num_divide, cishu = 120, 3

        # Piecewise fitting of the interpolated trajectory. Stepping with
        # range() never produces an empty last section (the original did when
        # num_points was a multiple of num_divide, which made polyfit raise).
        new_lat2, new_lon2 = [], []
        for start in range(0, num_points, num_divide):
            end = min(start + num_divide, num_points)
            section_time = timeAtServer[start:end]
            lat_fit = np.poly1d(np.polyfit(section_time, new_lat[start:end], cishu))
            lon_fit = np.poly1d(np.polyfit(section_time, new_lon[start:end], cishu))
            new_lat2 += lat_fit(section_time).tolist()
            new_lon2 += lon_fit(section_time).tolist()

        # Store the result of this aircraft
        location = pd.DataFrame({'id':idd, 'latitude':new_lat2, 'longitude':new_lon2})
        located_frames.append(location)
        print(aircraft,len(new_lon))

    except Exception as exc:
        # Best effort: a failing aircraft is reported and skipped. Only
        # Exception is caught so that an interrupt can still stop the loop.
        plt.close()
        print('error', aircraft, repr(exc))

if located_frames:
    locations = pd.concat(located_frames)
locations.sort_values('id', inplace = True)
locations

In [None]:
# Save the final result
Submission = pd.read_csv('round2/Submission_db_NN_huigui.csv')
# Left join: ids without a fitted position keep their row
Submission = Submission.merge(locations, on=['id'], how = 'left')
# Keep the coordinates that came from `locations` (suffix _y after the merge)
# together with geoAltitude, and give them back their plain names
final_names = {'latitude_y':'latitude','longitude_y':'longitude'}
Submission = Submission[['id','latitude_y','longitude_y','geoAltitude']].rename(columns=final_names)
Submission.to_csv('Submission.csv', index=False)