In [None]:
#calculating the roc values

In [1]:

import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [1]:
%%timeit
def calc_roc(dataset):
    e = 0.00000001
    vx = dataset['vx'].values
    vy = dataset['vy'].values
    vz = dataset['vz'].values

    ax = dataset['ax'].values
    ay = dataset['ay'].values
    az = dataset['az'].values

    numerator = np.sqrt(np.power((np.square(vx) + np.square(vy) + np.square(vz)), 3))
    denominator = np.square(np.multiply(ax, vy) - np.multiply(vz, ay))
    denominator = denominator + np.square(np.multiply(ax, vz) - np.multiply(vx, az))
    denominator = denominator + np.square(np.multiply(ay, vx) - np.multiply(vy, ax))
    denominator = np.sqrt(denominator)

    return numerator / (denominator + e)

def extract_zero_and_nonzero_rows(file_path, class_label):
    try:
        df = pd.read_table(file_path, delimiter=',', encoding='latin1')
        
        b_alt = df['baroaltitude'].iloc[0]
        s_speed = df['velocity']
        c_time = df['time']
        f_time = pd.to_datetime(c_time.iloc[0])
        
        df['dz'] = b_alt - df['baroaltitude']
        df['dt'] = pd.to_datetime(c_time) - f_time
        c_course = df['lastupdatepos']
        df['vx'] = s_speed * np.sin(c_course) * 0.0174
        df['vy'] = s_speed * np.cos(c_course) * 0.0174

        df['dz'] = df['dz'].astype(float)
        df['vz'] = df['dz'] / df['dt'].dt.total_seconds()
        df['dvx'] = -df['vx'].diff()
        df['dvy'] = -df['vy'].diff()
        df['dvz'] = -df['vz'].diff()
        df['ax'] = df['dvx'] / df['dt'].dt.total_seconds()
        df['ay'] = df['dvy'] / df['dt'].dt.total_seconds()
        df['az'] = df['dvz'] / df['dt'].dt.total_seconds()
        df['acc'] = s_speed / df['dt'].dt.total_seconds()
        df['roc'] = calc_roc(df)

        df['class'] = class_label

        zero_rows_mask = (df['ax'] == 0) & (df['ay'] == 0) & (df['az'] == 0)
        zero_rows = df[zero_rows_mask]
        non_zero_rows = df[~zero_rows_mask]

        return zero_rows, non_zero_rows
        
    except Exception as e:
        print(f"An error occurred while processing file: {file_path}")
        print(f"Error message: {str(e)}")
        return None, None

def get_class_label(file_path):
    directory_name = os.path.dirname(file_path)
    class_label = None
    if 'Small_aircraft' in directory_name:
        class_label = 0
    elif 'Large_aircraft' in directory_name:
        class_label=1
    elif 'Miedium_aircraft' in directory_name:
        class_label = 2

    return class_label

def process_files(root_directory):
    zero_rows_data = pd.DataFrame() 
    non_zero_rows_data = pd.DataFrame()  
    
    for root, dirs, files in os.walk(root_directory):
        for file in files:
            file_path = os.path.join(root, file)
            print(f"Processing file: {file_path}")
            class_label = get_class_label(file_path)
            zero_rows, non_zero_rows = extract_zero_and_nonzero_rows(file_path, class_label)
            
            if zero_rows is not None:
                zero_rows_data = pd.concat([zero_rows_data, zero_rows])
            
            if non_zero_rows is not None:
                non_zero_rows_data = pd.concat([non_zero_rows_data, non_zero_rows])
            
    return zero_rows_data, non_zero_rows_data

root_directory = 'test_file'
zero_rows_data, non_zero_rows_data = process_files(root_directory)

# print("Zero Rows Data:")
# print(zero_rows_data)

# print("Non-Zero Rows Data:")
# print(non_zero_rows_data)

Processing file: test_file/Small_Aircraft_Propeller/VTEHB/2022-10-11
Processing file: test_file/Small_Aircraft_Propeller/VTEHB/2022-10-20
Processing file: test_file/Small_Aircraft_Propeller/VTEHB/2022-11-02
Processing file: test_file/Small_Aircraft_Propeller/VTEHB/2022-11-04
Processing file: test_file/Small_Aircraft_Propeller/VTHLT1/2022-10-11
Processing file: test_file/Small_Aircraft_Propeller/VTRBV/2022-10-11
Processing file: test_file/Small_Aircraft_Propeller/VUAUF/2022-10-11
Processing file: test_file/Small_Aircraft_Propeller/VUMPB/2022-10-11
Processing file: test_file/Small_Aircraft_Propeller/VUMPB/2022-10-20
Processing file: test_file/Small_Aircraft_Propeller/VTVPA/2022-10-12
Processing file: test_file/Small_Aircraft_Propeller/VTVPA/2022-10-28
Processing file: test_file/Small_Aircraft_Propeller/VTCAH/2022-10-14
Processing file: test_file/Small_Aircraft_Propeller/VTCAH/2022-10-17
Processing file: test_file/Small_Aircraft_Propeller/VTCAH/2022-11-04
Processing file: test_file/Small_

  denominator = denominator + np.square(np.multiply(ay, vx) - np.multiply(vy, ax))


Processing file: test_file/Fighter/BR4/2022-11-02
Processing file: test_file/Fighter/VI/2022-11-02
Processing file: test_file/Fighter/TEM2/2022-10-20
Processing file: test_file/Fighter/BA2/2022-10-28
Processing file: test_file/Fighter/BA2/2022-11-02
Processing file: test_file/Fighter/BA2/2022-11-04
Processing file: test_file/Fighter/BA2/2022-11-09
Processing file: test_file/Fighter/BA2/2022-11-12
Processing file: test_file/Fighter/BA2/2022-11-14
Processing file: test_file/Fighter/H/2022-10-28
Processing file: test_file/Fighter/SH/2022-10-28
Processing file: test_file/Fighter/SH/2022-11-02
Processing file: test_file/Fighter/SH/2022-11-09
Processing file: test_file/Fighter/WZ/2022-10-28
Processing file: test_file/Fighter/WZ/2022-11-12
Processing file: test_file/Fighter/WZ/2022-11-14
Processing file: test_file/Fighter/HD/2022-11-02
Processing file: test_file/Fighter/MR/2022-11-02
Processing file: test_file/Fighter/SKD3/2022-11-02
Processing file: test_file/Fighter/HE/2022-11-04
Processing

  denominator = denominator + np.square(np.multiply(ay, vx) - np.multiply(vy, ax))


Processing file: test_file/Civilian/BBC_FLIGHTS/BBC122/2022-10-11
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC722/2022-10-17
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC493/2022-10-11
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC338/2022-10-20
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC136/2022-10-14
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC236/2022-10-11
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC335/2022-10-18
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC462/2022-10-11
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC207/2022-10-20
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC128/2022-10-14
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC128/2022-10-17
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC349/2022-11-14
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC371/2022-10-14
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC371/2022-10-17
Processing file: test_file/Civilian/BBC_FLIGHTS/BBC371/2022-10-20
Processing

In [None]:
#preaparing dataframe for dtree classiefier

In [2]:
import pandas as pd

non_zero_row_data = non_zero_rows_data.iloc[2:]



print("DataFrame after removing the first two rows:")
print(non_zero_row_data)

DataFrame after removing the first two rows:
                   Time  Latitude  Longitude    Speed     Course      IFF  \
3   2022-10-11 11:02:26   28.4544    77.1689  68.6957  144.66000  33455.0   
4   2022-10-11 11:02:27   28.4530    77.1711  67.0388  141.07200  33455.0   
5   2022-10-11 11:02:28   28.4527    77.1716  66.9966  140.41200  33455.0   
6   2022-10-11 11:02:29   28.4523    77.1722  67.1507  139.21400  33455.0   
7   2022-10-11 11:02:30   28.4521    77.1725  66.9139  142.43100  33455.0   
..                  ...       ...        ...      ...        ...      ...   
54  2022-10-14 11:59:43   29.1897    79.5207  56.3699   13.06930  33661.0   
55  2022-10-14 11:59:47   29.1916    79.5213  56.5524   13.29860  33661.0   
56  2022-10-14 11:59:51   29.1935    79.5211  56.6010   11.30990  33661.0   
57  2022-10-14 11:59:55   29.1954    79.5214  56.5624   11.30990  33661.0   
58  2022-10-14 11:59:59   29.1975    79.5199  56.5218    5.10217  33661.0   

    TrackStatus  BaroAltFlag  

In [3]:
import pandas as pd


non_zero_rows_data_without_nan = non_zero_rows_data.dropna()


print(non_zero_rows_data_without_nan)


                   Time  Latitude  Longitude    Speed     Course      IFF  \
3   2022-10-11 11:02:26   28.4544    77.1689  68.6957  144.66000  33455.0   
4   2022-10-11 11:02:27   28.4530    77.1711  67.0388  141.07200  33455.0   
5   2022-10-11 11:02:28   28.4527    77.1716  66.9966  140.41200  33455.0   
6   2022-10-11 11:02:29   28.4523    77.1722  67.1507  139.21400  33455.0   
7   2022-10-11 11:02:30   28.4521    77.1725  66.9139  142.43100  33455.0   
..                  ...       ...        ...      ...        ...      ...   
54  2022-10-14 11:59:43   29.1897    79.5207  56.3699   13.06930  33661.0   
55  2022-10-14 11:59:47   29.1916    79.5213  56.5524   13.29860  33661.0   
56  2022-10-14 11:59:51   29.1935    79.5211  56.6010   11.30990  33661.0   
57  2022-10-14 11:59:55   29.1954    79.5214  56.5624   11.30990  33661.0   
58  2022-10-14 11:59:59   29.1975    79.5199  56.5218    5.10217  33661.0   

    TrackStatus  BaroAltFlag  BaroAlt  GeoAltFlag  ...        vz       dvx 

In [None]:
#applying dtree 

In [4]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import numpy as np


non_zero_rows_data_without_nan['dt'] = non_zero_rows_data_without_nan['dt'].dt.total_seconds()

non_zero_rows_data_without_nan = non_zero_rows_data_without_nan.dropna()


non_zero_rows_data_without_nan = non_zero_rows_data_without_nan.replace([np.inf, -np.inf], np.finfo(np.float32).max)

non_numeric_columns = ['time']
X_non = non_zero_rows_data_without_nan.drop(columns=['class'] + non_numeric_columns).select_dtypes(include=['number']).values

y_non = non_zero_rows_data_without_nan['class'].values

X_train, X_test, y_train, y_test = train_test_split(X_non, y_non, test_size=0.33, random_state=42)

print(X_train.shape)
print(type(X_train))
print(y_train.shape)

clf = DecisionTreeClassifier()
dt_trained = clf.fit(X_train, y_train)

y_pred = dt_trained.predict(X_test)
print("Predicted labels:", y_pred)

accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_zero_rows_data_without_nan['dt'] = non_zero_rows_data_without_nan['dt'].dt.total_seconds()


(823139, 24)
<class 'numpy.ndarray'>
(823139,)
Predicted labels: [0 1 2 ... 0 0 1]
Accuracy: 0.9983153564020156


In [5]:
zero_rows_data_without_time = zero_rows_data.drop(columns=['Time'])

In [None]:
#preparing dataset for roc

In [6]:
print(zero_rows_data_without_time)

     Latitude  Longitude     Speed   Course      IFF  TrackStatus  \
2     28.4544    77.1689   69.8139  142.431  33455.0            1   
3     27.4960    76.2930  197.6580  215.238  30524.0            1   
9     27.4467    76.2604  215.0080  208.301  30524.0            1   
20    27.4170    76.2398  143.1430  212.471  30524.0            1   
90    27.2565    76.1291  131.9180  215.587  30524.0            1   
..        ...        ...       ...      ...      ...          ...   
51    29.2900    79.2412   66.4641  107.879  33531.0            1   
55    29.2873    79.2516   65.2458  107.354  33531.0            1   
95    29.2620    79.3313   57.2994  108.122  33531.0            1   
140   29.2433    79.4147   56.6934  107.475  33311.0            1   
144   29.2415    79.4202   56.8853  108.435  33311.0            1   

     BaroAltFlag  BaroAlt  GeoAltFlag  Geo Altitude  ...   vz  dvx  dvy  dvz  \
2              1     2194           1          2247  ...  0.0 -0.0 -0.0 -0.0   
3          

In [None]:
#training the prepared dataset

In [7]:
print(zero_rows_data.info())
X_ann = zero_rows_data_without_time.drop(columns=['class']).values
y_ann = zero_rows_data_without_time['class'].values

X_ann_train, X_ann_test, y_ann_train, y_ann_test = train_test_split(X_ann, y_ann, test_size=0.33, random_state=42)

print(X_ann_train.shape)
print(y_ann_train.shape)


<class 'pandas.core.frame.DataFrame'>
Int64Index: 10129 entries, 2 to 144
Data columns (total 30 columns):
 #   Column        Non-Null Count  Dtype          
---  ------        --------------  -----          
 0   Time          10129 non-null  object         
 1   Latitude      10129 non-null  float64        
 2   Longitude     10129 non-null  float64        
 3   Speed         10129 non-null  float64        
 4   Course        10129 non-null  float64        
 5   IFF           10129 non-null  float64        
 6   TrackStatus   10129 non-null  int64          
 7   BaroAltFlag   10129 non-null  int64          
 8   BaroAlt       10129 non-null  int64          
 9   GeoAltFlag    10129 non-null  int64          
 10  Geo Altitude  10129 non-null  int64          
 11  CTN           10129 non-null  object         
 12  Call_Sign     10129 non-null  object         
 13  date_only     10129 non-null  object         
 14  time_only     10129 non-null  object         
 15  Height        10129 n

In [8]:
import tensorflow 

2023-07-04 09:46:06.119320: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
print(zero_rows_data)

                    Time  Latitude  Longitude     Speed   Course      IFF  \
2    2022-10-11 11:02:25   28.4544    77.1689   69.8139  142.431  33455.0   
3    2022-11-02 13:43:07   27.4960    76.2930  197.6580  215.238  30524.0   
9    2022-11-02 13:44:07   27.4467    76.2604  215.0080  208.301  30524.0   
20   2022-11-02 13:44:33   27.4170    76.2398  143.1430  212.471  30524.0   
90   2022-11-02 13:47:02   27.2565    76.1291  131.9180  215.587  30524.0   
..                   ...       ...        ...       ...      ...      ...   
51   2022-10-12 12:38:46   29.2900    79.2412   66.4641  107.879  33531.0   
55   2022-10-12 12:39:02   29.2873    79.2516   65.2458  107.354  33531.0   
95   2022-10-12 12:41:07   29.2620    79.3313   57.2994  108.122  33531.0   
140  2022-10-14 10:56:20   29.2433    79.4147   56.6934  107.475  33311.0   
144  2022-10-14 10:56:30   29.2415    79.4202   56.8853  108.435  33311.0   

     TrackStatus  BaroAltFlag  BaroAlt  GeoAltFlag  ...   vz  dvx  dvy  dvz

In [None]:
#applying the ann

In [10]:
import numpy as np


mask = np.array([all(isinstance(val, float) for val in row) for row in X_ann_train])
X_ann_train_filtered = X_ann_train[mask]
y_ann_train_filtered = y_ann_train[mask]


X_ann_train_filtered = X_ann_train_filtered.astype('float32')
y_ann_train_filtered = y_ann_train_filtered.astype('int32')


In [14]:
import keras
from sklearn.preprocessing import LabelEncoder


In [19]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder


data = zero_rows_data

X = data[['Latitude', 'Longitude']].values
y = data['class'].values


X_normalized = (X - np.mean(X, axis=0)) / np.std(X, axis=0)


le = LabelEncoder()
y_encoded = le.fit_transform(y)


X_train, X_test, y_train, y_test = train_test_split(X_normalized, y_encoded, test_size=0.2, random_state=42)


model = keras.models.Sequential([
    keras.layers.Dense(32, input_shape=(2,), activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(3, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])


model.fit(X_train, y_train, batch_size=1, epochs=5)


loss, accuracy = model.evaluate(X_test, y_test)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)


model.summary()


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.4787808656692505
Test Accuracy: 0.8376110792160034
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 32)                96        
                                                                 
 dropout_5 (Dropout)         (None, 32)                0         
                                                                 
 dense_16 (Dense)            (None, 16)                528       
                                                                 
 dense_17 (Dense)            (None, 3)                 51        
                                                                 
Total params: 675
Trainable params: 675
Non-trainable params: 0
_________________________________________________________________


In [23]:

y_pred_probs = model.predict(X_test)

y_pred = np.argmax(y_pred_probs, axis=1)


accuracy = np.sum(y_pred == y_test) / len(y_test)

print('Test Accuracy:', accuracy)


Test Accuracy: 0.8376110562685094


In [25]:
model_json=model.to_json()
with open("model.json","w") as json_file:
    json_file.write(model_json)

model.save_weights("model.h5")
print("Model Saved")

Model Saved


In [29]:
model.save('/home/Siddhartha/test_file/roc_mod.h5')


In [32]:
from keras.models import model_from_json


model_json = model.to_json()
with open('/home/Siddhartha/test_file/model1.json', 'w') as json_file:
    json_file.write(model_json)


model.save_weights('/home/Siddhartha/test_file/model1.h5')

print("Model saved to disk.")


Model saved to disk.


In [34]:
from sklearn.metrics import accuracy_score




y_pred_prob = model.predict(X_test)


y_pred = np.argmax(y_pred_prob, axis=1)

confidence = np.max(y_pred_prob, axis=1)


accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confidence:", confidence)


Accuracy: 0.8376110562685094
Confidence: [0.9459528  0.5980696  0.7022519  ... 0.7954829  0.94167197 0.9422621 ]
