# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import re
from scipy.io import arff
from sklearn.model_selection import train_test_split
import os
import glob

# Mounting Google Drive

In [2]:
# Colab-only step: expose Google Drive under /content/drive so the dataset
# paths used by the cells below can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Reading Data

In [3]:
def read_arff(file_path):
    """Load an ARFF file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str
        Path of the ``.arff`` file to read.

    Returns
    -------
    pandas.DataFrame
        One column per ARFF attribute. The metadata object returned by
        ``arff.loadarff`` is discarded.
    """
    # Use a context manager so the file handle is always closed, even if
    # parsing fails; the handle was previously left open.
    with open(file_path, 'r') as arff_file:
        data, _meta = arff.loadarff(arff_file)
    return pd.DataFrame(data)


# Preview one dimension file to inspect the raw ARFF layout.
# The path is absolute (rooted at the '/content/drive' mount point) so this
# cell does not depend on the kernel's current working directory.
# NOTE: despite its name, `data_folder` points at a single file, not a folder;
# the name is kept so any later reference keeps working.
data_folder = '/content/drive/MyDrive/Phase1/NATOPS/NATOPS/NATOPSDimension1_TEST.arff'

read_arff(data_folder)

Unnamed: 0,channel_0_0,channel_0_1,channel_0_2,channel_0_3,channel_0_4,channel_0_5,channel_0_6,channel_0_7,channel_0_8,channel_0_9,...,channel_0_42,channel_0_43,channel_0_44,channel_0_45,channel_0_46,channel_0_47,channel_0_48,channel_0_49,channel_0_50,classAttribute
0,-0.597549,-0.581217,-0.592066,-0.577286,-0.568285,-0.579710,-0.613225,-0.581178,-0.583130,-0.593056,...,-0.944309,-0.532257,-0.538328,-0.713343,-0.742404,-0.823212,-0.801864,-0.762702,-0.682196,b'4.0'
1,-0.607456,-0.611583,-0.605547,-0.600113,-0.593437,-0.586732,-0.593451,-0.596853,-0.583299,-0.600165,...,-0.881784,-0.884101,-0.828076,-0.753074,-0.612031,-0.557092,-0.600117,-0.633856,-0.633755,b'5.0'
2,-0.495153,-0.451345,-0.451346,-0.491650,-0.448914,-0.441454,-0.438898,-0.461153,-0.396852,-0.360589,...,-0.521409,-0.507334,-0.504648,-0.497998,-0.495097,-0.487448,-0.503305,-0.499306,-0.501495,b'6.0'
3,-0.547021,-0.549974,-0.542614,-0.544792,-0.552459,-0.540752,-0.551519,-0.551412,-0.541995,-0.547951,...,-0.457718,-0.468404,-0.469408,-0.476372,-0.481220,-0.481070,-0.472350,-0.501773,-0.487282,b'1.0'
4,-0.492635,-0.513056,-0.633234,-1.017439,-1.305868,-1.615854,-1.886416,-2.041730,-2.105306,-1.924894,...,-1.880119,-1.606070,-1.342443,-1.125806,-0.934316,-0.728774,-0.651624,-0.628753,-0.639729,b'4.0'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,-0.553245,-0.551704,-0.548044,-0.544929,-0.546446,-0.546651,-0.554797,-0.580788,-0.634431,-0.679319,...,-0.606554,-0.662612,-0.709257,-0.727722,-0.721847,-0.687693,-0.655425,-0.627145,-0.613324,b'6.0'
176,-0.411974,-0.427559,-0.439430,-0.452880,-0.450422,-0.449396,-0.452276,-0.451605,-0.454152,-0.452007,...,-0.418897,-0.425598,-0.424234,-0.444902,-0.453351,-0.455989,-0.462371,-0.461314,-0.460288,b'3.0'
177,-0.596643,-0.601261,-0.603440,-0.620041,-0.644571,-0.726249,-0.816761,-0.950272,-1.189699,-1.456629,...,-0.729317,-0.708935,-0.692440,-0.693657,-0.683554,-0.645831,-0.641527,-0.640459,-0.636408,b'5.0'
178,-0.658060,-0.647922,-0.633930,-0.635635,-0.621982,-0.626305,-0.616651,-0.619751,-0.627013,-0.628345,...,-0.980285,-0.634341,-0.623997,-0.706020,-0.745160,-0.701983,-0.597178,-0.573916,-0.605648,b'4.0'


# Combining All Dimensions Into One Big Table

## Sorting File Paths by Dimension Number

In [4]:
def sort_key(file_name):
    """Return the first integer found in the file's base name.

    Only the final path component is inspected, so digits in parent
    directories are ignored. Names without any digit sort last because
    ``float('inf')`` is returned for them.
    """
    digits = re.search(r'\d+', os.path.basename(file_name))
    if digits is None:
        return float('inf')
    return int(digits.group())

## Combining Test Data

In [5]:
def combine_test_dimensions(folder_path):
    """Stack every ``NATOPSDimension*_TEST.arff`` file into one long table.

    Matching files are sorted with ``sort_key`` and read with ``read_arff``.
    For the i-th file (0-based, in sorted order) the columns
    ``channel_<i>_0 .. channel_<i>_<n-1>`` are flattened column by column into
    one ``Dimension<i+1>`` column, so rows run through every sample of time
    step 0, then every sample of time step 1, and so on.

    Parameters
    ----------
    folder_path : str
        Directory containing the ``NATOPSDimension*_TEST.arff`` files.

    Returns
    -------
    pandas.DataFrame
        One ``Dimension<k>`` column per file, followed by ``ClassAttribute``
        (the ``classAttribute`` values of the last file read, repeated once
        per time step) and a constant ``TEST_or_TRAIN`` column set to
        ``'TEST'``.

    Raises
    ------
    FileNotFoundError
        If no file in ``folder_path`` matches the pattern.
    ValueError
        If the files do not all flatten to the same number of rows.
    """
    pattern = os.path.join(folder_path, "NATOPSDimension*_TEST.arff")
    sorted_test_fp = sorted(glob.glob(pattern), key=sort_key)
    if not sorted_test_fp:
        # Without this guard the label step below hit an unbound `df`.
        raise FileNotFoundError(f"No files matching {pattern!r}")

    dimension_columns = {}
    for i, file_path in enumerate(sorted_test_fp):
        df = read_arff(file_path)
        # Every column except `classAttribute` holds one time step.
        n_steps = len(df.columns) - 1
        channel_cols = [f'channel_{i}_{j}' for j in range(n_steps)]
        # Column-major flatten: all rows of step 0, then all rows of step 1, ...
        dimension_columns[f'Dimension{i+1}'] = (
            df[channel_cols].to_numpy().ravel(order='F')
        )

    # Build the frame once instead of re-concatenating inside the loop.
    combined_test_df = pd.DataFrame(dimension_columns)
    # Repeat the labels once per time step; the count comes from the data
    # rather than a hard-coded 51.
    combined_test_df = combined_test_df.assign(
        ClassAttribute=df['classAttribute'].tolist() * n_steps,
        TEST_or_TRAIN='TEST',
    )
    return combined_test_df

# Build the long-format TEST table from the dataset folder on the mounted
# Drive and display it.
path = '/content/drive/MyDrive/Phase1/NATOPS/NATOPS/'
test = combine_test_dimensions(path)
test

Unnamed: 0,Dimension1,Dimension2,Dimension3,Dimension4,Dimension5,Dimension6,Dimension7,Dimension8,Dimension9,Dimension10,...,Dimension17,Dimension18,Dimension19,Dimension20,Dimension21,Dimension22,Dimension23,Dimension24,ClassAttribute,TEST_or_TRAIN
0,-0.597549,-1.897467,-0.689874,0.597063,-1.922938,-0.836496,-0.586639,-0.827417,-0.100443,0.631310,...,-1.593898,-0.737468,-0.707356,-1.643834,-0.498332,0.463551,-1.617470,-0.806243,b'4.0',TEST
1,-0.607456,-1.879784,-0.614554,0.531186,-1.844268,-0.820261,-0.530811,-0.715952,-0.044630,0.603517,...,-1.406289,-0.669134,-0.430546,-1.619691,-0.651275,0.684677,-1.646534,-0.817587,b'5.0',TEST
2,-0.495153,-1.720262,-0.857496,0.501371,-1.964585,-0.531747,-0.673079,-0.654826,-0.306139,0.610460,...,-1.527113,-0.325738,-0.391788,-1.457567,-0.797109,0.652537,-1.758182,-0.531482,b'6.0',TEST
3,-0.547021,-1.812588,-0.755462,0.586411,-1.786277,-0.771850,-0.675401,-0.723700,-0.103184,0.623494,...,-1.388057,-0.573828,-0.499020,-1.621940,-0.826510,0.425884,-1.536722,-0.775954,b'1.0',TEST
4,-0.492635,-1.796077,-0.904479,0.536695,-1.859551,-0.601436,-0.698642,-0.720977,-0.347292,0.537046,...,-1.415580,-0.339807,-0.691423,-1.594635,-0.793481,0.403997,-1.630467,-0.599775,b'4.0',TEST
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9175,-0.613324,-1.701488,-0.689185,0.568447,-1.818381,-0.382463,-0.642010,-0.666132,-0.244126,0.564339,...,-1.416439,-0.227229,-0.546785,-1.475781,-0.744290,0.532968,-1.683821,-0.253524,b'6.0',TEST
9176,-0.460288,-1.700741,-1.010038,0.432675,-1.584284,-0.544483,-0.663979,-0.558569,-0.411888,0.580918,...,-1.211787,-0.117745,-0.359054,-1.403299,-0.907333,0.344529,-1.227246,-0.578737,b'3.0',TEST
9177,-0.636408,-1.821902,-0.683674,0.613025,-1.856640,-0.474193,-0.672065,-0.671302,-0.200278,0.592010,...,-1.433850,-0.270363,-0.583348,-1.601456,-0.769593,0.718601,-1.537754,-0.325793,b'5.0',TEST
9178,-0.605648,-1.990175,-0.624211,0.607400,-1.979971,-0.586442,-0.607658,-0.802529,-0.104502,0.634571,...,-1.507784,-0.411483,-0.504087,-1.718659,-0.676891,0.553946,-1.675705,-0.667249,b'4.0',TEST


## Shape of Test Data

In [51]:
# Rows = samples x time steps; columns = one per dimension file plus
# ClassAttribute and TEST_or_TRAIN.
test.shape


(9180, 26)

## Combining Training Data

In [7]:
def combine_train_dimensions(folder_path):
    """Stack every ``NATOPSDimension*_TRAIN.arff`` file into one long table.

    Matching files are sorted with ``sort_key`` and read with ``read_arff``.
    For the i-th file (0-based, in sorted order) the columns
    ``channel_<i>_0 .. channel_<i>_<n-1>`` are flattened column by column into
    one ``Dimension<i+1>`` column, so rows run through every sample of time
    step 0, then every sample of time step 1, and so on.

    Parameters
    ----------
    folder_path : str
        Directory containing the ``NATOPSDimension*_TRAIN.arff`` files.

    Returns
    -------
    pandas.DataFrame
        One ``Dimension<k>`` column per file, followed by ``ClassAttribute``
        (the ``classAttribute`` values of the last file read, repeated once
        per time step) and a constant ``TEST_or_TRAIN`` column set to
        ``'TRAIN'``.

    Raises
    ------
    FileNotFoundError
        If no file in ``folder_path`` matches the pattern.
    ValueError
        If the files do not all flatten to the same number of rows.
    """
    pattern = os.path.join(folder_path, "NATOPSDimension*_TRAIN.arff")
    sorted_train_fp = sorted(glob.glob(pattern), key=sort_key)
    if not sorted_train_fp:
        # Without this guard the label step below hit an unbound `df`.
        raise FileNotFoundError(f"No files matching {pattern!r}")

    dimension_columns = {}
    for i, file_path in enumerate(sorted_train_fp):
        df = read_arff(file_path)
        # Every column except `classAttribute` holds one time step.
        n_steps = len(df.columns) - 1
        channel_cols = [f'channel_{i}_{j}' for j in range(n_steps)]
        # Column-major flatten: all rows of step 0, then all rows of step 1, ...
        dimension_columns[f'Dimension{i+1}'] = (
            df[channel_cols].to_numpy().ravel(order='F')
        )

    # Build the frame once instead of re-concatenating inside the loop.
    combined_train_df = pd.DataFrame(dimension_columns)
    # Repeat the labels once per time step; the count comes from the data
    # rather than a hard-coded 51.
    combined_train_df = combined_train_df.assign(
        ClassAttribute=df['classAttribute'].tolist() * n_steps,
        TEST_or_TRAIN='TRAIN',
    )
    return combined_train_df

# Build the long-format TRAIN table from the same dataset folder and display it.
# `path` is re-assigned to the same value as in the TEST cell, so this cell
# can run on its own.
path = '/content/drive/MyDrive/Phase1/NATOPS/NATOPS/'
train = combine_train_dimensions(path)
train

Unnamed: 0,Dimension1,Dimension2,Dimension3,Dimension4,Dimension5,Dimension6,Dimension7,Dimension8,Dimension9,Dimension10,...,Dimension17,Dimension18,Dimension19,Dimension20,Dimension21,Dimension22,Dimension23,Dimension24,ClassAttribute,TEST_or_TRAIN
0,-0.372758,-1.821679,-0.846321,0.465208,-2.015072,-0.839242,-0.564097,-0.796225,-0.149604,0.599967,...,-1.534954,-0.673190,-0.536343,-1.626957,-0.594337,0.619205,-1.771773,-0.810086,b'4.0',TRAIN
1,-0.547370,-1.600105,-0.809446,0.556062,-1.669622,-0.748726,-0.668990,-0.673415,-0.162021,0.622368,...,-1.271334,-0.495517,-0.557755,-1.416602,-0.849636,0.618919,-1.497652,-0.754927,b'3.0',TRAIN
2,-0.587062,-1.755034,-0.648786,0.542660,-1.759520,-0.573142,-0.683921,-0.750000,-0.146066,0.588525,...,-1.406456,-0.372397,-0.729303,-1.516087,-0.464015,0.417640,-1.549212,-0.564249,b'3.0',TRAIN
3,-0.514671,-1.893971,-0.748957,0.571942,-1.988466,-0.745504,-0.563803,-0.729743,-0.132236,0.576847,...,-1.504254,-0.595728,-0.392323,-1.661124,-0.780828,0.439658,-1.701396,-0.809376,b'4.0',TRAIN
4,-0.718601,-2.153186,-0.859093,0.808480,-2.175653,-0.763930,-0.689536,-0.803106,-0.269982,0.717469,...,-1.801784,-0.574368,-0.531636,-1.875322,-0.876736,0.634141,-1.956089,-0.797694,b'3.0',TRAIN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9175,-0.495331,-1.769731,-0.791706,0.302474,-1.814133,-0.333784,-0.583258,-0.587102,-0.436378,0.351775,...,-1.502345,-0.108903,-0.390164,-1.507034,-0.813575,0.281669,-1.620188,-0.405255,b'4.0',TRAIN
9176,-0.564019,-2.121457,-0.906610,0.508566,-2.220399,-0.190477,-0.655761,-0.799259,-0.482954,0.450815,...,-1.699828,-0.071720,-0.732399,-1.771570,-0.796377,0.377814,-1.985870,-0.372097,b'5.0',TRAIN
9177,-0.554701,-1.741121,-0.581661,0.540708,-1.764734,-0.812273,-0.545602,-0.690857,0.008526,0.631474,...,-1.366152,-0.598647,-0.500746,-1.498131,-0.632232,0.691991,-1.481622,-0.664324,b'1.0',TRAIN
9178,-0.497487,-2.122190,-0.960263,0.452134,-2.522624,-0.412244,-0.665426,-0.772461,-0.488896,0.497571,...,-1.857872,-0.152399,-0.613075,-1.968880,-0.856508,0.443306,-2.186420,-0.502940,b'4.0',TRAIN


## Shape of Training Data


In [53]:
# Rows = samples x time steps; columns = one per dimension file plus
# ClassAttribute and TEST_or_TRAIN.
train.shape

(9180, 26)

# Combined Table of TEST and TRAIN Data

In [12]:
# Stack TRAIN on top of TEST with a fresh 0..N-1 index, then add `sid`.
# NOTE(review): `sid` is a plain row counter (identical to the new index), so
# rows of the same sample at different time steps get different values --
# confirm this is the intended identifier.
combined_table = (
    pd.concat([train, test], ignore_index=True)
    .assign(sid=lambda frame: range(len(frame)))
)
combined_table

Unnamed: 0,Dimension1,Dimension2,Dimension3,Dimension4,Dimension5,Dimension6,Dimension7,Dimension8,Dimension9,Dimension10,...,Dimension18,Dimension19,Dimension20,Dimension21,Dimension22,Dimension23,Dimension24,ClassAttribute,TEST_or_TRAIN,sid
0,-0.372758,-1.821679,-0.846321,0.465208,-2.015072,-0.839242,-0.564097,-0.796225,-0.149604,0.599967,...,-0.673190,-0.536343,-1.626957,-0.594337,0.619205,-1.771773,-0.810086,b'4.0',TRAIN,0
1,-0.547370,-1.600105,-0.809446,0.556062,-1.669622,-0.748726,-0.668990,-0.673415,-0.162021,0.622368,...,-0.495517,-0.557755,-1.416602,-0.849636,0.618919,-1.497652,-0.754927,b'3.0',TRAIN,1
2,-0.587062,-1.755034,-0.648786,0.542660,-1.759520,-0.573142,-0.683921,-0.750000,-0.146066,0.588525,...,-0.372397,-0.729303,-1.516087,-0.464015,0.417640,-1.549212,-0.564249,b'3.0',TRAIN,2
3,-0.514671,-1.893971,-0.748957,0.571942,-1.988466,-0.745504,-0.563803,-0.729743,-0.132236,0.576847,...,-0.595728,-0.392323,-1.661124,-0.780828,0.439658,-1.701396,-0.809376,b'4.0',TRAIN,3
4,-0.718601,-2.153186,-0.859093,0.808480,-2.175653,-0.763930,-0.689536,-0.803106,-0.269982,0.717469,...,-0.574368,-0.531636,-1.875322,-0.876736,0.634141,-1.956089,-0.797694,b'3.0',TRAIN,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18355,-0.613324,-1.701488,-0.689185,0.568447,-1.818381,-0.382463,-0.642010,-0.666132,-0.244126,0.564339,...,-0.227229,-0.546785,-1.475781,-0.744290,0.532968,-1.683821,-0.253524,b'6.0',TEST,18355
18356,-0.460288,-1.700741,-1.010038,0.432675,-1.584284,-0.544483,-0.663979,-0.558569,-0.411888,0.580918,...,-0.117745,-0.359054,-1.403299,-0.907333,0.344529,-1.227246,-0.578737,b'3.0',TEST,18356
18357,-0.636408,-1.821902,-0.683674,0.613025,-1.856640,-0.474193,-0.672065,-0.671302,-0.200278,0.592010,...,-0.270363,-0.583348,-1.601456,-0.769593,0.718601,-1.537754,-0.325793,b'5.0',TEST,18357
18358,-0.605648,-1.990175,-0.624211,0.607400,-1.979971,-0.586442,-0.607658,-0.802529,-0.104502,0.634571,...,-0.411483,-0.504087,-1.718659,-0.676891,0.553946,-1.675705,-0.667249,b'4.0',TEST,18358


## Checking the Combined Table Against the TRAIN and TEST Data

### Head


In [55]:
# First rows of `train`; since `train` is concatenated first, these should
# match the first rows of `combined_table`.
train.head()

Unnamed: 0,Dimension1,Dimension2,Dimension3,Dimension4,Dimension5,Dimension6,Dimension7,Dimension8,Dimension9,Dimension10,...,Dimension17,Dimension18,Dimension19,Dimension20,Dimension21,Dimension22,Dimension23,Dimension24,ClassAttribute,TEST_or_TRAIN
0,-0.372758,-1.821679,-0.846321,0.465208,-2.015072,-0.839242,-0.564097,-0.796225,-0.149604,0.599967,...,-1.534954,-0.67319,-0.536343,-1.626957,-0.594337,0.619205,-1.771773,-0.810086,b'4.0',TRAIN
1,-0.54737,-1.600105,-0.809446,0.556062,-1.669622,-0.748726,-0.66899,-0.673415,-0.162021,0.622368,...,-1.271334,-0.495517,-0.557755,-1.416602,-0.849636,0.618919,-1.497652,-0.754927,b'3.0',TRAIN
2,-0.587062,-1.755034,-0.648786,0.54266,-1.75952,-0.573142,-0.683921,-0.75,-0.146066,0.588525,...,-1.406456,-0.372397,-0.729303,-1.516087,-0.464015,0.41764,-1.549212,-0.564249,b'3.0',TRAIN
3,-0.514671,-1.893971,-0.748957,0.571942,-1.988466,-0.745504,-0.563803,-0.729743,-0.132236,0.576847,...,-1.504254,-0.595728,-0.392323,-1.661124,-0.780828,0.439658,-1.701396,-0.809376,b'4.0',TRAIN
4,-0.718601,-2.153186,-0.859093,0.80848,-2.175653,-0.76393,-0.689536,-0.803106,-0.269982,0.717469,...,-1.801784,-0.574368,-0.531636,-1.875322,-0.876736,0.634141,-1.956089,-0.797694,b'3.0',TRAIN


In [56]:
# First rows of the combined table, for comparison with `train.head()`.
combined_table.head()

Unnamed: 0,Dimension1,Dimension2,Dimension3,Dimension4,Dimension5,Dimension6,Dimension7,Dimension8,Dimension9,Dimension10,...,Dimension17,Dimension18,Dimension19,Dimension20,Dimension21,Dimension22,Dimension23,Dimension24,ClassAttribute,TEST_or_TRAIN
0,-0.372758,-1.821679,-0.846321,0.465208,-2.015072,-0.839242,-0.564097,-0.796225,-0.149604,0.599967,...,-1.534954,-0.67319,-0.536343,-1.626957,-0.594337,0.619205,-1.771773,-0.810086,b'4.0',TRAIN
1,-0.54737,-1.600105,-0.809446,0.556062,-1.669622,-0.748726,-0.66899,-0.673415,-0.162021,0.622368,...,-1.271334,-0.495517,-0.557755,-1.416602,-0.849636,0.618919,-1.497652,-0.754927,b'3.0',TRAIN
2,-0.587062,-1.755034,-0.648786,0.54266,-1.75952,-0.573142,-0.683921,-0.75,-0.146066,0.588525,...,-1.406456,-0.372397,-0.729303,-1.516087,-0.464015,0.41764,-1.549212,-0.564249,b'3.0',TRAIN
3,-0.514671,-1.893971,-0.748957,0.571942,-1.988466,-0.745504,-0.563803,-0.729743,-0.132236,0.576847,...,-1.504254,-0.595728,-0.392323,-1.661124,-0.780828,0.439658,-1.701396,-0.809376,b'4.0',TRAIN
4,-0.718601,-2.153186,-0.859093,0.80848,-2.175653,-0.76393,-0.689536,-0.803106,-0.269982,0.717469,...,-1.801784,-0.574368,-0.531636,-1.875322,-0.876736,0.634141,-1.956089,-0.797694,b'3.0',TRAIN


### Tail

In [57]:
# Last rows of `test`; since `test` is concatenated last, these should match
# the last rows of `combined_table` (index values differ because the combined
# table is re-indexed).
test.tail()

Unnamed: 0,Dimension1,Dimension2,Dimension3,Dimension4,Dimension5,Dimension6,Dimension7,Dimension8,Dimension9,Dimension10,...,Dimension17,Dimension18,Dimension19,Dimension20,Dimension21,Dimension22,Dimension23,Dimension24,ClassAttribute,TEST_or_TRAIN
9175,-0.613324,-1.701488,-0.689185,0.568447,-1.818381,-0.382463,-0.64201,-0.666132,-0.244126,0.564339,...,-1.416439,-0.227229,-0.546785,-1.475781,-0.74429,0.532968,-1.683821,-0.253524,b'6.0',TEST
9176,-0.460288,-1.700741,-1.010038,0.432675,-1.584284,-0.544483,-0.663979,-0.558569,-0.411888,0.580918,...,-1.211787,-0.117745,-0.359054,-1.403299,-0.907333,0.344529,-1.227246,-0.578737,b'3.0',TEST
9177,-0.636408,-1.821902,-0.683674,0.613025,-1.85664,-0.474193,-0.672065,-0.671302,-0.200278,0.59201,...,-1.43385,-0.270363,-0.583348,-1.601456,-0.769593,0.718601,-1.537754,-0.325793,b'5.0',TEST
9178,-0.605648,-1.990175,-0.624211,0.6074,-1.979971,-0.586442,-0.607658,-0.802529,-0.104502,0.634571,...,-1.507784,-0.411483,-0.504087,-1.718659,-0.676891,0.553946,-1.675705,-0.667249,b'4.0',TEST
9179,-0.651644,-1.911442,-0.656298,0.469341,-2.086228,-0.64496,-0.725837,-0.787746,-0.116215,0.597154,...,-1.623406,-0.413167,-0.557817,-1.641795,-0.704824,0.562437,-1.895895,-0.454302,b'4.0',TEST


In [58]:
# Last rows of the combined table, for comparison with `test.tail()`.
combined_table.tail()

Unnamed: 0,Dimension1,Dimension2,Dimension3,Dimension4,Dimension5,Dimension6,Dimension7,Dimension8,Dimension9,Dimension10,...,Dimension17,Dimension18,Dimension19,Dimension20,Dimension21,Dimension22,Dimension23,Dimension24,ClassAttribute,TEST_or_TRAIN
18355,-0.613324,-1.701488,-0.689185,0.568447,-1.818381,-0.382463,-0.64201,-0.666132,-0.244126,0.564339,...,-1.416439,-0.227229,-0.546785,-1.475781,-0.74429,0.532968,-1.683821,-0.253524,b'6.0',TEST
18356,-0.460288,-1.700741,-1.010038,0.432675,-1.584284,-0.544483,-0.663979,-0.558569,-0.411888,0.580918,...,-1.211787,-0.117745,-0.359054,-1.403299,-0.907333,0.344529,-1.227246,-0.578737,b'3.0',TEST
18357,-0.636408,-1.821902,-0.683674,0.613025,-1.85664,-0.474193,-0.672065,-0.671302,-0.200278,0.59201,...,-1.43385,-0.270363,-0.583348,-1.601456,-0.769593,0.718601,-1.537754,-0.325793,b'5.0',TEST
18358,-0.605648,-1.990175,-0.624211,0.6074,-1.979971,-0.586442,-0.607658,-0.802529,-0.104502,0.634571,...,-1.507784,-0.411483,-0.504087,-1.718659,-0.676891,0.553946,-1.675705,-0.667249,b'4.0',TEST
18359,-0.651644,-1.911442,-0.656298,0.469341,-2.086228,-0.64496,-0.725837,-0.787746,-0.116215,0.597154,...,-1.623406,-0.413167,-0.557817,-1.641795,-0.704824,0.562437,-1.895895,-0.454302,b'4.0',TEST


## Shape of the Combined Table Data

In [59]:
# Row count should equal len(train) + len(test).
# NOTE(review): the cell that builds `combined_table` also adds a `sid`
# column, which would give 27 columns, yet the recorded output shows 26 and
# the execution counts are non-sequential -- re-run the notebook top to bottom
# (Restart & Run All) to confirm the saved outputs.
combined_table.shape

(18360, 26)

## Creating a CSV File for the Combined Table Data

In [61]:
# Write the combined table next to the source ARFF files.
# The target is built from the absolute dataset folder `path` defined above,
# so the export does not depend on the kernel's current working directory.
csv_path = os.path.join(path, 'Multi_variate_data.csv')
# index=True keeps the existing CSV layout (leading unnamed index column).
# NOTE(review): when `sid` is present it duplicates this index -- consider
# index=False once downstream readers are checked.
combined_table.to_csv(csv_path, index=True)