Importing all the necessary libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

Read from the file containing sensor data

In [2]:
sensorData = pd.read_csv('Data/task_data.csv')
sensorData.head(5)

Unnamed: 0,sample index,class_label,sensor0,sensor1,sensor2,sensor3,sensor4,sensor5,sensor6,sensor7,sensor8,sensor9
0,sample0,1.0,0.834251,0.726081,0.535904,0.214896,0.873788,0.767605,0.111308,0.557526,0.59965,0.665569
1,sample1,1.0,0.804059,0.253135,0.869867,0.334285,0.604075,0.494045,0.833575,0.19419,0.014966,0.802918
2,sample2,1.0,0.694404,0.595777,0.581294,0.799003,0.762857,0.651393,0.075905,0.007186,0.659633,0.831009
3,sample3,1.0,0.78369,0.03878,0.285043,0.627305,0.80062,0.48634,0.827723,0.339807,0.731343,0.892359
4,sample4,1.0,0.788835,0.174433,0.34877,0.938244,0.692065,0.37762,0.18376,0.616805,0.492899,0.930969


Check for any potentially missing data

In [3]:
sensorData.info()
sensorData.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sample index  400 non-null    object 
 1   class_label   400 non-null    float64
 2   sensor0       400 non-null    float64
 3   sensor1       400 non-null    float64
 4   sensor2       400 non-null    float64
 5   sensor3       400 non-null    float64
 6   sensor4       400 non-null    float64
 7   sensor5       400 non-null    float64
 8   sensor6       400 non-null    float64
 9   sensor7       400 non-null    float64
 10  sensor8       400 non-null    float64
 11  sensor9       400 non-null    float64
dtypes: float64(11), object(1)
memory usage: 37.6+ KB


(400, 12)

Checking the importance of each sensor data using training & testing models

In [10]:
#Define the output y
y = sensorData.loc[:,['class_label']]

#Creating a list to save the sensor accuracy
my_list = []

#Looping and testing around each column of sensor data (This can be better written as a function)

i=0

while (i<len(sensorData.columns)-2):      

    #Define the input X
    sensorloc = 'sensor'+str(i)
    X = sensorData.loc[:,[sensorloc]]

    #Split the input and output data to test and training sets
    X_train, X_test, y_train, y_test = train_test_split(
                                                        X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=21)

    #Train the model
    logreg = LogisticRegression()
    logreg.fit(X_train, y_train.values.ravel())

    #Test the model
    y_pred = logreg.predict(X_test)

    #Model performance metrics 
    accuracyScore = accuracy_score(y_test, y_pred)
    f1score = f1_score(y_test, y_pred, average='weighted', labels=np.unique(y_pred))   
    #print(confusion_matrix(y_test, y_pred))
    #print(classification_report(y_test, y_pred))
    print('Sensor'+str(i))
    print("Accuracy score = " + str(accuracyScore))
    print("F1 score = " + str(f1score))
    
    #Taking only the accuracyScore metric identify importance
    my_list.append([sensorloc,accuracyScore])

    i=i+1


Sensor0
Accuracy score = 0.8333333333333334
F1 score = 0.8333333333333334
Sensor1
Accuracy score = 0.4583333333333333
F1 score = 0.4592753623188406
Sensor2
Accuracy score = 0.3333333333333333
F1 score = 0.324009324009324
Sensor3
Accuracy score = 0.7666666666666667
F1 score = 0.7670559881349649
Sensor4
Accuracy score = 0.8083333333333333
F1 score = 0.8086138015933849
Sensor5
Accuracy score = 0.5416666666666666
F1 score = 0.5404245709123757
Sensor6
Accuracy score = 0.4583333333333333
F1 score = 0.6285714285714286
Sensor7
Accuracy score = 0.425
F1 score = 0.42535939995833044
Sensor8
Accuracy score = 0.8916666666666667
F1 score = 0.8915832107757313
Sensor9
Accuracy score = 0.5333333333333333
F1 score = 0.5315164998146088


Saving the obtained information in csv file

In [5]:
#Convert the list containing sensor acuuracy data to DataFrame obj
df = pd.DataFrame(my_list)
df.columns = ['Name','Accuracy']
df = df.set_index('Name') 

#Sort the importance of the sensor in descending order and save in csv data file
df_sorted = df.sort_values('Accuracy', ascending=False)
df_sorted.to_csv('Data/sorted_data.csv')