In [1]:
!pip install pandas
!pip install pandas
!pip install sklearn
!pip install matplotlib

Collecting pandas
  Downloading pandas-1.2.4-cp37-cp37m-manylinux1_x86_64.whl (9.9 MB)
[K     |████████████████████████████████| 9.9 MB 4.3 MB/s eta 0:00:01
Collecting numpy>=1.16.5
  Downloading numpy-1.20.2-cp37-cp37m-manylinux2010_x86_64.whl (15.3 MB)
[K     |████████████████████████████████| 15.3 MB 62.9 MB/s eta 0:00:01
Installing collected packages: numpy, pandas
Successfully installed numpy-1.20.2 pandas-1.2.4
Collecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
Collecting scikit-learn
  Downloading scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
[K     |████████████████████████████████| 22.3 MB 4.1 MB/s eta 0:00:01
[?25hCollecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-2.1.0-py3-none-any.whl (12 kB)
Collecting scipy>=0.19.1
  Downloading scipy-1.6.3-cp37-cp37m-manylinux1_x86_64.whl (27.4 MB)
[K     |████████████████████████████████| 27.4 MB 46.0 MB/s eta 0:00:01
[?25hCollecting joblib>=0.11
  Downloading joblib-1.0.1-py3-none-any.whl (3

In [2]:
# The Pandas library helps parse CSV files, or files delimited by specific character.
# In this case we use pandas to process our training and test data so we can easily feed it into our program.
import pandas as pd

# From tree we take the DTC class, which will create a decision tree trained with our data.
from sklearn.tree import DecisionTreeClassifier as dtc
from sklearn import tree
# From metrics we use acuracy_score to test our outcome against our expectations.
from sklearn import metrics

import matplotlib.pyplot as plt

In [3]:
# Some constant global variables, labels for our CSV since it has no headers,
# the labels we'll use for training, and an array to hold the performance scores
featureNames2sensor = ["sd_front", "sd_left"]
featureNames4sensor = ["sd_front", "sd_left", "sd_right", "sd_back"]
featureNames24sensor = [("US"+ str(x)) for x in range(1, 24)]
scores = []

In [4]:
# A function to parse our datafiles using Pandas, returns a data frame. It works for training and test sets.
def getDataSet(filename, featureNames):
    # We specify that there's no index column and feed our header names so the values process
    # as we expect them to.
    colNames = featureNames + ["move"]
    dataset = pd.read_csv(filename, index_col=None, header=None, names=colNames)
    classes = [
        "Move-Forward",
        "Slight-Right-Turn",
        "Sharp-Right-Turn",
        "Slight-Left-Turn",
    ]
    # We simplify our "label" field, which is our goal, using a simple function that returns a number representing one of the 4 possible moves
    dataset["move"] = dataset["move"].map(lambda n: classes.index(n) + 1)
    return dataset

In [5]:
def runTest(numSensors, featureNames):
    # We call our get set function to generate the sets we'll be using through the program.
    trainingset = getDataSet("sensor_readings_" + str(numSensors) + "_training.csv", featureNames)
    testset = getDataSet("sensor_readings_" + str(numSensors) + "_test.csv", featureNames)
    
    # We use the DecisionTreeClassifier constructor to generate a DTC using entropy to decide on splits.
    # We feed it our training set and the associated goals to train it.
    moveDecider = dtc(criterion="entropy").fit(trainingset[featureNames], trainingset.move)
    # To test it, we feed it out test set and it returns an array of predictions.
    testPredictions = moveDecider.predict(testset[featureNames])
    # We compare our result to our expectations (the test set goals) and store the percentage in our score array.
    currentScore = metrics.accuracy_score(testset.move, testPredictions)
    scores.append(currentScore)
    figName = str(numSensors) + "-sensor-dtc.png"
    fig = plt.figure(figsize=(20,20))
    tree.plot_tree(moveDecider)
    fig.savefig(figName)
    fig.clf()
    print("Tree saved as " + figName)
    


In [6]:
runTest(2, featureNames2sensor)
print("Accuracy for intial Decision Tree for 2 sensor data:", scores[0])
if scores[0] == 1.0:
    print("a score of 1 means our predictions were perfect.")
runTest(4, featureNames4sensor)
print("Accuracy for intial Decision Tree for 4 sensor data:", scores[1])
if scores[1] == 1.0:
    print("a score of 1 means our predictions were perfect.")
runTest(24, featureNames24sensor)
print("Accuracy for intial Decision Tree for 24 sensor data:", scores[2])
if scores[2] == 1.0:
    print("a score of 1 means our predictions were perfect.")


Tree saved as 2-sensor-dtc.png
Accuracy for intial Decision Tree for 2 sensor data: 1.0
a score of 1 means our predictions were perfect.
Tree saved as 4-sensor-dtc.png
Accuracy for intial Decision Tree for 4 sensor data: 1.0
a score of 1 means our predictions were perfect.
Tree saved as 24-sensor-dtc.png
Accuracy for intial Decision Tree for 24 sensor data: 0.9693287037037037


<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

In [7]:
fig = plt.figure(figsize=(10,10))
ax = fig.add_axes([0,0,1,1])
deciders = ["2 sensor", "4 sensor", "24 sensor"]
ax.bar(deciders, scores)
ax.set_ylabel("Scores")
ax.set_xlabel("Accuracy %")
ax.set_ylim(0,1)
ax.set_title("Classifier Scores in %")
fig.savefig("sensor-comparisson.png", bbox_inches="tight")

fig.clf()

<Figure size 720x720 with 0 Axes>