In [1]:
import joblib
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.model_selection import train_test_split

## 1. Load Dataset

In [None]:
# Read combined.csv from the training_data directory and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="../../training_data/combined.csv")
print(data.head(n=5))

     scan_0    scan_1    scan_2    scan_3    scan_4    scan_5    scan_6  \
0  1.390706  1.390105  1.389930  1.390180  1.390855  1.391958  1.393489   
1  1.423212  1.422564  1.422350  1.422572  1.423230  1.424325  1.425858   
2  1.476366  1.475695  1.475474  1.475706  1.476390  1.477526  1.479117   
3  1.545824  1.545104  1.544858  1.545084  1.545784  1.546958  1.548608   
4  1.596116  1.595363  1.595099  1.595323  1.596036  1.597239  1.598932   

     scan_7    scan_8    scan_9  ...  scan_356  scan_357  scan_358  scan_359  \
0  1.395451  1.397847  1.400681  ...  1.395549  1.393583  1.392048  1.390942   
1  1.427831  1.430248  1.433113  ...  1.428191  1.426146  1.424542  1.423377   
2  1.481165  1.483674  1.486647  ...  1.482173  1.480052  1.478389  1.477181   
3  1.550736  1.553346  1.556442  ...  1.551830  1.549593  1.547835  1.546553   
4  1.601120  1.603806  1.606993  ...  1.602279  1.599959  1.598135  1.596803   

   linear_x  linear_y  linear_z  angular_x  angular_y  angular_z  
0

## 2. Data Cleaning

In [8]:
def replace_inf_values(data, max_range=8.0):
    """
    Replace inf with max_range and -inf with 0 in a DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to clean. It is left unmodified.
    max_range : float, default 8.0
        Value substituted for every +inf entry.

    Returns
    -------
    pandas.DataFrame
        A new frame with +inf replaced by ``max_range`` and -inf replaced
        by 0.0; every other value is unchanged.
    """
    # DataFrame.replace returns a new frame, so nothing is written through a
    # column's underlying array (which is read-only under pandas
    # Copy-on-Write) and the caller's frame is never mutated. Non-numeric
    # columns simply pass through instead of raising inside np.isposinf.
    return data.replace({np.inf: max_range, -np.inf: 0.0})

# Clean the loaded frame: +inf entries become 8.0 and -inf entries become 0.
data = replace_inf_values(data, 8.0)

In [11]:
# Sanity check: evaluates to True if any +inf / -inf entry is still present.
np.isinf(data.to_numpy()).any()

False

In [12]:
# The last six columns (linear_x ... angular_z) are the labels;
# every column before them (scan_0 ... scan_359) is a feature.
X, y = data.iloc[:, :-6], data.iloc[:, -6:]

In [13]:
# Remove any leading/trailing whitespace from the label column headers so the
# columns can be addressed by their bare names.
y.columns = y.columns.map(str.strip)

# Print the headers to confirm they are clean
print(y.columns)

Index(['linear_x', 'linear_y', 'linear_z', 'angular_x', 'angular_y',
       'angular_z'],
      dtype='object')


In [14]:
# Collapse each row's velocity command into one class label of the form
# "<linear_x>_<angular_z>" and use that combined label as the target variable.
# The Series returned by apply() is bound to y directly instead of first being
# stored as a new column: y is a slice of `data`, and adding a column to a
# slice triggers pandas' SettingWithCopyWarning.
y = y.apply(
    lambda row: f"{row['linear_x']}_{row['angular_z']}", axis=1
).rename("label")

# label
# "0.5_0.0"   # Move forward
# "0.0_1.0"   # Turn left
# "0.5_1.0"   # Turn while moving forward
# "0.0_0.0"   # Stop

## 3. Train-Test Split and Model Training

In [15]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [16]:
# Fit a seeded 100-tree random forest on the training split.
clf = RandomForestClassifier(random_state=42, n_estimators=100)
clf.fit(X=X_train, y=y_train)

## 4. Evaluate

In [17]:
# Score the classifier on the held-out split
y_pred = clf.predict(X_test)

# Accuracy, expressed as a percentage
accuracy = 100 * accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}%")

# Heading and matrix are emitted on separate lines by one print call
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

Accuracy: 88.52295409181636%
Confusion Matrix:
[[1 0 0 ... 0 0 0]
 [0 3 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 2 0 0]
 [0 0 0 ... 0 0 4]]


In [19]:
# Calculate the F1 score with average='weighted' for the hold-out predictions.
# f1_score is imported in the notebook's top import cell with the other metrics.
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"Weighted F1 Score: {f1}")

Weighted F1 Score: 0.8703918516448176


In [20]:
# Persist the fitted classifier to model_maze.pkl (path is relative to the
# working directory). joblib is imported in the notebook's top import cell.
joblib.dump(clf, "model_maze.pkl")  # Save the model

['model_maze.pkl']

## 5. Testing with Another Dataset

In [21]:
# Load the second dataset (square_dataset.csv) used in this section.
test_data = pd.read_csv(filepath_or_buffer="../../training_data/square_dataset.csv")

In [23]:
# Cleaning
# replace_inf_values is already defined in the data-cleaning section earlier in
# this notebook, so it is reused here rather than defined a second time.
# The frame cleaned here is test_data (the dataset loaded for this section);
# `data` was already cleaned right after it was loaded.
test_data = replace_inf_values(test_data, max_range=8.0)

In [24]:
# Predict on the test dataset
# X_test / y_test are rebuilt from test_data here. Otherwise they would still
# hold the 20% hold-out split of combined.csv and the dataset loaded for this
# section would never be scored.
# Assumes square_dataset.csv has the same column layout as combined.csv
# (scan columns first, six velocity columns last) -- TODO confirm.
# Cleaning is idempotent, so applying it here is safe even if test_data was
# already cleaned; it guarantees no inf value reaches the model.
square_clean = replace_inf_values(test_data, max_range=8.0)

# Feature headers are left exactly as loaded so they match the names the
# classifier saw during fit; only the label headers are stripped.
X_test = square_clean.iloc[:, :-6]
square_cmd = square_clean.iloc[:, -6:].rename(columns=str.strip)

# Same "<linear_x>_<angular_z>" label format as used for training
y_test = square_cmd.apply(
    lambda row: f"{row['linear_x']}_{row['angular_z']}", axis=1
).rename("label")

y_pred = clf.predict(X_test)

In [43]:
# Accuracy of the current predictions, reported as a fraction (not a percentage)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


# Confusion matrix: heading and matrix on separate lines from one print call
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")


Accuracy: 0.8294970161977835
Confusion Matrix:
[[2 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 3 0]
 [0 0 0 ... 0 0 3]]
