In [1]:
import pandas as pd
import os

In [2]:
# Output directory for the generated CSV files; the relative path is resolved
# against the current working directory of the kernel.
dataset_dir = '../datasets/wall-following-robot'

# exist_ok=True creates the directory (and any missing parents) only when it is
# absent, without a separate existence check that another process could race.
# It also raises if the path exists but is not a directory, instead of silently
# continuing and failing later when files are written.
os.makedirs(dataset_dir, exist_ok=True)

In [3]:
# Column headers: sensor_0 .. sensor_23 followed by the class label.
col_names = list(map('sensor_{}'.format, range(24))) + ['class']

# Every column is loaded as str so the numeric text is kept exactly as it
# appears in the source file rather than being converted to a float type.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/00194/sensor_readings_24.data',
    header=None,
    names=col_names,
    dtype=str,
)

In [4]:
# Draw twice as many rows as the original frame, sampling with replacement.
# The original row count is reused as the random seed, so the draw is
# reproducible across runs.
row_count = len(df)
resampled_df = df.sample(
    frac=2,
    replace=True,
    random_state=row_count,
)

In [5]:
# Stack the original frame on top of itself so every row appears exactly twice.
# The original index labels are kept, so each label occurs twice in the result.
twice_df = pd.concat([df, df])

In [6]:
# Print the per-class row counts of each dataset variant for comparison.
print('=== Class Counts ===')
for heading, frame in (
    ('--- Original ---', df),
    ('--- Repeated (twice) ---', twice_df),
    ('--- Resampled (double) ---', resampled_df),
):
    print(heading)
    print(frame['class'].value_counts())

=== Class Counts ===
--- Original ---
Move-Forward         2205
Sharp-Right-Turn     2097
Slight-Right-Turn     826
Slight-Left-Turn      328
Name: class, dtype: int64
--- Repeated (twice) ---
Move-Forward         4410
Sharp-Right-Turn     4194
Slight-Right-Turn    1652
Slight-Left-Turn      656
Name: class, dtype: int64
--- Resampled (double) ---
Move-Forward         4420
Sharp-Right-Turn     4154
Slight-Right-Turn    1646
Slight-Left-Turn      692
Name: class, dtype: int64


In [7]:
# Write both derived datasets into dataset_dir; index=False leaves the row
# index out of the CSV files.
for file_name, frame in (
    ('wall-following-robot-twice.csv', twice_df),
    ('wall-following-robot-resampled.csv', resampled_df),
):
    frame.to_csv(os.path.join(dataset_dir, file_name), index=False)