<a href="https://colab.research.google.com/github/saanikagupta/End-to-End-Autonomous-Mobile-Robot-Navigation/blob/master/Data%20Preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

# Google Drive

## Mounting Google Drive

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
!pwd
# !ls 'drive/My Drive/'

/content


## Moving Dataset

In [0]:
import shutil
shutil.move("drive/My Drive/dataset.rar", "dataset.rar")
get_ipython().system_raw("unrar x dataset.rar")
shutil.move("dataset.rar", "drive/My Drive/dataset.rar")

'drive/My Drive/dataset.rar'

# Preprocessing

In [0]:
# Deleting a directory
!rm -rf normalized_dataset

## Normalizing

In [0]:
import os
import pandas as pd
from pathlib import Path
import shutil
from sklearn.preprocessing import MinMaxScaler

In [0]:
# Create a new directory
base_dir = 'normalized_dataset'
os.mkdir(base_dir)

In [0]:
entries = Path('dataset/')

In [0]:
for entry in entries.iterdir():
  # print(entry.name)
  df = pd.read_csv(entry, header = 0)
  df = df.iloc[:, 1:]
  #df.head()
  
  # create scaler
  scaler = MinMaxScaler()
  
  # fit scaler on data
  scaler.fit(df)
  
  # apply transform
  normalized = scaler.transform(df)
  # type(normalized)
  
  new_path = base_dir + '/' + entry.name
  normalized_df = pd.DataFrame(data = normalized, index = None, columns = None)
  normalized_df.to_csv(new_path, index = True)
  # print(new_path)

In [0]:
# After zipping, download the zipped folder containing the dataset
!zip -r normalized_dataset.zip normalized_dataset

  adding: normalized_dataset/ (stored 0%)
  adding: normalized_dataset/las_thetag_dg_w_e2g1_7.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e4g1_10.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e6g1_1.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e3g1_3.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e8g1_5.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e1g1_15.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e2g1_6.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e3g1_17.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e7g1_5.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e7g1_19.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e1g1_7.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e6g1_3.csv (deflated 56%)
  adding: normalized_dataset/las_thetag_dg_w_e7g1_4.csv (deflated 56%)
  adding: normalized_dataset/la

In [0]:
# Moving the generated normalized dataset to Google Drive
shutil.move("normalized_dataset.zip", "drive/My Drive/normalized_dataset.zip")

'drive/My Drive/normalized_dataset.zip'

## Oversampling

In [0]:
import numpy as np
import glob
import pandas as pd
import os
import shutil

In [0]:
# !rm -rf normalized_dataset
shutil.move("drive/My Drive/normalized_dataset.zip", "normalized_dataset.zip")
!unzip 'normalized_dataset.zip'
shutil.move("normalized_dataset.zip", "drive/My Drive/normalized_dataset.zip")

In [0]:
base_dir = 'balanced_normalized_dataset'
# !rm -rf balanced_normalized_dataset

# Making a new directory to store the generated files
os.mkdir(base_dir)

In [0]:
# Writing a custom function for oversampling
def custom_balance(csv_list):
  for CSV in csv_list:
    np_array = np.genfromtxt(CSV, delimiter=',')
    np_array = np_array[1:, 1:]
    
    # Counting the number of three classes
    class_zero = 0
    class_one = 0
    class_two = 0
    class_zero_list = []
    class_one_list = []
    class_two_list = []
    for x in range(len(np_array)):
      if(np_array[x, -1] == 1):
        class_one = class_one + 1
        class_one_list.append(np_array[x, :])
        
      elif(np_array[x, -1] == 0):
        class_zero = class_zero + 1
        class_zero_list.append(np_array[x, :])
      
      else:
        class_two = class_two + 1
        class_two_list.append(np_array[x, :])

    # Printing the number of data points for each class in a particular CSV file
    print('0:', class_zero, '0.5:', class_two, '1:', class_one, CSV, len(np_array))
    val = max(class_zero, class_one)
    new_val = class_two // (val * 2)
    
    # Copying the minority class for oversampling
    class_zero_list = class_zero_list * new_val
    class_one_list = class_one_list * new_val
    
    # Printing the shapes after oversampling
    print('Final shapes:', np.array(class_zero_list).shape, np.array(class_one_list).shape, np.array(class_two_list).shape)
    
    # Stacking the arrays
    np_array = np.vstack((class_zero_list, class_one_list, class_two_list))
    # print(type(np_array))
    
    # Setting path to save the generated CSV file
    file_name = CSV.split('/')
    new_path = base_dir + '/' + file_name[1]
    
    # Dumping the generated NumPy array into a CSV file and saving it in the given path
    pd.DataFrame(np_array).to_csv(new_path)

In [0]:
csv_list = glob.glob('normalized_dataset/*.csv')

In [30]:
custom_balance(csv_list)

0: 8 0.5: 70 1: 7 normalized_dataset/las_thetag_dg_w_e2g1_7.csv 85
Final shapes: (32, 1023) (28, 1023) (70, 1023)
0: 7 0.5: 65 1: 7 normalized_dataset/las_thetag_dg_w_e4g1_10.csv 79
Final shapes: (28, 1023) (28, 1023) (65, 1023)
0: 4 0.5: 50 1: 3 normalized_dataset/las_thetag_dg_w_e6g1_1.csv 57
Final shapes: (24, 1023) (18, 1023) (50, 1023)
0: 8 0.5: 57 1: 6 normalized_dataset/las_thetag_dg_w_e3g1_3.csv 71
Final shapes: (24, 1023) (18, 1023) (57, 1023)
0: 4 0.5: 57 1: 6 normalized_dataset/las_thetag_dg_w_e8g1_5.csv 67
Final shapes: (16, 1023) (24, 1023) (57, 1023)
0: 7 0.5: 66 1: 6 normalized_dataset/las_thetag_dg_w_e1g1_15.csv 79
Final shapes: (28, 1023) (24, 1023) (66, 1023)
0: 8 0.5: 65 1: 6 normalized_dataset/las_thetag_dg_w_e2g1_6.csv 79
Final shapes: (32, 1023) (24, 1023) (65, 1023)
0: 7 0.5: 52 1: 7 normalized_dataset/las_thetag_dg_w_e3g1_17.csv 66
Final shapes: (21, 1023) (21, 1023) (52, 1023)
0: 8 0.5: 65 1: 8 normalized_dataset/las_thetag_dg_w_e7g1_5.csv 81
Final shapes: (32,

In [0]:
# After zipping, download the zipped folder containing the dataset
!zip -r balanced_normalized_dataset.zip balanced_normalized_dataset

In [0]:
import shutil
shutil.move("balanced_normalized_dataset.zip", "drive/My Drive/balanced_normalized_dataset.zip")