In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import plotly.express as px

In [2]:
def create_data_frame(line,surface_type):
    returnVal=None
    ifGroundTruth=False
    if 'Odometry:Pose' in line: 
        line=line.replace('[rosout][INFO]','').strip()
        line=line.split(': Odometry:Pose information ')
        date=line[0].split(' ')[0]
        time=line[0].split(' ')[1]
        x=line[1].split(',')[0].replace('x=','')
        y=line[1].split(',')[1].replace('y=','')
        theta=line[1].split(',')[2].replace('theta=','')
        v=line[1].split(',')[3].replace('velocity=','')
        o=line[1].split(',')[4].replace('omega=','')
        d=line[1].split(',')[5].replace('distance_travelled=','')
        returnVal = {'Date':date,'Time':time, 'X':float(x),'Y':float(y),'Theta':float(theta),'Velocity':float(v),'Omega':float(o),'Odometry_Distance':float(d),'Surface_Type':surface_type}
    elif ' Scan Data: Distance Travelled' in line:
        line=line.replace('[rosout][INFO]','').strip()
        line=line.split(': Scan Data: ')
        date=line[0].split(' ')[0]
        time=line[0].split(' ')[1]
        ground_truth=line[1].split(',')[0].replace('Distance Travelled=','')
        error=line[1].split(',')[1].replace('Error=','')
        returnVal={'Ground_Truth_Distance':float(ground_truth),'Error':float(error)}
        ifGroundTruth=True
    return returnVal,ifGroundTruth


def create_CSV(path,surface_type):
    df_individiual = pd.DataFrame(columns=['Date','Time', 'X','Y','Theta','Velocity','Omega','Odometry_Distance','Ground_Truth_Distance','Error','Surface_Type']) 
    with open(path) as f:
            f = f.readlines()
            prevOdoData = None
            groundTruthVal = []
            errorVal=[]
            for line in f:
                print(line)
                if line.startswith( '[rosout][INFO]' ):
                    value_dict,ifGroundTruth = create_data_frame(line,surface_type)
                    if not ifGroundTruth and value_dict != None:
                        if(prevOdoData != None and len(groundTruthVal) > 0):
                            prevOdoData["Ground_Truth_Distance"] = sum(groundTruthVal)/len(groundTruthVal)
                            prevOdoData["Error"] = sum(errorVal)/len(errorVal)
                        df_individiual=df_individiual.append(prevOdoData,ignore_index=True)
                        prevOdoData = value_dict
                        groundTruthVal = []
                        errorVal = []
                    elif(value_dict != None):
                        groundTruthVal.append(value_dict['Ground_Truth_Distance'])
                        errorVal.append(value_dict['Error'])
    return df_individiual
    

In [3]:
path="LOGS/"
surface=os.listdir( path )
df_all_data = pd.DataFrame(columns=['Date','Time', 'X','Y','Theta','Velocity','Omega','Odometry_Distance','Ground_Truth_Distance','Error','Surface_Type']) 
for surface_type in surface:
    files=os.listdir( path+surface_type )
    for file_index in range(len(files)):
        path_test=path+surface_type+'/'+files[file_index]
        df_individiual=create_CSV(path_test,surface_type)
        df_individiual.to_csv('CSV/'+surface_type+str(file_index)+'.csv')
        df_all_data=df_all_data.append(df_individiual,ignore_index=True)

NotADirectoryError: [Errno 20] Not a directory: 'LOGS/.DS_Store'

In [None]:
df_all_data

In [None]:
df_all_data.shape

### Converting the Date Time columns into datetime format as timestamp

In [None]:
df_all_data['timestamp'] = df_all_data['Date'] + ' ' + df_all_data['Time']
df_all_data['timestamp'] = pd.to_datetime(df_all_data['timestamp'])
df_all_data=df_all_data.drop(columns=['Date', 'Time','Omega'])
df_all_data.to_csv('out.csv', sep=',')
copydf = df_all_data.copy(deep=True)

In [None]:
df_all_data.shape

In [None]:
df_all_data.tail(5)

In [None]:
df_all_data.head(5)

##  Distribution of Surface type 

In [None]:
target = df_all_data['Surface_Type'].value_counts().reset_index().rename(columns={'index': 'Surface Type','Surface_Type':'Count'})
target

In [None]:
sns.countplot(y = df_all_data['Surface_Type'], data = df_all_data)
plt.show()

##  Preprocessing Data

### 1. Empty  Records

In [None]:
df_all_data.isnull().sum()

In [None]:
df_all_data = df_all_data.dropna()
df_all_data.isnull().sum()

### 2. Duplicate  Records

In [None]:
df_all_data['is_duplicate'] = df_all_data.duplicated()
df_all_data['is_duplicate'].value_counts()

In [None]:
df_all_data = df_all_data.drop_duplicates()

In [None]:
df_all_data.isnull().sum()

In [None]:
df_all_data=df_all_data.drop(columns=['is_duplicate'])
df_all_data

In [None]:
corr = df_all_data.corr()
corr

In [None]:
fig, ax = plt.subplots(1,1, figsize = (15,6))

hm = sns.heatmap(df_all_data.iloc[::].corr(),
                ax = ax,
                cmap = 'coolwarm',
                annot = True,
                fmt = '.2f',
                linewidths = 0.05)
fig.subplots_adjust(top=0.93)
fig.suptitle('Correlation Heatmap for dataset', 
              fontsize=14, 
              fontweight='bold')


In [None]:
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(311)
ax.set_title('Distribution of Orientation X,Y,Theta,Valocity,Odometry,Ground Truth, Error',
             fontsize=14, 
             fontweight='bold')
df_all_data.iloc[:,0:7].boxplot()


In [None]:
plt.figure(figsize=(26, 16))
for i, col in enumerate(df_all_data.columns[0:7]):
    ax = plt.subplot(4, 3, i + 1)
    sns.histplot(df_all_data[col], bins=100, label='train')

In [None]:
def clean_data(data):
    data['timestamp'] = data['Date'] + ' ' + data['Time']
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    data=data.drop(columns=['Date', 'Time','Omega'])
    data = data.drop_duplicates()
    data = data.dropna()
    return data


In [None]:
data_test=pd.read_csv('CSV/tile25.csv')  
data_test=clean_data(data_test)
x = np.array(data_test['Error'])
y = np.array(data_test['timestamp'])
fig = px.line(x, y)
fig.show()

In [None]:
fig = px.bar(data_test,x='timestamp', y="Error", title="Error Rise Over Time")
fig.show()

In [None]:
data_test=pd.read_csv('CSV/tile25.csv')  
data_test=clean_data(data_test)
x = np.array(data_test['Error'])
y = np.array(data_test['timestamp'])
fig = px.line(x, y,color=data_test['Velocity'])
fig.show()

In [None]:
fig = px.bar(data_test,x='timestamp', y="Error", color="Velocity", title="Error Rise Over Time",color_continuous_scale='Bluered_r')
fig.show()

In [None]:
data_test=pd.read_csv('CSV/carpet0.csv')  
data_test=clean_data(data_test)
x = np.array(data_test['Error'])
y = np.array(data_test['timestamp'])
fig = px.line(x, y,color=data_test['Velocity'])
fig.show()