In [1]:
import pandas as pd
import math
import os

In [2]:
def ParseDriveDataFormat(file):
    
    cols = {}
    cols_desc = {}
    with open(file) as f:
        i = 0
        for line in f:
            c = line.splitlines()[0].split(": ")[0]
            d = " ".join(line.splitlines()[0].split(": ")[1:]).strip()
            cols[i] = c
            cols_desc[c] = d
            i = i + 1
        
    return cols, cols_desc

In [3]:
def ParseDriveData(folder, drive_date, drive, columnnames, chunk = False):
    my_data = {}
    fullpath = folder + drive_date + "_drive_" + drive + "_sync" + "/oxts/data"
    for file in os.listdir(fullpath):
        if file.endswith(".txt"):
            filename = os.fsdecode(file)
            entry = int(filename.split(".")[0])
            with open(fullpath + "/" + filename) as f:
                data = f.read().splitlines()[0]
                data = [float(x) for x in data.split(" ")]
                my_data[entry] = data
            
    df = pd.DataFrame.from_dict(my_data, orient='index')
    df = df.rename(columns = columnnames)
    
    df = df.sort_index()
    
    if chunk:
        df = ChunkDriveData(df, columnnames)
        
    df['Break'] = False
    
    for i in range(0, len(df.index) - 1):
        if df['af'].iloc[i + 1] < 0 and df['af'].iloc[i + 1] <= df['af'].iloc[i]:
            df['Break'].iloc[i] = True
    
    return df

    

In [4]:
def ChunkDriveData(data, columnnames):
    print(columnnames)
    
    chunked_data = {}
    for i in range(0, len(data.index), 10):
        chunk = data[i: i + 10]
        for j in columnnames.values():
            if j in chunked_data:
                chunked_data[j].append(chunk[j].mean())
            else: 
                chunked_data[j] = [chunk[j].mean()]
    
    chunked_data = pd.DataFrame.from_dict(chunked_data)
    return chunked_data

In [5]:
def ParseBoxData(folder, drive_date, drive, chunk = False):
    count_files = 0
    frame = []
    static_dynamic = []
    object_type = []
    x1 = []
    y1 = []
    x2 = []
    y2 = []
    
    for filename in os.listdir(folder):
        if filename.startswith(drive_date + "_drive_" + drive + "_sync_"):
            count_files += 1
            filename_split = filename.split("_")
            with open(folder + filename) as f:
                max_area = 0
                max_a1 = 0
                max_b1 = 0
                max_a2 = 0
                max_b2 = 0
                sd_flag = 'static'
                objectf = 'Car'
                for line in f:
                    print(line)
                    data = line.split()
                    a1 = float(data[2])
                    b1 = float(data[3])
                    a2 = float(data[4])
                    b2 = float(data[5])
                    if ((b2 - b1) * (a2 - a1)) > max_area:
                        max_a1 = a1
                        max_b1 = b1
                        max_a2 = a2
                        max_b2 = b2
                        sd_flag = data[0]
                        objectf = data[1]
                        
                frame.append(int(filename_split[len(filename_split)-1].split(".txt")[0]))
                static_dynamic.append(sd_flag)
                object_type.append(objectf)
                x1.append(max_a1)
                y1.append(max_b1)
                x2.append(max_a2)
                y2.append(max_b2)
                
    df = pd.DataFrame.from_dict({'frame':frame,
                                 'static_dynamic':static_dynamic,
                                 'object_type':object_type,
                                 'x1':x1,
                                 'y1':y1,
                                 'x2': x2,
                                 'y2': y2},
                                orient='columns')
    df = df.set_index('frame')
    return df
                
            

In [6]:
def ConcatData(dataformatfile, drivedatafolder, boxdatafolder, drive_date, drive, chunk = False):
    cols, cols_desc = ParseDriveDataFormat(dataformatfile)
    drive_df = ParseDriveData(drivedatafolder, drive_date, drive, cols, chunk = chunk)
    box_df = ParseBoxData(boxdatafolder, drive_date, drive, chunk = chunk)
    box_df = pd.get_dummies(box_df)
    df = pd.concat([box_df, drive_df], axis = 1)
    df['frame'] = df.index
    return df.dropna(axis=0, how='any')

In [7]:
test = ConcatData("drive_data/testing/2011_09_26_drive_0005_sync/oxts/dataformat.txt", 
                 "drive_data/testing/",
                  "box_data/testing/boxes/",
                  '2011_09_26', 
                  '0005'
                 )

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


static Van 173.32 878.81 225.56 930.39

static Van 88.04 973.47 375.0 1242.0

dynamic Van 172.1 653.99 215.71 698.06

static Car 175.08 0.0 286.96 237.35

static Car 184.19 823.37 247.51 933.66

dynamic Van 172.75 444.53 213.4 484.75

static Car 183.7 712.55 288.41 852.37

static Car 185.08 612.42 258.39 732.98

static Car 186.23 253.64 255.67 359.58

static Van 178.86 301.47 239.98 375.87

static Car 178.43 555.7 232.34 630.51

static Car 181.11 528.74 223.63 594.51

static Car 183.07 512.13 214.72 556.41

static Van 153.61 1083.01 243.47 1242.0

static Van 131.1 1155.5 279.76 1242.0

dynamic Van 150.75 845.29 241.08 992.4

static Van 140.83 1042.1 242.11 1242.0

dynamic Van 158.5 581.11 244.06 699.1

dynamic Van 154.72 804.28 225.08 942.22

dynamic Van 157.14 601.9 242.91 720.74

dynamic Van 152.87 823.76 225.07 965.68

dynamic Van 150.57 866.32 241.22 1016.95

static Van 139.79 1055.21 244.05 1242.0

static Van 155.25 1090.88 244.31 1242.0

static Van 130.74 1156.7 283.04 1242.0

st

In [9]:
test.columns

Index(['x1', 'x2', 'y1', 'y2', 'object_type_Car', 'object_type_Van',
       'static_dynamic_dynamic', 'static_dynamic_static', 'lat', 'lon', 'alt',
       'roll', 'pitch', 'yaw', 'vn', 've', 'vf', 'vl', 'vu', 'ax', 'ay', 'az',
       'af', 'al', 'au', 'wx', 'wy', 'wz', 'wf', 'wl', 'wu', 'pos_accuracy',
       'vel_accuracy', 'navstat', 'numsats', 'posmode', 'velmode', 'orimode',
       'Break', 'frame'],
      dtype='object')

In [13]:
test[['x1', 'x2', 'y1', 'y2', 'object_type_Car', 'object_type_Van',
       'static_dynamic_dynamic', 'static_dynamic_static', 'vn', 've', 'vf', 'vl', 'vu', 'ax', 'ay', 'az',
       'af']].head()

Unnamed: 0,x1,x2,y1,y2,object_type_Car,object_type_Van,static_dynamic_dynamic,static_dynamic_static,vn,ve,vf,vl,vu,ax,ay,az,af
0,161.77,292.43,296.8,455.33,0.0,1.0,0.0,1.0,-3.325632,1.138431,3.514768,0.037625,-0.038789,-0.294375,0.037167,9.995702,-0.30581
1,156.04,284.67,294.95,452.3,0.0,1.0,0.0,1.0,-3.288107,1.166745,3.488639,0.054704,0.011018,-0.140118,1.014271,10.536285,-0.11934
2,150.49,277.15,293.15,449.36,0.0,1.0,0.0,1.0,-3.241157,1.234696,3.468542,0.020906,0.087431,-0.375994,0.924644,10.214913,-0.282148
3,145.1,269.86,291.38,446.5,0.0,1.0,0.0,1.0,-3.170711,1.250562,3.408435,0.022682,0.073462,-0.82717,0.462578,9.30188,-0.656318
4,146.66,269.52,290.34,444.48,0.0,1.0,0.0,1.0,-3.103448,1.250491,3.345644,0.044487,-0.000962,-0.635356,0.548596,9.129737,-0.427872


In [18]:
frames = []

testing_drives = ['0005', '0059']
training_drives = ['0001', '0002', '0017', '0018', '0057', '0059']

# Aggregate the training data
for i in training_drives:
    df = ConcatData("dataformat.txt", 
                 "drive_data/training/",
                  "box_data/training/boxes/",
                  '2011_09_26', 
                    i
                 )
    frames.append(df)
    

# Aggregate the testing data
for i in testing_drives:
    df = ConcatData("dataformat.txt", 
                 "drive_data/testing/",
                  "box_data/testing/boxes/",
                  '2011_09_26', 
                    i
                 )
    frames.append(df)
    
i = 0
for frame in frames:
    print(frame.shape)
    frame.to_csv("processed_data/drive" + str(i) + ".csv")
    i = i + 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


(107, 38)
(76, 39)
(113, 39)
(269, 40)
(360, 40)
(175, 40)
(153, 40)
(196, 39)
