In [10]:
import pandas as pd
import math
import os

In [11]:
def ParseDriveDataFormat(file):
    
    cols = {}
    cols_desc = {}
    with open(file) as f:
        i = 0
        for line in f:
            c = line.splitlines()[0].split(": ")[0]
            d = " ".join(line.splitlines()[0].split(": ")[1:]).strip()
            cols[i] = c
            cols_desc[c] = d
            i = i + 1
        
    return cols, cols_desc

In [12]:
def ParseDriveData(folder, drive_date, drive, columnnames, chunk = False):
    my_data = {}
    fullpath = folder + drive_date + "_drive_" + drive + "_sync" + "/oxts/data"
    for file in os.listdir(fullpath):
        if file.endswith(".txt"):
            filename = os.fsdecode(file)
            entry = int(filename.split(".")[0])
            with open(fullpath + "/" + filename) as f:
                data = f.read().splitlines()[0]
                data = [float(x) for x in data.split(" ")]
                my_data[entry] = data
            
    df = pd.DataFrame.from_dict(my_data, orient='index')
    df = df.rename(columns = columnnames)
    
    df = df.sort_index()
    
    if chunk:
        df = ChunkDriveData(df, columnnames)
        
    df['Break'] = False
    
    for i in range(0, len(df.index) - 1):
        if df['af'].iloc[i + 1] < 0 and df['af'].iloc[i + 1] <= df['af'].iloc[i]:
            df['Break'].iloc[i] = True
    
    return df

    

In [13]:
def ChunkDriveData(data, columnnames):
    print(columnnames)
    
    chunked_data = {}
    for i in range(0, len(data.index), 10):
        chunk = data[i: i + 10]
        for j in columnnames.values():
            if j in chunked_data:
                chunked_data[j].append(chunk[j].mean())
            else: 
                chunked_data[j] = [chunk[j].mean()]
    
    chunked_data = pd.DataFrame.from_dict(chunked_data)
    return chunked_data

In [14]:
def ParseBoxData(folder, drive_date, drive, chunk = False):
    count_files = 0
    frame = []
    static_dynamic = []
    object_type = []
    x1 = []
    y1 = []
    x2 = []
    y2 = []
    
    for filename in os.listdir(folder):
        if filename.startswith(drive_date + "_drive_" + drive + "_sync_"):
            count_files += 1
            filename_split = filename.split("_")
            with open(folder + filename) as f:
                max_area = 0
                max_a1 = 0
                max_b1 = 0
                max_a2 = 0
                max_b2 = 0
                sd_flag = 'static'
                objectf = 'Car'
                for line in f:
                    data = line.split()
                    a1 = float(data[2])
                    b1 = float(data[3])
                    a2 = float(data[4])
                    b2 = float(data[5])
                    if ((b2 - b1) * (a2 - a1)) > max_area:
                        max_a1 = a1
                        max_b1 = b1
                        max_a2 = a2
                        max_b2 = b2
                        sd_flag = data[0]
                        objectf = data[1]
                        
                frame.append(int(filename_split[len(filename_split)-1].split(".txt")[0]))
                static_dynamic.append(sd_flag)
                object_type.append(objectf)
                x1.append(max_a1)
                y1.append(max_b1)
                x2.append(max_a2)
                y2.append(max_b2)
                
    df = pd.DataFrame.from_dict({'frame':frame,
                                 'static_dynamic':static_dynamic,
                                 'object_type':object_type,
                                 'x1':x1,
                                 'y1':y1,
                                 'x2': x2,
                                 'y2': y2},
                                orient='columns')
    df = df.set_index('frame')
    return df
                
            

In [15]:
def ConcatData(dataformatfile, drivedatafolder, boxdatafolder, drive_date, drive, chunk = False):
    cols, cols_desc = ParseDriveDataFormat(dataformatfile)
    drive_df = ParseDriveData(drivedatafolder, drive_date, drive, cols, chunk = chunk)
    box_df = ParseBoxData(boxdatafolder, drive_date, drive, chunk = chunk)
    box_df = pd.get_dummies(box_df)
    df = pd.concat([box_df, drive_df], axis = 1)
    df['frame'] = df.index
    return df.dropna(axis=0, how='any')

In [16]:
test = ConcatData("drive_data/testing/2011_09_26_drive_0005_sync/oxts/dataformat.txt", 
                 "drive_data/testing/",
                  "box_data/testing/boxes/",
                  '2011_09_26', 
                  '0005'
                 )

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [17]:
test

Unnamed: 0,x1,x2,y1,y2,object_type_Car,object_type_Van,static_dynamic_dynamic,static_dynamic_static,lat,lon,...,wu,pos_accuracy,vel_accuracy,navstat,numsats,posmode,velmode,orimode,Break,frame
0,161.77,292.43,296.80,455.33,0.0,1.0,0.0,1.0,49.011213,8.422885,...,0.145630,0.492294,0.068884,4.0,10.0,4.0,4.0,0.0,False,0
1,156.04,284.67,294.95,452.30,0.0,1.0,0.0,1.0,49.011210,8.422887,...,0.139954,0.492294,0.068884,4.0,10.0,4.0,4.0,0.0,True,1
2,150.49,277.15,293.15,449.36,0.0,1.0,0.0,1.0,49.011207,8.422888,...,0.125977,0.519609,0.072918,4.0,10.0,4.0,4.0,0.0,True,2
3,145.10,269.86,291.38,446.50,0.0,1.0,0.0,1.0,49.011205,8.422889,...,0.129507,0.519609,0.072918,4.0,10.0,4.0,4.0,0.0,False,3
4,146.66,269.52,290.34,444.48,0.0,1.0,0.0,1.0,49.011202,8.422890,...,0.128826,0.519609,0.072918,4.0,10.0,4.0,4.0,0.0,False,4
5,156.49,198.96,371.28,456.16,0.0,1.0,0.0,1.0,49.011199,8.422892,...,0.124690,0.519609,0.072918,4.0,10.0,4.0,4.0,0.0,True,5
6,159.15,201.77,379.42,464.55,0.0,1.0,0.0,1.0,49.011197,8.422893,...,0.123087,0.519609,0.072918,4.0,10.0,4.0,4.0,0.0,True,6
7,161.84,204.64,387.63,473.02,0.0,1.0,0.0,1.0,49.011194,8.422895,...,0.119485,0.519609,0.072918,4.0,10.0,4.0,4.0,0.0,False,7
8,164.56,207.56,395.92,481.56,0.0,1.0,0.0,1.0,49.011192,8.422897,...,0.114402,0.519609,0.072918,4.0,10.0,4.0,4.0,0.0,True,8
9,167.30,210.52,404.29,490.18,0.0,1.0,0.0,1.0,49.011190,8.422898,...,0.114596,0.519609,0.072918,4.0,10.0,4.0,4.0,0.0,False,9


In [18]:
frames = []

testing_drives = ['0005', '0059']
training_drives = ['0001', '0002', '0017', '0018', '0057', '0059']

# Aggregate the training data
for i in training_drives:
    df = ConcatData("dataformat.txt", 
                 "drive_data/training/",
                  "box_data/training/boxes/",
                  '2011_09_26', 
                    i
                 )
    frames.append(df)
    

# Aggregate the testing data
for i in testing_drives:
    df = ConcatData("dataformat.txt", 
                 "drive_data/testing/",
                  "box_data/testing/boxes/",
                  '2011_09_26', 
                    i
                 )
    frames.append(df)
    
i = 0
for frame in frames:
    print(frame.shape)
    frame.to_csv("processed_data/drive" + str(i) + ".csv")
    i = i + 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


(107, 38)
(76, 39)
(113, 39)
(269, 40)
(360, 40)
(175, 40)
(153, 40)
(196, 39)
