In [2]:
def read_dataset(READ_PATH):
    """
    Load a CSV file into a pandas dataframe.

    READ_PATH is handed straight to ``pd.read_csv``. The first rows of
    the loaded frame are echoed to stdout as a quick sanity check, and
    the full dataframe is returned.
    """
    frame = pd.read_csv(READ_PATH)

    # Preview only the head so large files do not flood the output.
    preview = frame.head()
    message = "Read the dataset from {} \n {}".format(READ_PATH, preview)
    print(message)

    return frame


def drop_columns(df, columns):
    """
    Return a copy of ``df`` without the given columns.

    **DOM ID, PMT ID, and directions x, y, z of the doms are unnecessary
    at this time for PointNet**

    The input dataframe is not modified; the first rows of the trimmed
    frame are printed for inspection.
    """
    trimmed = df.drop(labels=columns, axis="columns")

    preview = trimmed.head()
    message = "Dropped columns...{} \n {}".format(columns, preview)
    print(message)

    return trimmed


def save_dataframe(df, WRITE_PATH):
    """
    Save the dataframe as ``simplified_data.csv`` inside WRITE_PATH.

    WRITE_PATH is the target directory. It may be given with or without
    a trailing path separator; the file name is joined on with
    ``os.path.join`` so both forms resolve to the same file. The index
    is not written to the CSV.
    """
    # Local import keeps this function usable regardless of which
    # modules the surrounding file has imported.
    import os

    # Plain string concatenation would turn "../data" into
    # "../datasimplified_data.csv"; join inserts the separator if needed.
    target = os.path.join(WRITE_PATH, "simplified_data.csv")
    df.to_csv(target, index=False)
    print("Saved dataframe at {}".format(WRITE_PATH))
    

def validate_data(old_df, new_df, columns):
    """
    Check that the reduced dataframe is consistent with the original.

    Two conditions are verified:
    1. DF Length: old df and new df have the same number of rows.
    2. Column Count: the number of columns in the new df plus the number
       of removed columns equals the number of columns in the old df.

    Both checks always run, and a message is printed for each one that
    fails. Returns True when both pass and False otherwise.
    """
    is_valid = True

    # A comparison never raises, so it has to be tested explicitly
    # rather than wrapped in try/except.
    if len(old_df) != len(new_df):
        print("Dataframe lengths don't match!")
        is_valid = False

    if len(new_df.columns) + len(columns) != len(old_df.columns):
        print("Columns from the new dataframe were not correctly deleted.")
        is_valid = False

    return is_valid
    
    
def main():
    """
    Run the pipeline: read the CSV, drop the unwanted columns, validate
    the result, and save it only when validation succeeds.

    Uses the module-level READ_PATH, WRITE_PATH and columns settings.
    """
    original = read_dataset(READ_PATH)
    simplified = drop_columns(original, columns)

    # Bail out early so an inconsistent dataframe is never written.
    if not validate_data(original, simplified, columns):
        print("Dataframe was not saved due to errors. Note exceptions")
        return

    save_dataframe(simplified, WRITE_PATH)
        
        
# pandas is imported at module level (not inside the guard below) so the
# functions in this file can resolve ``pd`` when the file is imported as
# a module, not only when it is executed as a script.
import pandas as pd

if __name__ == "__main__":
    # Columns removed from the dataset before it is saved.
    columns = ["dom_id",
               "pmt_id",
               "dir_x",
               "dir_y",
               "dir_z",
               "tot"]

    # Input CSV and output directory, relative to the working directory.
    READ_PATH = "../../data/time_data.csv"
    WRITE_PATH = "../../data/"

    main()

Read the dataset from ../../data/time_data.csv 
    dom_id  pmt_id   pos_x    pos_y    pos_z  dir_x  dir_y  dir_z   tot  time  \
0   321.0     5.0 -17.661   32.245   65.231 -0.460 -0.266 -0.847  26.0   0.0   
1  1653.0    23.0  11.595   85.465   65.459 -0.955 -0.000  0.296  27.0   0.0   
2   275.0     9.0 -36.464   67.166  160.189  0.415  0.720 -0.556  26.0   0.0   
3  1660.0    23.0  61.660  101.635  169.059 -0.955 -0.000  0.296  26.0   0.0   
4   966.0    16.0 -54.510  -78.323   94.341 -0.827  0.478 -0.296  24.0   0.0   

   label  group  
0      0      0  
1      0      0  
2      0      0  
3      0      0  
4      0      0  
Dropped columns...['dom_id', 'pmt_id', 'dir_x', 'dir_y', 'dir_z', 'tot'] 
     pos_x    pos_y    pos_z  time  label  group
0 -17.661   32.245   65.231   0.0      0      0
1  11.595   85.465   65.459   0.0      0      0
2 -36.464   67.166  160.189   0.0      0      0
3  61.660  101.635  169.059   0.0      0      0
4 -54.510  -78.323   94.341   0.0      0      0