In [1]:
import pandas as pd
import glob
import geopandas as gpd

In [2]:
def getIxensumM3(df):
    """Find the dumping session with the largest mass delta and sum the loaded M3.

    The rows of ``df`` are walked in their given order while the ship's full and
    empty mass are tracked, which yields one mass delta per session:

    * first row: ``dredgeSessionMassFullShip - dredgeSessionMassEmptyShip``
    * ``LOADING`` row: the row's full mass minus the previously known full mass
    * ``DUMPING`` row: the previously known full mass minus the row's empty mass
    * any other session type: no delta (NaN)

    Parameters
    ----------
    df : pandas.DataFrame
        Sessions to analyse (the caller passes the rows of one tripId).
        Columns read: ``dredgeSessionType``, ``dredgeSessionMassEmptyShip``,
        ``dredgeSessionMassFullShip`` and ``dredgeSessionM3``.

    Returns
    -------
    tuple
        ``(ix_maxdM, m3_dumping)``. ``ix_maxdM`` is the index *label* of the
        first ``DUMPING`` row with the largest mass delta, or ``None`` when
        ``df`` is empty or has no ``DUMPING`` row with a valid delta.
        ``m3_dumping`` is the sum of ``dredgeSessionM3`` over all ``LOADING``
        rows (``0.0`` for an empty frame).
    """
    # nothing to walk through: avoid the IndexError of df.iloc[0]
    if df.empty:
        return None, 0.0

    # initiate the empty and full mass of the ship with the first session
    first = df.iloc[0]
    mass_empty = first['dredgeSessionMassEmptyShip']
    mass_full = first['dredgeSessionMassFullShip']
    deltas = [mass_full - mass_empty]

    # compute the mass delta of every following session
    for _, row in df.iloc[1:].iterrows():
        session_type = row['dredgeSessionType']
        dM = float('nan')
        if session_type == 'LOADING':
            # the ship was loaded further: previous full mass is the new baseline
            mass_empty = mass_full
            mass_full = row['dredgeSessionMassFullShip']
            dM = mass_full - mass_empty
        elif session_type == 'DUMPING':
            mass_empty = row['dredgeSessionMassEmptyShip']
            dM = mass_full - mass_empty
            # after dumping, the remaining mass is what the ship still carries
            mass_full = mass_empty
        deltas.append(dM)

    # explicit float dtype: a dtype-less empty Series defaults to object
    s = pd.Series(deltas, index=df.index, name='mass_delta', dtype='float64')

    # total M3 of everything that was loaded
    is_loading = (df['dredgeSessionType'] == 'LOADING').to_numpy()
    m3_dumping = df.loc[is_loading, 'dredgeSessionM3'].sum()

    # Only DUMPING sessions may be selected. Comparing the maximum against all
    # sessions would pick a LOADING row whenever it has the same delta
    # (e.g. load X, then dump X).
    is_dumping = (df['dredgeSessionType'] == 'DUMPING').to_numpy()
    dumping_deltas = s[is_dumping].dropna()
    if dumping_deltas.empty:
        return None, m3_dumping

    # idxmax returns the index label; Series.argmax returns a position in
    # current pandas, which does not match a label-based lookup by the caller
    ix_maxdM = dumping_deltas.idxmax()

    return ix_maxdM, m3_dumping

In [3]:
# NOTE(review): absolute, machine-specific Windows paths - adjust before running elsewhere.
# Glob pattern (trailing '//*') for the input files; it is expanded with glob.glob
# and every match is read as tab-separated, UTF-16 LE encoded text.
folder = r'D:\Projects\Pr\3317.20\MARSreizen\parser_utf16//*'
# Destination of the processed table, written as a tab-separated file without the index.
file_out = r'D:\Projects\Pr\3317.20\MARSreizen\parser_utf8/mette-maria.txt'
# Shapefile read with geopandas; only its ID_MARS and tds_m3 columns are used.
bv_shp = r'D:\OMS_Waddenzee\trunk\fews\Config\MapLayerFiles\Baggervakken//Baggervakken.shp'

In [4]:
# glob returns matches in file-system dependent order; sort them so the files
# are always read (and therefore concatenated) in the same order
file_list = sorted(glob.glob(folder, recursive=True))
# fail here with a clear message instead of an opaque error in the reading step
if not file_list:
    raise FileNotFoundError('no input files match: {}'.format(folder))

In [5]:
# Read every input file (tab-separated, UTF-16 LE) and stack them in one go.
# DataFrame.append was removed in pandas 2.0 and copied the growing frame on
# every iteration; a single concat with ignore_index=True gives the same
# result as append + reset_index(drop=True).
frames = [pd.read_csv(path, sep='\t', encoding='utf-16-le') for path in file_list]
df = pd.concat(frames, ignore_index=True)

In [6]:
# Order the sessions per trip by their end date; the per-trip mass bookkeeping
# further down walks the rows in this order.
# NOTE(review): read_csv is called without parse_dates, so dredgeSessionEndDt is
# presumably still text; if it looks like '1-1-1970 00:00' this sort is
# lexicographic, not chronological - TODO confirm the column format.
df = df.sort_values(by=['tripId', 'dredgeSessionEndDt'])

In [7]:
# Lookup table taken from the shapefile: one tds_m3 factor per ID_MARS
right = gpd.read_file(bv_shp)[['ID_MARS', 'tds_m3']]

# Left-join the factor on the session location, derive the M3 per session and
# remove the helper columns again. dredgeSessionM3 ends up as the last column,
# exactly as with a plain column assignment followed by a drop.
df = (
    df.merge(right, how='left', left_on='dredgeSessionLocation', right_on='ID_MARS')
    .assign(dredgeSessionM3=lambda frame: frame['dredgeSessionQuantity'] * frame['tds_m3'])
    .drop(columns=['ID_MARS', 'tds_m3'])
)

In [8]:
# Per trip: find the dumping session with the highest delta mass (supposed to
# be the session where most M3 was dumped) and the sum of all loaded M3.
# groupby splits the frame once instead of re-scanning every row for each trip,
# and it skips rows without a tripId, which would otherwise produce an empty
# slice for getIxensumM3. sort=False keeps the order of first appearance.
# The results are collected first so df is not modified while it is iterated.
trip_results = []
for tripId, df3 in df.groupby('tripId', sort=False):
    dumpix, sumM3 = getIxensumM3(df3)
    trip_results.append((dumpix, sumM3))

# assign these M3 values in the main dataframe
for dumpix, sumM3 in trip_results:
    # guard: assigning through .loc with an unknown label would append a new
    # row instead of updating an existing one
    if dumpix in df.index:
        df.loc[dumpix, 'dredgeSessionM3'] = sumM3

In [9]:
# Replacement values for missing data, paired with the columns of df purely by
# position: '-' for text, -999 for numbers, '1-1-1970 00:00' for timestamps and
# '0:00:00' for durations. zip stops at the shorter input, so columns beyond
# the end of this list get no fill value.
fill_values = [
    '-',-999,'-','-',-999,-999,'-','-','-','-','-','-',-999,
    '1-1-1970 00:00','1-1-1970 00:00',-999,'0:00:00',
    -999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,
    '0:00:00',
    -999,-999,-999,-999,-999,-999,-999,-999,-999,
    '-', '-','1-1-1970 00:00','1-1-1970 00:00','0:00:00',
    -999,-999,-999,-999,-999,
]
s_nan = {column: fill for column, fill in zip(df.columns, fill_values)}

In [10]:
# replace missing values column by column with the fill values in s_nan
df = df.fillna(value=s_nan)
# with the gaps filled, these two columns can be stored as whole numbers
int_columns = ['ls_endTs', 'dredgeSessionId']
df[int_columns] = df[int_columns].astype(int)
# write the result as a tab-separated file, without the index column
df.to_csv(file_out, sep='\t', index=False)