In [1]:
import pandas as pd
import glob
import geopandas as gpd
import sys
import getopt

In [2]:
def getIxensumM3(df):
    """Find a trip's largest DUMPING session and total the loaded M3.

    The sessions are walked in row order while tracking the ship's mass.
    For every LOADING or DUMPING session the mass difference it caused is
    stored in a ``mass_delta`` Series aligned with ``df.index``.

    Parameters
    ----------
    df : pandas.DataFrame
        Sessions of one trip, in processing order. Needs at least one row
        and the columns ``dredgeSessionType``, ``dredgeSessionMassEmptyShip``,
        ``dredgeSessionMassFullShip`` and ``dredgeSessionM3``.

    Returns
    -------
    tuple
        ``(ix_maxdM, m3_dumping)`` where ``ix_maxdM`` is the index label of
        the DUMPING session with the highest mass delta (the first one on
        ties) and ``m3_dumping`` is the sum of ``dredgeSessionM3`` over all
        LOADING sessions. When no DUMPING session has a valid mass delta the
        label of the first row is returned, which is what the previous
        implementation did in that situation.
    """
    session_type = df['dredgeSessionType']

    # explicit float dtype: an empty Series without dtype defaults to object
    s = pd.Series(index=df.index, name='mass_delta', dtype=float)

    # initialise the empty and full ship mass from the first session
    first_row = df.iloc[0]
    mass_empty = first_row['dredgeSessionMassEmptyShip']
    mass_full = first_row['dredgeSessionMassFullShip']
    s.loc[df.index[0]] = mass_full - mass_empty

    # compute the mass delta for all following sessions
    for index, row in df.iloc[1:].iterrows():
        kind = row['dredgeSessionType']
        if kind == 'LOADING':
            # the previous full mass is the starting point of this load
            mass_empty = mass_full
            mass_full = row['dredgeSessionMassFullShip']
            s.loc[index] = mass_full - mass_empty
        elif kind == 'DUMPING':
            mass_empty = row['dredgeSessionMassEmptyShip']
            s.loc[index] = mass_full - mass_empty
            # after dumping, the ship continues from its emptied mass
            mass_full = mass_empty

    # total M3 over the LOADING sessions of this trip
    m3_dumping = df.loc[session_type == 'LOADING', 'dredgeSessionM3'].sum()

    # Only look at DUMPING sessions: comparing the whole Series against the
    # maximum would pick a LOADING session that happens to have the same
    # delta (e.g. one load that is dumped completely).
    dump_deltas = s[session_type == 'DUMPING'].dropna()
    if dump_deltas.empty:
        ix_maxdM = df.index[0]
    else:
        ix_maxdM = dump_deltas.idxmax()

    return ix_maxdM, m3_dumping

In [3]:
# Paths used by the notebook cells below.
# NOTE(review): these are absolute, machine-specific Windows paths; the
# notebook only runs unchanged on a machine where they exist.
# inputfile  : tab-separated session export that is read into `df`
# outputfile : location the processed table is written to
# shapefile  : shapefile from which the ID_MARS and TDS_M3 columns are read
inputfile = r"C:\Users\hoek.HKV\Desktop\Import\data.marinusg.tripnr_0_TLS.Marinus G-1566670641058.txt"
outputfile = r"C:\Users\hoek.HKV\Desktop\ImportInterim\data.marinusg.tripnr_0_TLS.Marinus G-1566670641058.txt"
shapefile = r"D:\FEWSProjecten\OmsWaddenzee\trunk\fews\Config\MapLayerFiles\Baggervakken\Baggervakken.shp"

In [4]:
# Read the tab-separated, UTF-16-LE encoded input file and give the frame a
# fresh positional index.
df = pd.read_csv(inputfile, sep='\t', encoding='utf-16-le').reset_index(drop=True)

In [5]:
# Order the sessions by trip and, within a trip, by session end value.
# NOTE(review): if dredgeSessionEndDt is still text at this point the sort is
# lexicographic rather than chronological - confirm the column dtype.
df.sort_values(['tripId', 'dredgeSessionEndDt'], inplace=True)

# Read the two attribute columns needed from the shapefile.
right = gpd.read_file(shapefile)[['ID_MARS', 'TDS_M3']]

# Cast BOTH join keys to str (as main() does) so the merge never compares a
# text key against a numeric one.
right['ID_MARS'] = right['ID_MARS'].astype(str)
df['dredgeSessionLocation'] = df['dredgeSessionLocation'].astype(str)

# Left join: every session is kept, sessions without a matching ID_MARS get
# NaN for TDS_M3.
df = df.merge(right, how='left', left_on='dredgeSessionLocation', right_on='ID_MARS')

In [6]:
# Compute the cubic metres per session: quantity divided by TDS_M3
# (changed on 26-01-2016 from '*' to '/').
df['dredgeSessionM3'] = df['dredgeSessionQuantity'].div(df['TDS_M3'])

# The shapefile columns are no longer needed after the division.
df.drop(columns=['ID_MARS', 'TDS_M3'], inplace=True)

# Keep only the DUMPING and LOADING sessions.
is_load_or_dump = df['dredgeSessionType'].isin(['DUMPING', 'LOADING'])
df = df.loc[is_load_or_dump]

# Per trip: look up the dumping session with the highest delta mass and
# store the summed M3 of the trip's loading sessions on that row.
for trip in df['tripId'].unique():
    trip_sessions = df.loc[df['tripId'] == trip].copy()
    dump_label, loaded_m3 = getIxensumM3(trip_sessions)
    df.loc[df.index == dump_label, 'dredgeSessionM3'] = loaded_m3

In [7]:
# Replacement values for missing data, one per column IN COLUMN ORDER.
# zip() stops at the shorter of the two sequences, so columns beyond the
# length of this list get no default.
fill_defaults = ['-',-999,'-','-',-999,-999,'-','-','-','-','-','-',-999,'1-1-1970 00:00','1-1-1970 00:00',-999,'0:00:00',-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,'0:00:00',-999,-999,-999,-999,-999,-999,-999,-999,-999,'-', '-','1-1-1970 00:00','1-1-1970 00:00','0:00:00',-999,-999,-999,-999,-999]
s_nan = dict(zip(df.columns, fill_defaults))
df.fillna(s_nan, inplace=True)

# These columns are written out as integers.
int_columns = ['ls_endTs', 'dredgeSessionId', 'dredgeSessionQuantity', 'tripM3', 'tripM3_calc']
df[int_columns] = df[int_columns].astype(int)

In [8]:
# Write the processed table to `outputfile` as a tab-separated file; the
# DataFrame index is not written.
df.to_csv(outputfile, sep='\t', index=False)

In [None]:



# In[3]:

# MAIN PROGRAM  
def main(argv):
    """Command-line entry point: process one session export file.

    Reads the tab-separated input file, joins the TDS_M3 value from the
    shapefile on the session location, computes ``dredgeSessionM3``, assigns
    the per-trip loaded M3 sum to the largest dumping session, fills missing
    values and writes the result as a tab-separated file.

    Parameters
    ----------
    argv : list of str
        Command-line arguments without the program name. Recognised options:
        ``-i``/``--ipath``/``--inputfile``, ``-o``/``--opath``/``--outputfile``,
        ``-s``/``--spath``/``--shapefile`` (all three required) and
        ``-h``/``--help``.

    Raises
    ------
    SystemExit
        With code 2 on an unknown option or a missing required option; with
        no code after printing the help text.
    """
    usage = 'usage: MARS_parser.py -i <inputfile> -o <outputfile> -s <shapefile>'

    # input argument checking
    try:
        opts, _ = getopt.getopt(
            argv,
            "hi:o:s:",
            ["help", "ipath=", "opath=", "spath=",
             "inputfile=", "outputfile=", "shapefile="],
        )
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    inputfile = outputfile = shapefile = None
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ipath", "--inputfile"):
            inputfile = arg
        elif opt in ("-o", "--opath", "--outputfile"):
            outputfile = arg
        elif opt in ("-s", "--spath", "--shapefile"):
            shapefile = arg

    # all three paths are needed below; fail early with the usage text
    if inputfile is None or outputfile is None or shapefile is None:
        print(usage)
        sys.exit(2)

    # read the tab-separated, UTF-16-LE encoded input file
    df = pd.read_csv(inputfile, sep='\t', encoding='utf-16-le')
    df.sort_values(['tripId', 'dredgeSessionEndDt'], inplace=True)

    # select the needed columns from the shapefile
    right = gpd.read_file(shapefile)[['ID_MARS', 'TDS_M3']]

    # left join on the location id; both keys are cast to str first so the
    # merge compares like with like
    right['ID_MARS'] = right['ID_MARS'].astype(str)
    df['dredgeSessionLocation'] = df['dredgeSessionLocation'].astype(str)
    df = df.merge(right, how='left', left_on='dredgeSessionLocation', right_on='ID_MARS')

    # compute cubic metres (changed on 26-01-2016 from '*' to '/')
    df['dredgeSessionM3'] = df['dredgeSessionQuantity'] / df['TDS_M3']
    # drop the unwanted columns
    df.drop(['ID_MARS', 'TDS_M3'], axis=1, inplace=True)
    df = df.loc[df['dredgeSessionType'].isin(['DUMPING', 'LOADING'])]

    for tripId in df['tripId'].unique():
        # get slice of dataframe based on tripId
        df3 = df.loc[df['tripId'] == tripId].copy()

        # get the index of the dumping session with the highest delta mass
        # and the sum of all loaded M3 of this trip
        dumpix, sumM3 = getIxensumM3(df3)

        # assign this M3 value in the main dataframe
        df.loc[df.index == dumpix, 'dredgeSessionM3'] = sumM3

    # replacement values for missing data, one per column in column order
    s_nan = dict(zip(df.columns,['-',-999,'-','-',-999,-999,'-','-','-','-','-','-',-999,'1-1-1970 00:00','1-1-1970 00:00',-999,'0:00:00',-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,'0:00:00',-999,-999,-999,-999,-999,-999,-999,-999,-999,'-', '-','1-1-1970 00:00','1-1-1970 00:00','0:00:00',-999,-999,-999,-999,-999]))
    df.fillna(s_nan, inplace=True)

    int_columns = ['ls_endTs', 'dredgeSessionId', 'dredgeSessionQuantity', 'tripM3', 'tripM3_calc']
    df[int_columns] = df[int_columns].astype(int)

    # write to csv
    df.to_csv(outputfile, sep='\t', index=False)

         
# When the module is executed directly, call main() with the command-line
# arguments that follow the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
