# Summary

*   This file loads and merges two xlsx spreadsheets, which include oil vessel arrival data into Portland for 2020 and 2021.
*   Key columns are: Arrival date, Terminal (for final delivery), Product Type, Product Amount (Incoming and Outgoing)
*   Possible key columns with empty or mostly empty data: Arrival time.
*   Separate columns were added for Year/Month/Day

# For updating after our project ends:
User uploading data from Excel should do the following first (in Excel):

1.   delete empty rows above column names;
2.   delete unneeded sheets;
3.   save as csv.

# Tidying next steps: 
*   Change data type of Arrival Time values to DateTime, so that this notebook can correctly load Arrival Time values in the future?
*   Should this notebook produce the final, tidied dataframe in a way that can be accessed from a different notebook?




In [56]:
import pandas as pd

url_2020 = 'https://raw.githubusercontent.com/ds5110/stinky/master/2020%20SMRO%20VESSEL%20ARRIVALS.csv'
url_2021 = 'https://raw.githubusercontent.com/ds5110/stinky/master/2021%20SMRO%20VESSEL%20ARRIVALS.csv'

df_2020 = pd.read_csv(url_2020)
df_2021 = pd.read_csv(url_2021)

In [57]:
df_2020.head()

Unnamed: 0,2020 VESSEL ARRIVALS SMRO,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,
2,ARRIVAL\nDATE,ARRIVAL\nTIME (if avail),TERMINAL,SHIP NAME,LAST PORT\n OF CALL (if avail),PRODUCT TYPE - OIL,PRODUCT AMOUNT - OIL\nMultiply metric tons by ...,,RUNNING TOTAL,PRODUCT TYPE - NON-OIL,PRODUCT AMOUNT - NON OIL,,RUNNING TOTAL\n(Specify Units),
3,,,TERMINAL,SHIP'S NAME,,,Incoming,Outgoing,,,Incoming,Outgoing,,
4,1/2/20,,Citgo,New England,St.John,23 - Unleaded Gasoline,80000,,80000,,,,0,


In [58]:
df_2021.head()

Unnamed: 0,2021 VESSEL ARRIVALS SMRO,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,
2,ARRIVAL\nDATE,ARRIVAL\nTIME (if avail),TERMINAL,SHIP NAME,LAST PORT\n OF CALL (if avail),PRODUCT TYPE - OIL,PRODUCT AMOUNT - OIL\nMultiply metric tons by ...,,RUNNING TOTAL,PRODUCT TYPE - NON-OIL,PRODUCT AMOUNT - NON OIL,,RUNNING TOTAL\n(Specify Units),
3,,,TERMINAL,SHIP'S NAME,,,Incoming,Outgoing,,,Incoming,Outgoing,,
4,1/4/21,,Citgo,Iver Prosperity,St. John,23 - Unleaded Gasoline,70000,,70000,,,,0,


In [59]:
# drop last column
df_2020.drop(df_2020.columns[-1], axis=1, inplace=True)

# set correct column name
df_2020.columns = df_2020.iloc[2]

# drop empty rows
df_2020.drop(labels=[0, 1, 2, 3], axis=0, inplace=True)

# rename columns
df_2020.columns = ['ARRIVAL DATE', 'ARRIVAL TIME (if avail)', 'TERMINAL', 'SHIP NAME', 'LAST PORT OF CALL (if avail)', 'PRODUCT TYPE - OIL', 'PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)', 'PRODUCT AMOUNT - OIL, Outgoing', 'RUNNING TOTAL', 'PRODUCT TYPE - NON-OIL',	'PRODUCT AMOUNT - NON OIL, Incoming', 'PRODUCT AMOUNT - NON OIL, Outgoing', 'Running Total (Specify units)']

# drop rows in which Arrival Time is NaN
df_2020.drop(df_2020.loc[pd.isna(df_2020['ARRIVAL DATE'])].index, axis=0, inplace=True)

df_2020

Unnamed: 0,ARRIVAL DATE,ARRIVAL TIME (if avail),TERMINAL,SHIP NAME,LAST PORT OF CALL (if avail),PRODUCT TYPE - OIL,"PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)","PRODUCT AMOUNT - OIL, Outgoing",RUNNING TOTAL,PRODUCT TYPE - NON-OIL,"PRODUCT AMOUNT - NON OIL, Incoming","PRODUCT AMOUNT - NON OIL, Outgoing",Running Total (Specify units)
4,1/2/20,,Citgo,New England,St.John,23 - Unleaded Gasoline,80000,,80000,,,,0
5,1/2/20,,Citgo,New England,St.John,28 - Premium Unleaded Gasoline,20000,,100000,,,,0
6,1/2/20,,Citgo,New England,St.John,29 - Diesel (aka ULSD),22000,,122000,,,,0
7,1/2/20,,Citgo,New England,St.John,23 - Unleaded Gasoline,60000,,182000,,,,0
8,1/2/20,,Citgo,New England,St.John,23 - Unleaded Gasoline,20000,,202000,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,12/4/20,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),2032,,22141950,,,,0
560,12/23/20,,Global,New England,,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",,1452,22140498,,,,0
561,12/9/20,,Sprague,RTC-102,New Jersey,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",75000,,22215498,,,,0
562,12/14/20,,Sprague,MT.LYSIAS,,29 - Diesel (aka ULSD),75000,,22290498,,,,0


In [60]:
# Creating separate year/month/day columns
df_2020['ARRIVAL DATE']=pd.to_datetime(df_2020['ARRIVAL DATE'])
df_2020['Year']=df_2020['ARRIVAL DATE'].dt.year
df_2020['Month']=df_2020['ARRIVAL DATE'].dt.month
df_2020['Day']=df_2020['ARRIVAL DATE'].dt.day
df_2020

Unnamed: 0,ARRIVAL DATE,ARRIVAL TIME (if avail),TERMINAL,SHIP NAME,LAST PORT OF CALL (if avail),PRODUCT TYPE - OIL,"PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)","PRODUCT AMOUNT - OIL, Outgoing",RUNNING TOTAL,PRODUCT TYPE - NON-OIL,"PRODUCT AMOUNT - NON OIL, Incoming","PRODUCT AMOUNT - NON OIL, Outgoing",Running Total (Specify units),Year,Month,Day
4,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,80000,,80000,,,,0,2020,1,2
5,2020-01-02,,Citgo,New England,St.John,28 - Premium Unleaded Gasoline,20000,,100000,,,,0,2020,1,2
6,2020-01-02,,Citgo,New England,St.John,29 - Diesel (aka ULSD),22000,,122000,,,,0,2020,1,2
7,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,60000,,182000,,,,0,2020,1,2
8,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,20000,,202000,,,,0,2020,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,2020-12-04,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),2032,,22141950,,,,0,2020,12,4
560,2020-12-23,,Global,New England,,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",,1452,22140498,,,,0,2020,12,23
561,2020-12-09,,Sprague,RTC-102,New Jersey,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",75000,,22215498,,,,0,2020,12,9
562,2020-12-14,,Sprague,MT.LYSIAS,,29 - Diesel (aka ULSD),75000,,22290498,,,,0,2020,12,14


In [61]:
# drop last column
df_2021.drop(df_2021.columns[-1], axis=1, inplace=True)

# set correct column name
df_2021.columns = df_2021.iloc[2]

# drop empty rows
df_2021.drop(labels=[0, 1, 2, 3], axis=0, inplace=True)

# rename columns
df_2021.columns = ['ARRIVAL DATE', 'ARRIVAL TIME (if avail)', 'TERMINAL', \
                   'SHIP NAME', 'LAST PORT OF CALL (if avail)', \
                   'PRODUCT TYPE - OIL', 'PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)', \
                   'PRODUCT AMOUNT - OIL, Outgoing', 'RUNNING TOTAL', \
                   'PRODUCT TYPE - NON-OIL',	\
                   'PRODUCT AMOUNT - NON OIL, Incoming', \
                   'PRODUCT AMOUNT - NON OIL, Outgoing', 'Running Total (Specify units)']

# drop rows in which Arrival Time is NaN
df_2021.drop(df_2021.loc[pd.isna(df_2021['ARRIVAL DATE'])].index, axis=0, inplace=True)

df_2021

Unnamed: 0,ARRIVAL DATE,ARRIVAL TIME (if avail),TERMINAL,SHIP NAME,LAST PORT OF CALL (if avail),PRODUCT TYPE - OIL,"PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)","PRODUCT AMOUNT - OIL, Outgoing",RUNNING TOTAL,PRODUCT TYPE - NON-OIL,"PRODUCT AMOUNT - NON OIL, Incoming","PRODUCT AMOUNT - NON OIL, Outgoing",Running Total (Specify units)
4,1/4/21,,Citgo,Iver Prosperity,St. John,23 - Unleaded Gasoline,70000,,70000,,,,0
5,1/4/21,,Citgo,Iver Prosperity,St. John,28 - Premium Unleaded Gasoline,32000,,140000,,,,0
6,1/4/21,,Citgo,Iver Prosperity,St. John,29 - Diesel (aka ULSD),42000,,172000,,,,0
7,1/10/21,,Citgo,New England,St. John,23 - Unleaded Gasoline,90000,,214000,,,,0
8,1/7/21,,Citgo,DBL-104,St. John,34 - Ethanol (aka E-100),15000,,304000,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,5/30/21,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),4540,,9037702,,,,0
210,5/4/21,,Sprague,Nor'easter,St. John,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",25000,,9042242,,,,0
211,5/4/21,,Sprague,Nor'easter,St. John,25 - Jet Fuel,35000,,9067242,,,,0
212,5/21/21,,Portland Pipeline,Olympisky Prospect,,61 - Crude Oil,658000,,9102242,,,,0


In [62]:
# Correcting an error in a date to actual date format
df_2021["ARRIVAL DATE"].replace({"2/13/20221": "2/13/2021"}, inplace=True)

# Creating separate year/month/day columns
df_2021['ARRIVAL DATE']=pd.to_datetime(df_2021['ARRIVAL DATE'])
df_2021['Year']=df_2021['ARRIVAL DATE'].dt.year
df_2021['Month']=df_2021['ARRIVAL DATE'].dt.month
df_2021['Day']=df_2021['ARRIVAL DATE'].dt.day
df_2021

Unnamed: 0,ARRIVAL DATE,ARRIVAL TIME (if avail),TERMINAL,SHIP NAME,LAST PORT OF CALL (if avail),PRODUCT TYPE - OIL,"PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)","PRODUCT AMOUNT - OIL, Outgoing",RUNNING TOTAL,PRODUCT TYPE - NON-OIL,"PRODUCT AMOUNT - NON OIL, Incoming","PRODUCT AMOUNT - NON OIL, Outgoing",Running Total (Specify units),Year,Month,Day
4,2021-01-04,,Citgo,Iver Prosperity,St. John,23 - Unleaded Gasoline,70000,,70000,,,,0,2021,1,4
5,2021-01-04,,Citgo,Iver Prosperity,St. John,28 - Premium Unleaded Gasoline,32000,,140000,,,,0,2021,1,4
6,2021-01-04,,Citgo,Iver Prosperity,St. John,29 - Diesel (aka ULSD),42000,,172000,,,,0,2021,1,4
7,2021-01-10,,Citgo,New England,St. John,23 - Unleaded Gasoline,90000,,214000,,,,0,2021,1,10
8,2021-01-07,,Citgo,DBL-104,St. John,34 - Ethanol (aka E-100),15000,,304000,,,,0,2021,1,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,2021-05-30,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),4540,,9037702,,,,0,2021,5,30
210,2021-05-04,,Sprague,Nor'easter,St. John,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",25000,,9042242,,,,0,2021,5,4
211,2021-05-04,,Sprague,Nor'easter,St. John,25 - Jet Fuel,35000,,9067242,,,,0,2021,5,4
212,2021-05-21,,Portland Pipeline,Olympisky Prospect,,61 - Crude Oil,658000,,9102242,,,,0,2021,5,21


In [63]:
# Merge two df
df_vessels = df_2020.append(df_2021, ignore_index=True)
df_vessels

Unnamed: 0,ARRIVAL DATE,ARRIVAL TIME (if avail),TERMINAL,SHIP NAME,LAST PORT OF CALL (if avail),PRODUCT TYPE - OIL,"PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)","PRODUCT AMOUNT - OIL, Outgoing",RUNNING TOTAL,PRODUCT TYPE - NON-OIL,"PRODUCT AMOUNT - NON OIL, Incoming","PRODUCT AMOUNT - NON OIL, Outgoing",Running Total (Specify units),Year,Month,Day
0,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,80000,,80000,,,,0,2020,1,2
1,2020-01-02,,Citgo,New England,St.John,28 - Premium Unleaded Gasoline,20000,,100000,,,,0,2020,1,2
2,2020-01-02,,Citgo,New England,St.John,29 - Diesel (aka ULSD),22000,,122000,,,,0,2020,1,2
3,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,60000,,182000,,,,0,2020,1,2
4,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,20000,,202000,,,,0,2020,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2021-05-30,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),4540,,9037702,,,,0,2021,5,30
766,2021-05-04,,Sprague,Nor'easter,St. John,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",25000,,9042242,,,,0,2021,5,4
767,2021-05-04,,Sprague,Nor'easter,St. John,25 - Jet Fuel,35000,,9067242,,,,0,2021,5,4
768,2021-05-21,,Portland Pipeline,Olympisky Prospect,,61 - Crude Oil,658000,,9102242,,,,0,2021,5,21


In [64]:
# Sorting the dates chronologically after merging the datasets
df_vessels=df_vessels.sort_values(by='ARRIVAL DATE')

# Resetting index
df_vessels=df_vessels.reset_index(drop=True)

In [65]:
df_vessels

Unnamed: 0,ARRIVAL DATE,ARRIVAL TIME (if avail),TERMINAL,SHIP NAME,LAST PORT OF CALL (if avail),PRODUCT TYPE - OIL,"PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)","PRODUCT AMOUNT - OIL, Outgoing",RUNNING TOTAL,PRODUCT TYPE - NON-OIL,"PRODUCT AMOUNT - NON OIL, Incoming","PRODUCT AMOUNT - NON OIL, Outgoing",Running Total (Specify units),Year,Month,Day
0,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,80000,,80000,,,,0,2020,1,2
1,2020-01-02,,Citgo,New England,St.John,28 - Premium Unleaded Gasoline,20000,,100000,,,,0,2020,1,2
2,2020-01-02,,Citgo,New England,St.John,29 - Diesel (aka ULSD),22000,,122000,,,,0,2020,1,2
3,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,60000,,182000,,,,0,2020,1,2
4,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,20000,,202000,,,,0,2020,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2021-05-29,,Global,New England,,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",1525,,9032790,,,,0,2021,5,29
766,2021-05-29,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),2371,,9034315,,,,0,2021,5,29
767,2021-05-30,,Global,New England,,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",1016,,9036686,,,,0,2021,5,30
768,2021-05-30,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),4540,,9037702,,,,0,2021,5,30


In [66]:
# find out type of Product Amount column values - includes float values
print([type(x) for x in df_vessels['PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)']])

[<class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 's

In [67]:
# convert Product Amount columns and last column to float
df_vessels['PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)']\
 = [float(str(x).replace(',', '')) for x in df_vessels['PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)']]
df_vessels['PRODUCT AMOUNT - OIL, Outgoing'] = [float(str(x).replace(',', '')) for x in df_vessels['PRODUCT AMOUNT - OIL, Outgoing']]
df_vessels['Running Total (Specify units)'] = [float(str(x).replace(',', '')) for x in df_vessels['PRODUCT AMOUNT - OIL, Outgoing']]
df_vessels

Unnamed: 0,ARRIVAL DATE,ARRIVAL TIME (if avail),TERMINAL,SHIP NAME,LAST PORT OF CALL (if avail),PRODUCT TYPE - OIL,"PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)","PRODUCT AMOUNT - OIL, Outgoing",RUNNING TOTAL,PRODUCT TYPE - NON-OIL,"PRODUCT AMOUNT - NON OIL, Incoming","PRODUCT AMOUNT - NON OIL, Outgoing",Running Total (Specify units),Year,Month,Day
0,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,80000.0,,80000,,,,,2020,1,2
1,2020-01-02,,Citgo,New England,St.John,28 - Premium Unleaded Gasoline,20000.0,,100000,,,,,2020,1,2
2,2020-01-02,,Citgo,New England,St.John,29 - Diesel (aka ULSD),22000.0,,122000,,,,,2020,1,2
3,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,60000.0,,182000,,,,,2020,1,2
4,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,20000.0,,202000,,,,,2020,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2021-05-29,,Global,New England,,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",1525.0,,9032790,,,,,2021,5,29
766,2021-05-29,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),2371.0,,9034315,,,,,2021,5,29
767,2021-05-30,,Global,New England,,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",1016.0,,9036686,,,,,2021,5,30
768,2021-05-30,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),4540.0,,9037702,,,,,2021,5,30


In [68]:
# replace NaN in Product Amount - Oil, Incoming and Outgoing with 0
df_vessels['PRODUCT AMOUNT - OIL, Outgoing'].fillna(0, inplace=True)
df_vessels['PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)'].fillna(0, inplace=True)
df_vessels

Unnamed: 0,ARRIVAL DATE,ARRIVAL TIME (if avail),TERMINAL,SHIP NAME,LAST PORT OF CALL (if avail),PRODUCT TYPE - OIL,"PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)","PRODUCT AMOUNT - OIL, Outgoing",RUNNING TOTAL,PRODUCT TYPE - NON-OIL,"PRODUCT AMOUNT - NON OIL, Incoming","PRODUCT AMOUNT - NON OIL, Outgoing",Running Total (Specify units),Year,Month,Day
0,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,80000.0,0.0,80000,,,,,2020,1,2
1,2020-01-02,,Citgo,New England,St.John,28 - Premium Unleaded Gasoline,20000.0,0.0,100000,,,,,2020,1,2
2,2020-01-02,,Citgo,New England,St.John,29 - Diesel (aka ULSD),22000.0,0.0,122000,,,,,2020,1,2
3,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,60000.0,0.0,182000,,,,,2020,1,2
4,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,20000.0,0.0,202000,,,,,2020,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2021-05-29,,Global,New England,,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",1525.0,0.0,9032790,,,,,2021,5,29
766,2021-05-29,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),2371.0,0.0,9034315,,,,,2021,5,29
767,2021-05-30,,Global,New England,,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",1016.0,0.0,9036686,,,,,2021,5,30
768,2021-05-30,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),4540.0,0.0,9037702,,,,,2021,5,30


In [69]:
# Recalculate Running Total values (errors in xlsx files and to reflect merged df's)
running_total = []
net = df_vessels['PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)'][0]
running_total.append(net)
for i in range(1, len(df_vessels)):
  net += df_vessels['PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)'][i]
  net -= df_vessels['PRODUCT AMOUNT - OIL, Outgoing'][i]
  running_total.append(net)
df_vessels['RUNNING TOTAL'] = running_total

df_vessels

Unnamed: 0,ARRIVAL DATE,ARRIVAL TIME (if avail),TERMINAL,SHIP NAME,LAST PORT OF CALL (if avail),PRODUCT TYPE - OIL,"PRODUCT AMOUNT - OIL, Incoming (Multiply metric tons by 7.5 for approx bbls)","PRODUCT AMOUNT - OIL, Outgoing",RUNNING TOTAL,PRODUCT TYPE - NON-OIL,"PRODUCT AMOUNT - NON OIL, Incoming","PRODUCT AMOUNT - NON OIL, Outgoing",Running Total (Specify units),Year,Month,Day
0,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,80000.0,0.0,80000.0,,,,,2020,1,2
1,2020-01-02,,Citgo,New England,St.John,28 - Premium Unleaded Gasoline,20000.0,0.0,100000.0,,,,,2020,1,2
2,2020-01-02,,Citgo,New England,St.John,29 - Diesel (aka ULSD),22000.0,0.0,122000.0,,,,,2020,1,2
3,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,60000.0,0.0,182000.0,,,,,2020,1,2
4,2020-01-02,,Citgo,New England,St.John,23 - Unleaded Gasoline,20000.0,0.0,202000.0,,,,,2020,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2021-05-29,,Global,New England,,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",1525.0,0.0,32595056.0,,,,,2021,5,29
766,2021-05-29,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),2371.0,0.0,32597427.0,,,,,2021,5,29
767,2021-05-30,,Global,New England,,"02 - #2 Fuel Oil (aka FO, MGO, DMA)",1016.0,0.0,32598443.0,,,,,2021,5,30
768,2021-05-30,,Global,New England,,06 - #6 Fuel Oil (aka IFO 380),4540.0,0.0,32602983.0,,,,,2021,5,30
