# Tug & Boat Fleet Position Pipeline
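This notebook scrapes the most recently reported AIS position of every vessel in the fleet from VesselFinder, reverse-geocodes each position to a city and state, and saves the results as a CSV file.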

## Imports, Variables & Function Definitions

In [1]:
import requests
import time
import os
import shutil # Used to copy timestamped csv file
from bs4 import BeautifulSoup
from csv import writer
from geopy.geocoders import Nominatim
from tqdm import tqdm


# Browser-like User-Agent header sent with every vessel page request.
agent = {"User-Agent":'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
# Reverse-geocoding client used to turn "lat, lon" strings into an address.
geolocator = Nominatim(user_agent="jwh")

# Directory that holds every scrape output; created if it does not exist yet.
DATA_DIR = 'data'
os.makedirs(DATA_DIR, exist_ok=True)

# Captured once, when this cell runs (not when the scrape runs), so every
# scrape in the same kernel session writes to the same timestamped file.
timestamp = time.strftime('%Y%m%d-%I_%M_%p')
filename = timestamp + '-Fleet Location.csv'

# Timestamped CSV written by the scrape.
SCRAPE_FILE = os.path.join(DATA_DIR, filename)
# Fixed-name copy of the most recent scrape, used as the default load path.
latest_scrape_file = os.path.join(DATA_DIR, 'updated-fleet-positions.csv')

# Fleet roster: vessel display name -> VesselFinder detail-page URL.
# Each URL is built from the page's name slug and the vessel's MMSI; the tuple
# order below is the order in which vessels are scraped and written out.
boats = {
    vessel_name: f"https://www.vesselfinder.com/vessels/{slug}-IMO-0-MMSI-{mmsi}"
    for vessel_name, slug, mmsi in (
        ("Brangus", "BRANGUS", "366899270"),
        ("Calcasieu River", "CALCASIEU-RIVER", "367313850"),
        ("Cavalier State", "CAVALIER-STATE", "367340130"),
        ("Columbia River", "COLUMBIA-RIVER", "367187130"),
        ("Cooper River", "COOPER-RIVER", "366867370"),
        ("East River", "EAST-RIVER", "366898790"),
        ("Evergreen State", "EVERGREEN-STATE", "367156370"),
        ("Lone Star State", "LONESTAR-STATE", "367183730"),
        ("McCormack Boys", "MCCORMACK-BOYS", "366872170"),
        ("Miami River", "MIAMI-RIVER", "366898810"),
        ("Muskegon River", "MUSKEGON-RIVER", "367509760"),
        ("Ohio River", "OHIO-RIVER", "366899290"),
        ("Pearl River", "PEARL-RIVER", "367187120"),
        ("Saginaw River", "SAGINAW-RIVER", "367511230"),
        ("St Johns River", "SAINT-JOHNS-RIVER", "367313760"),
        ("St Louis River", "ST-LOUIS-RIVER", "367609770"),
        ("Volunteer State", "VOLUNTEER-STATE", "367314110"),
        ("Wolf River", "WOLF-RIVER", "367060910"),
    )
}

In [2]:
def fetch_fleet_positions(scrape_file=SCRAPE_FILE):
    """Scrape each vessel's latest reported position into a CSV file.

    For every entry in the module-level ``boats`` mapping, the vessel's page is
    downloaded, the coordinate and "last update" cells are read from the
    elements with class ``v3``, and the coordinates are reverse-geocoded with
    ``geolocator`` into a town and a state. One row per vessel is written to
    ``scrape_file``; the finished file is then copied to ``latest_scrape_file``.

    Args:
        scrape_file: Path of the CSV file to write. Defaults to ``SCRAPE_FILE``.

    Returns:
        None. The results are written to disk and the elapsed time is printed.

    Raises:
        requests.RequestException: If a page cannot be downloaded (connection
            failure, timeout, or an HTTP error status).
        IndexError: If a page has fewer ``v3`` elements than the indices read
            below.
    """
    # Seconds to wait on a single page request; without a timeout a stalled
    # connection would hang the whole run indefinitely.
    request_timeout = 30

    # Full state/territory name -> 2-letter code. Built once per run rather
    # than on every lookup.
    state_codes = {
        'Alabama': 'AL',
        'Alaska': 'AK',
        'American Samoa': 'AS',
        'Arizona': 'AZ',
        'Arkansas': 'AR',
        'California': 'CA',
        'Colorado': 'CO',
        'Connecticut': 'CT',
        'Delaware': 'DE',
        'District of Columbia': 'DC',
        'Florida': 'FL',
        'Georgia': 'GA',
        'Guam': 'GU',
        'Hawaii': 'HI',
        'Idaho': 'ID',
        'Illinois': 'IL',
        'Indiana': 'IN',
        'Iowa': 'IA',
        'Kansas': 'KS',
        'Kentucky': 'KY',
        'Louisiana': 'LA',
        'Maine': 'ME',
        'Maryland': 'MD',
        'Massachusetts': 'MA',
        'Michigan': 'MI',
        'Minnesota': 'MN',
        'Mississippi': 'MS',
        'Missouri': 'MO',
        'Montana': 'MT',
        'Nebraska': 'NE',
        'Nevada': 'NV',
        'New Hampshire': 'NH',
        'New Jersey': 'NJ',
        'New Mexico': 'NM',
        'New York': 'NY',
        'North Carolina': 'NC',
        'North Dakota': 'ND',
        'Northern Mariana Islands': 'MP',
        'Ohio': 'OH',
        'Oklahoma': 'OK',
        'Oregon': 'OR',
        'Pennsylvania': 'PA',
        'Puerto Rico': 'PR',
        'Rhode Island': 'RI',
        'South Carolina': 'SC',
        'South Dakota': 'SD',
        'Tennessee': 'TN',
        'Texas': 'TX',
        'Utah': 'UT',
        'Vermont': 'VT',
        'Virgin Islands': 'VI',
        'Virginia': 'VA',
        'Washington': 'WA',
        'West Virginia': 'WV',
        'Wisconsin': 'WI',
        'Wyoming': 'WY',
    }

    def abbreviate_state(state):
        """Return the 2-letter code for ``state``, or ``state`` itself if unknown."""
        return state_codes.get(state, state)

    def get_city(raw):
        """Pick the most specific place name from a reverse-geocode result.

        Tries the ``city``, ``town``, ``village`` and ``county`` address keys in
        that order and returns ``'Error'`` when none of them is present.
        """
        address = raw.get('address', {})
        for key in ('city', 'town', 'village', 'county'):
            if key in address:
                return address[key]
        return 'Error'

    def convert_time(last_update):
        """Convert text such as ``'8 hours'`` into a number of minutes.

        All digits in the text are joined into one integer, which is then
        scaled by the first matching unit (day, hour, min). The text is
        returned unchanged when it has no digits or no recognised unit.
        """
        unit_to_minutes = {'day': 24 * 60, 'hour': 60, 'min': 1}
        digits = ''.join(ch for ch in last_update if ch.isdigit())
        if not digits:
            # Nothing numeric to convert; previously this crashed on int('').
            return last_update
        number = int(digits)
        for unit, minutes in unit_to_minutes.items():
            if unit in last_update:
                return number * minutes
        return last_update

    start = time.time()
    # UTF-8 matches the default encoding pandas uses when the file is read
    # back, so non-ASCII place names survive the round trip on every platform.
    with open(scrape_file, 'w', newline='', encoding='utf-8') as file:
        csv_writer = writer(file)

        # Add Headers
        csv_writer.writerow(['Vessel', 'Lat-Long','Last Update', 'Last Updated (Minutes)', 'City', 'State'])

        for boat, url in tqdm(boats.items()):
            response = requests.get(url, headers=agent, timeout=request_timeout)
            # Fail with a clear HTTP error instead of an IndexError further down.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")

            # Isolate the cells of interest; the indices used below depend on
            # the page layout.
            table = soup.find_all(class_='v3')

            # Isolate and clean lat, long coordinates
            coords_text = table[9].get_text()
            # Drop the last two characters of each part (presumably the
            # hemisphere suffix - TODO confirm against the page markup).
            latlon_clean = [val[:-2] for val in coords_text.split('/')]
            # NOTE(review): longitude is negated unconditionally, which assumes
            # every vessel is in the western hemisphere.
            latlon_clean[1] = '-' + latlon_clean[1]
            position = ', '.join(latlon_clean)

            # Reverse lat-lon lookup, isolating town, state
            physical = geolocator.reverse(position)
            if physical is None:
                # No address found for these coordinates.
                town = state = 'Error'
            else:
                vessel_raw = physical.raw
                town = get_city(vessel_raw)
                # Same 'Error' placeholder as get_city when the state is missing.
                state = vessel_raw.get('address', {}).get('state', 'Error')
                # Abbreviate state from full name to a 2-letter code
                state = abbreviate_state(state)

            # Isolate and clean last position update
            time_since_last_position = table[11].get_text()
            cleaned_time = time_since_last_position[:-3]
            converted_time = convert_time(cleaned_time)

            # Write results to spreadsheet row
            csv_writer.writerow([boat, position, cleaned_time, converted_time, town, state])

            # Wait n seconds in between requests
            time.sleep(1)

    duration = time.time() - start

    # Copy the file that was actually written (not the global default path) so
    # the "latest" copy is correct when a custom scrape_file is passed.
    shutil.copyfile(scrape_file, latest_scrape_file)

    print(f'Finished. This script took {round(duration, 2)} seconds to run')

In [6]:
def load_fleet_positions(path=latest_scrape_file):
    """Load a fleet-position CSV file into a pandas DataFrame.

    Args:
        path: CSV file to read. Defaults to ``latest_scrape_file``.

    Returns:
        pandas.DataFrame: The parsed contents of ``path``.
    """
    # Imported locally because the notebook's only other pandas import lives in
    # a later cell; relying on it would raise NameError if this function were
    # called before that cell had run.
    import pandas as pd

    return pd.read_csv(path)

### Program Call
Run the cells below to:
* `fetch_fleet_positions()` - Scrape the latest fleet positions and save them as a `.csv` file.
* `load_fleet_positions()` - Load the saved data into a pandas DataFrame.

In [4]:
# Scrape every vessel in `boats` and write the results to the default CSV path.
fetch_fleet_positions()

100%|██████████████████████████████████████████████████████████████████████████████████| 18/18 [00:31<00:00,  1.75s/it]

Finished. This script took 31.53 seconds to run





In [7]:
import pandas as pd
# Read the CSV at the default path into a DataFrame.
fleet = load_fleet_positions()

In [10]:
# Display the loaded fleet positions.
fleet

Unnamed: 0,Vessel,Lat-Long,Last Update,Last Updated (Minutes),City,State
0,Brangus,"39.37314, -74.42712",54 days ago,77760,Atlantic City,NJ
1,Calcasieu River,"32.76257, -79.92533",84 days ago,120960,Charleston,SC
2,Cavalier State,"32.02369, -81.04659",12 days ago,17280,Thunderbolt,GA
3,Columbia River,"30.38614, -81.53666",2 min ago,2,Jacksonville,FL
4,Cooper River,"32.83284, -79.94203",2 min ago,2,Charleston,SC
5,East River,"32.84329, -79.93221",1 min ago,1,North Charleston,SC
6,Evergreen State,"26.7719, -80.05117",11 mins ago,11,Palm Beach County,FL
7,Lone Star State,"30.49491, -88.0194",202 days ago,290880,Mobile County,AL
8,McCormack Boys,"37.01095, -76.23765",32 days ago,46080,Hampton City,VA
9,Miami River,"31.12757, -81.38871",15 mins ago,15,Saint Simons,GA


In [15]:
# Order vessels by minutes since their last report, smallest first.
fleet.sort_values(by='Last Updated (Minutes)')

Unnamed: 0,Vessel,Lat-Long,Last Update,Last Updated (Minutes),City,State
15,St Louis River,"26.96174, -80.04058",1 min ago,1,Palm Beach County,FL
5,East River,"32.84329, -79.93221",1 min ago,1,North Charleston,SC
14,St Johns River,"32.83289, -79.94205",1 min ago,1,Charleston,SC
12,Pearl River,"32.8328, -79.94217",1 min ago,1,Charleston,SC
3,Columbia River,"30.38614, -81.53666",2 min ago,2,Jacksonville,FL
4,Cooper River,"32.83284, -79.94203",2 min ago,2,Charleston,SC
6,Evergreen State,"26.7719, -80.05117",11 mins ago,11,Palm Beach County,FL
9,Miami River,"31.12757, -81.38871",15 mins ago,15,Saint Simons,GA
10,Muskegon River,"29.91834, -81.30167",8 hours ago,480,St. Augustine,FL
11,Ohio River,"30.43275, -89.08479",4 days ago,5760,Gulfport,MS
