# Run the the first cell to recreate the mmsi_type_dict dictionary.

In [35]:
def create_mmsi_dict_from_file(file_path):
    mmsi_type_dict = {}
    
    try:
        with open(file_path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                try:
                    mmsi_part, type_part = line.split(',', 1)

                    mmsi_key = mmsi_part.split(':', 1)[1].strip()
                    ship_type_value = type_part.split(':', 1)[1].strip()

                    mmsi_type_dict[mmsi_key] = ship_type_value
                    
                except (ValueError, IndexError):
                    print(f"Skipping malformed line: '{line}'")

    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

    return mmsi_type_dict


file_name = "data/mmsi_type.txt"
mmsi_map = create_mmsi_dict_from_file(file_name)


if mmsi_map:
    print("--- Successfully created dictionary ---")

--- Successfully created dictionary ---


In [36]:
print(len(mmsi_map))

894


In [39]:
import requests
from bs4 import BeautifulSoup

def get_ship_type_from_mmsi(mmsi):
    url = f"https://www.vesselfinder.com/vessels/details/{mmsi}"
    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
    soup = BeautifulSoup(response.text, "html.parser")

    ais_type_cell = soup.find("td", string="AIS Type")

    if ais_type_cell:
        ship_type = ais_type_cell.find_next_sibling("td", class_="v3").get_text(strip=True)
        return ship_type
    else:
        return "Ship type not found"
test_mmsi = "200000000"
ship_type = get_ship_type_from_mmsi(test_mmsi)
print(f"Ship type for MMSI {test_mmsi}: {ship_type}")

Ship type for MMSI 200000000: Tanker


In [40]:
import pandas as pd
import time
import random

def get_ship_types_from_file(file_path):

    df = pd.read_csv(file_path)

    unique_mmsi = df['MMSI'].unique()

    for mmsi in unique_mmsi:
        if str(mmsi) in mmsi_map:
            continue
        ship_type = get_ship_type_from_mmsi(mmsi)
        print(f"MMSI: {mmsi}, Ship Type: {ship_type}")
        delay = random.uniform(0.3, 0.9)
        time.sleep(delay)

    print("Total unique MMSI count:", len(unique_mmsi))
    print("Total unique ship types found:", len(set(mmsi_type_dict.values())))
    return mmsi_type_dict

mmsi_type_dict = get_ship_types_from_file('data/ais_combined.csv')

MMSI: 255724000, Ship Type: Cargo ship
MMSI: 255735000, Ship Type: Tanker (HAZ-A)
MMSI: 255769000, Ship Type: Cargo ship
MMSI: 255801580, Ship Type: Cargo ship
MMSI: 255802270, Ship Type: Ship type not found
MMSI: 255802540, Ship Type: Cargo ship
MMSI: 255802570, Ship Type: Cargo ship
MMSI: 255802840, Ship Type: Cargo ship
MMSI: 255802940, Ship Type: Tanker
MMSI: 255802950, Ship Type: Tanker
MMSI: 255803560, Ship Type: Tanker
MMSI: 255803870, Ship Type: Other type
MMSI: 255805589, Ship Type: Cargo ship
MMSI: 255805653, Ship Type: Cargo ship
MMSI: 255805672, Ship Type: Cargo ship
MMSI: 255805880, Ship Type: Other type
MMSI: 255805899, Ship Type: Cargo ship (HAZ-A)
MMSI: 255806024, Ship Type: Tanker
MMSI: 255806151, Ship Type: Tanker (HAZ-B)
MMSI: 255806190, Ship Type: Cargo ship (HAZ-A)
MMSI: 255806196, Ship Type: Tanker
MMSI: 255806258, Ship Type: Cargo ship
MMSI: 255806303, Ship Type: Tanker
MMSI: 255806310, Ship Type: Cargo ship
MMSI: 255806328, Ship Type: Cargo ship
MMSI: 255806364,

NameError: name 'mmsi_type_dict' is not defined

In [7]:
mmsi_type_dict

NameError: name 'mmsi_type_dict' is not defined