## Decoding AIS messages for different Timestamp formats

1. Get different timestamp of predefined acceptable ais/ mnea message formats (if prop message; get time and to next line, continue)
2. Field 2,3,4 "2/3 (id= 4)" format?
3. Field 5 --> Channel 
4. Field 6 --> PAYLOAD
   - Get checksum; checksum ok --> else "Error: Checksum Mismatched"
   - Decode Payload; ASCII to Binary (later improve code by first decoding only bits describing message type --> if 1,2,3, continue decoding the binqry string, else quit)
   - Binary check message type 1,2,3 ok --> else quit 
   - Decode rest; navigation status, ROT, SOG, Position over ground, Position accuracy, Longitude, Latitude, COG, True heading, Timestamp, Maneuver indicator, ,Spare, RAIM flag, - Radio status.
5. Main: read lines in txt file and print the decoding output


## Timestamp
- Proprietary Timestamp: $PGHP,1,2013,11,6,0,0,0,0,272,,,1,25*25 
- \s:Bustard Head B,c:1686096000,T:2023-06-07 00.00.00*77\
- 2023-10-28T07:17:51.000Z !AIVDM,...

In [14]:
import re
from datetime import datetime

def extract_timestamp_and_message(line, current_timestamp=None):
    # ISO 8601 format
    iso_match = re.search(r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)', line)
    if iso_match:
        current_timestamp = iso_match.group()
        line = line.replace(current_timestamp, "").strip()

    # Standard datetime
    std_match = re.search(r'(\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2})', line)
    if std_match:
        current_timestamp = std_match.group()
        line = line.replace(current_timestamp, "").strip()

    # Unix timestamp
    unix_match = re.search(r',(\d{10})$', line)
    if unix_match and current_timestamp is None:
        unix_time = int(unix_match.group(1))
        current_timestamp = datetime.utcfromtimestamp(unix_time).strftime('%Y-%m-%d %H:%M:%S')
        line = line.replace(unix_match.group(), "").strip()

    # T-prefixed timestamp
    t_match = re.search(r'T:(\d{4}-\d{2}-\d{2} \d{2}\.\d{2}\.\d{2})', line)
    if t_match:
        current_timestamp = t_match.group(1).replace('.', ':')
        line = line.replace(t_match.group(0), "").strip()

    # Clean AIS message
    clean_message = re.sub(r'^[^!$]+', '', line).strip()

    return current_timestamp, clean_message


## 

In [15]:
# pip install pandas

## Verify Checksum

In [16]:
def verify_checksum(line):
    # Ensure the line has a '*'
    if '*' not in line:
        return False

    # Split the line into data and checksum parts
    try:
        data, checksum = line.split('*')
    except ValueError:
        return False

    # Remove starting ! or $ for checksum calculation
    if data.startswith('!') or data.startswith('$'):
        data = data[1:]

    # Calculate checksum (XOR of all characters between start and '*')
    calculated_checksum = 0
    for char in data:
        calculated_checksum ^= ord(char)

    # Format to hex with uppercase and pad if needed (e.g., 0A not A)
    expected_checksum = f"{calculated_checksum:02X}"

    # Compare calculated and provided checksum
    return expected_checksum == checksum.upper()


## Message count, message number, sequence ID & Channel
field 2,3,4,5

In [17]:
import numpy as np

def get_ais_header(line):
    # Remove starting '!' or '$' if present
    line = line.lstrip('!$')

    # Split by comma
    fields = line.split(',')

    # Make sure we have enough fields
    if len(fields) < 5:
        raise ValueError("AIS message line has too few fields.")

    # Extract the required parts
    format = fields[0]
    field2 = fields[1]
    field3 = fields[2]
    field4 = fields[3]  if fields[3] != '' else np.nan
    field5 = fields[4]

    # Construct message_count string
    message_count = f"{field2} / {field3} ID= {field4}"

    # Channel
    channel = field5

    return format, message_count, channel


## Extract the Payload and ASCII to Binary

In [18]:
def extract_and_convert_payload(line):
    fields = line.split(',')
    payload = fields[5]

    six_bit_ascii = {
        '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
        '8': 8, '9': 9, ':': 10, ';': 11, '<': 12, '=': 13, '>': 14, '?': 15,
        '@': 16, 'A': 17, 'B': 18, 'C': 19, 'D': 20, 'E': 21, 'F': 22, 'G': 23,
        'H': 24, 'I': 25, 'J': 26, 'K': 27, 'L': 28, 'M': 29, 'N': 30, 'O': 31,
        'P': 32, 'Q': 33, 'R': 34, 'S': 35, 'T': 36, 'U': 37, 'V': 38, 'W': 39,
        '`': 40, 'a': 41, 'b': 42, 'c': 43, 'd': 44, 'e': 45, 'f': 46, 'g': 47,
        'h': 48, 'i': 49, 'j': 50, 'k': 51, 'l': 52, 'm': 53, 'n': 54, 'o': 55,
        'p': 56, 'q': 57, 'r': 58, 's': 59, 't': 60, 'u': 61, 'v': 62, 'w': 63
    }

    binary_str = ''
    for char in payload:
        if char in six_bit_ascii:
            value = six_bit_ascii[char]
            binary_str += format(value, '06b')
                    
    return binary_str


# MMSI

In [19]:
def check_mmsi(mmsi):
    # checks if not null
    try:
        mmsi = str(int(mmsi))
    except:
        return 0, False

    # check if the length is 7 or 9
    if len(mmsi) not in {7, 9}:
        return 0, False

    # should not contain all same digits 000000000
    if len(set(mmsi)) == 1:
        return 0, False

    # should not be consecutive eg: 123456789
    if [int(i) for i in mmsi] == list(range(int(min(mmsi)), int(max(mmsi)) + 1)):
        return 0, False

    return int(mmsi)

# Navigation Status

In [20]:
def get_navigation_status(nav_status):
    # Define the navigation statuses based on AIS standard
    nav_status_decode = {
        0: "Underway using engine",
        1: "At anchor",
        2: "Not under command",
        3: "Restricted manoeuverability",
        4: "Constrained by her draught",
        5: "Moored",
        6: "Aground",
        7: "Engaged in fishing",
        8: "Underway sailing",
        9: "Reserved for future amendment of Navigational Status for HSC",
        10: "Reserved for future amendment of Navigational Status for WIG",
        11: "Power-driven vessel towing astern (regional use)",
        12: "Power-driven vessel pushing ahead or towing alongside (regional use)",
        13: "Reserved for future use",
        14: "AIS-SART is active",
        15: "Undefined (default)"
    }
    
    # Return the corresponding navigation status
    return nav_status_decode.get(nav_status, "Unknown status")

# Longitude

Binary to Float function for the longitude and Latitude

In [21]:
from ast import literal_eval

def binary_to_float(float_str):

    if (float_str)[0] == '-':
        float_str = f"-0b{float_str[1:]}"
    else :
        float_str = f"0b{float_str[1:]}"
    
    result = float(literal_eval(float_str))

    return result
    

In [22]:
def get_long(value):
    # Convert from thousandths of a minute to minutes
    float_value = binary_to_float(value)
    total_minutes = float_value / 600000.0

    # Extract the degrees, minutes, and seconds
    #degrees = int(total_minutes)
    #minutes_decimal = (abs(total_minutes - degrees)) * 60
    #minutes = int(minutes_decimal)
    #seconds = (minutes_decimal - minutes) * 60
    
    return total_minutes

# Latitude

In [23]:
def get_lat(value):
    # Convert from thousandths of a minute to minutes
    float_value = binary_to_float(value)
    total_minutes = float_value / 600000.0

    if total_minutes > 90.0:
        total_minutes = 90- total_minutes

    # Extract the degrees, minutes, and seconds
    #degrees = int(total_minutes)
    #minutes_decimal = (abs(total_minutes - degrees)) * 60
    #minutes = int(minutes_decimal)
    #seconds = (minutes_decimal - minutes) * 60
    
    return total_minutes

# Maneuver Indicator

In [24]:
def get_maneuver_ind(maneuver):
    # Define the navigation statuses based on AIS standard
    maneuver_decode = {
        0: "Not available (default)",
        1: "No special maneuver",
        2: "Special maneuver(such as regional passing arrangement)"
    }
    
    # Return the corresponding navigation status
    return maneuver_decode.get(maneuver, "Unknown status")

## Decode the AIS message

In [25]:
def decode_message(ais_message):
    # Extract Fields 2,3,4,5 (AIS header info)
    format, message_count, channel = get_ais_header(ais_message)

    # Extract and convert the payload to binary string 
    binary_str = extract_and_convert_payload(ais_message)

    # Get and check Message Type
    if len(binary_str) < 6:
        return None
    else:
        message_type = int(binary_str[0:6], 2)
        if message_type not in [1, 2, 3]:
            return None  # Skip this message by returning None

    ####################### Decode the other variables in the message #############################

    # MMSI
    if len(binary_str) < 38:
        return None
    else:
        mmsi = check_mmsi(int(binary_str[8:38], 2))

    # Navigation status
    nav_status = float('nan') if len(binary_str) < 42 else get_navigation_status(int(binary_str[38:42], 2))

    # ROT
    if len(binary_str) < 50:
        rot = float('nan')
    else:
        rot = int(binary_str[42:50], 2)
        if 1 <= rot <= 126:
            rot = int(rot / 4.733) ** 2
        elif -126 <= rot <= -1:
            rot = -1 * (int(rot / 4.733) ** 2)

    # SOG
    if len(binary_str) < 60:
        sog = float('nan')
    else:
        sog = int(binary_str[50:60], 2) * 0.1
        if sog == 102.3:
            sog = float('nan')
        elif sog == 102.2:
            sog = "102.2 knots or higher"

    # Position Accuracy
    if len(binary_str) < 61:
        position_acc = float('nan')
    else:
        position_acc = int(binary_str[60:61], 2)
        position_acc = "<10m" if position_acc == 1 else ">10m"

    # Longitude
    if len(binary_str) < 89:
        long = float('nan')
    else:
        long = get_long(binary_str[61:89])

    # Latitude
    if len(binary_str) < 116:
        lat = float('nan')
    else:
        lat = get_lat(binary_str[89:116])

    # Course over Ground
    if len(binary_str) < 128:
        cog = float('nan')
    else:
        cog = int(binary_str[116:128], 2)
        if cog == 3600:
            cog = float('nan')

    # True Heading
    if len(binary_str) < 137:
        heading = float('nan')
    else:
        heading = int(binary_str[128:137], 2)
        if heading == 511:
            heading = float('nan')

    # Maneuver Indicator
    if len(binary_str) < 42:
        maneuver = get_maneuver_ind(0)
    else:
        maneuver = get_maneuver_ind(int(binary_str[38:42], 2))

    # Radio status
    if len(binary_str) < 168:
        radio_status = float('nan')
    else:
        radio_status = int(binary_str[149:168], 2)

    ##############################################################################################

    return {
        "Format": format,
        "Message_count": message_count,
        "Channel": channel,
        "Message_type": message_type,
        "MMSI": mmsi,
        "Nav_status": nav_status,
        "ROT": rot,
        "SOG": sog,
        "Position_acc": position_acc,
        "longitude": long,
        "latitude": lat,
        "COG": cog,
        "True_heading": heading,
        "Maneuver_ind": maneuver,
        "Radio_status": radio_status
    }


## Main

In [26]:
import pandas as pd
import re
from tqdm import tqdm  # <-- import tqdm

file_path = "C:\\Users\\IALAWWAGuest\\Documents\\Kiki\\Data\\iala-log-20131106.txt"

records = []
current_timestamp = None

with open(file_path, 'r') as file:
    lines = [line.strip() for line in file if line.strip()]

# Wrap tqdm around the range for visual progress
i = 0
while i < len(lines):
    line = lines[i]

    if line.startswith("$PGHP"):
        prop_match = re.search(
            r'\$PGHP,1,(\d{4}),(0?[1-9]|1[0-2]),(0?[1-9]|[12]?[0-9]|3[01]),'
            r'(0?[0-9]|1[0-9]|2[0-3]),(0?[0-9]|[1-5][0-9]),(0?[0-9]|[1-5][0-9])',
            line
        )
        if prop_match:
            year, month, day, hour, minute, second = prop_match.groups()
            current_timestamp = f"{year}-{month.zfill(2)}-{day.zfill(2)} {hour.zfill(2)}:{minute.zfill(2)}:{second.zfill(2)}"

        if i + 1 < len(lines):
            next_line = lines[i + 1]
            if next_line.startswith("!") or next_line.startswith("$"):
                if verify_checksum(next_line):
                    _, ais_message = extract_timestamp_and_message(next_line, current_timestamp)
                    decoded = decode_message(ais_message)
                    if decoded is not None:
                        decoded["timestamp"] = current_timestamp
                        records.append(decoded)
            i += 2
        else:
            i += 1

    else:
        timestamp, ais_message = extract_timestamp_and_message(line, current_timestamp)
        if ais_message and verify_checksum(ais_message):
            decoded = decode_message(ais_message)
            if decoded is not None:
                decoded["timestamp"] = timestamp
                records.append(decoded)
        i += 1

# Wrap loop with tqdm after loading lines
for i in tqdm(range(len(lines)), desc="Processing AIS messages"):
    # your loop logic can go here
    pass  # <-- replace with actual logic if restructuring loop is desired

# Create DataFrame from list of dicts
ais_df = pd.DataFrame(records)
ais_df


Processing AIS messages: 100%|██████████| 17222470/17222470 [00:01<00:00, 9915205.09it/s] 


Unnamed: 0,Format,Message_count,Channel,Message_type,MMSI,Nav_status,ROT,SOG,Position_acc,longitude,latitude,COG,True_heading,Maneuver_ind,Radio_status,timestamp
0,AGVDM,1 / 1 ID= 2,A,1,273350080,Underway using engine,0,6.1,>10m,36.839135,45.577693,250.0,23.0,Not available (default),198851.0,2013-11-06 00:00:00
1,BSVDM,1 / 1 ID= nan,B,3,219006111,Engaged in fishing,0,0.0,>10m,12.308813,56.126458,2975.0,76.0,Unknown status,76323.0,2013-11-06 00:00:00
2,BSVDM,1 / 1 ID= nan,A,3,211342230,Moored,128,0.0,>10m,9.935007,54.658670,0.0,,Unknown status,0.0,2013-11-06 00:00:00
3,BSVDM,1 / 1 ID= nan,A,1,257197000,Underway using engine,0,3.6,<10m,9.360285,57.841543,642.0,67.0,Not available (default),49165.0,2013-11-06 00:00:00
4,AGVDM,1 / 1 ID= 7,A,1,272245000,Underway using engine,128,0.0,<10m,30.732873,46.497875,0.0,,Not available (default),82021.0,2013-11-06 00:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2323699,ABVDM,1 / 1 ID= 1,B,3,413327930,Undefined (default),128,0.0,<10m,121.062032,37.711843,2294.0,,Unknown status,26419.0,2013-11-06 23:59:05
2323700,BSVDM,1 / 1 ID= nan,A,1,311939000,Underway using engine,0,10.7,>10m,11.305583,57.504983,2980.0,295.0,Not available (default),67775.0,2013-11-06 23:59:05
2323701,ABVDM,1 / 1 ID= 8,B,1,412684000,Moored,0,8.0,>10m,108.572883,19.096367,2408.0,251.0,Unknown status,34988.0,2013-11-06 23:59:05
2323702,ABVDM,1 / 1 ID= 5,B,3,538005281,Moored,0,0.0,<10m,113.843407,22.502322,2194.0,132.0,Unknown status,0.0,2013-11-06 23:59:05


# Speed (Efficiency) Log
Hatter_Barn_April_2016.txt / 1m 0.3s / 148 MB
ialadata_81 / too long / 4 GB
IALAGLADSTONE0506_ITU123_20230607_00 / 7 s / 16 MB
iala-log-20131106 / 3m 1.9s / 860 MB (2.300.000 rows)




