In [218]:
import os
import psycopg2
import json
import pandas as pd
from tqdm import tqdm
import numpy as np
from datetime import datetime

In [4]:
# TMP: force the database host for this kernel session. This overwrites any
# POSTGRES_HOST value that is already present in the environment.
# NOTE(review): "db" is presumably a container/service hostname — confirm.
os.environ.update({'POSTGRES_HOST': "db"})

In [6]:
# Open the PostgreSQL connection shared by every cell below.
# All four settings are read from the environment; a missing variable raises
# KeyError right here instead of failing later with an obscure driver error.
# The values are passed as keyword arguments rather than interpolated into a
# DSN string, so a password/user containing quotes, backslashes or spaces
# cannot corrupt the connection string.
conn = psycopg2.connect(
    dbname=os.environ['POSTGRES_DB'],
    user=os.environ['POSTGRES_USER'],
    host=os.environ['POSTGRES_HOST'],
    password=os.environ['POSTGRES_PASSWORD'],
)

In [11]:
# Smoke-test the connection: fetch and print the server version row.
# The context manager closes the cursor on exit, including when the query
# raises, which the manual close() call did not guarantee.
with conn.cursor() as curs:
    curs.execute("SELECT version()")
    print(curs.fetchone())

('PostgreSQL 17.0 (Debian 17.0-1.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit',)


In [16]:
# Load the ships dump into `data`. It is consumed below with
# DataFrame.from_dict(..., orient='index'), i.e. as a mapping whose keys
# become the row index.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# locale of the machine/container running the kernel.
with open('../mt-collector-bot/ships.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

In [201]:
# Build the ships DataFrame: one row per top-level key of `data`.
df = pd.DataFrame.from_dict(data, orient='index')
# Drop the columns that none of the inserts below read.
df = df.drop(columns=['INVALID_DIMENSIONS', 'TYPE_IMG', 'TILE_X', 'TILE_Y', 'TILE_Z'])

# LAT/LON stay floating point; every other numeric column is stored as a
# nullable integer. `numeric_cols` is derived from the two lists so they
# cannot drift apart.
float_cols = ['LAT', 'LON']
int_cols = ['SPEED', 'COURSE', 'HEADING', 'ELAPSED', 'LENGTH', 'ROT', 'WIDTH', 'L_FORE', 'W_LEFT', 'DWT', 'GT_SHIPTYPE']
numeric_cols = float_cols + int_cols

# errors='coerce' turns unparseable values into NaN instead of raising.
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
# Nullable Int64 lets integer columns carry missing values.
df[int_cols] = df[int_cols].astype('Int64')

# Map NaN to None so missing values are sent to the database as NULL.
df = df.replace({np.nan: None})
# '--' is treated as "no flag". The dict form is used because the meaning of
# replace('--', None) depends on the pandas version: older releases ignored an
# explicit None and pad-filled the previous row's value instead.
df['FLAG'] = df['FLAG'].replace({'--': None})
df

Unnamed: 0,LAT,LON,SPEED,COURSE,HEADING,ELAPSED,DESTINATION,FLAG,LENGTH,ROT,SHIPNAME,SHIPTYPE,SHIP_ID,WIDTH,L_FORE,W_LEFT,DWT,GT_SHIPTYPE,TYPE_NAME,STATUS_NAME
454448,42.274290,133.1571,1,59,262,2,FOR ORDERS,PA,180,0,WORLD TRADER I,8,454448,32,147,9,38177,88,,
725855,42.400660,133.0308,23,358,354,2,KOZMINO,PA,249,3,HUIHAI PACIFIC,8,725855,43,207,18,115934,17,,
7642592,42.583210,133.4270,128,80,81,369,0ME,RU,64,0,OMOLON,2,7642592,10,20,3,645,37,,
672876,42.438410,133.5913,123,104,109,378,JP KZU,PA,105,0,OCEAN PRIDE 1,7,672876,18,55,11,3450,122,,
755235,42.541700,133.2847,104,90,91,2,RU NJK > RU DKA,PA,246,0,PAVEL CHERNYSH,8,755235,42,203,29,101978,17,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
724527,1.212327,103.6600,4,201,,2,SINGAPORE,SG,31,,NOBLE VEGA,3,724527,10,0,0,155,54,,
8984934,1.221260,103.6516,1,,,4,CLASS B,SG,0,,KM 1,9,8984934,0,0,0,,,,
732901,1.200527,103.6801,3,75,265,3,ARAFR,SG,91,0,DEMURE,8,732901,17,73,10,6257,71,,
5779727,1.266193,103.4266,0,143,294,5,MY.PTP,SG,157,,KIRANA NAWA,8,5779727,28,129,9,18991,71,,


In [208]:
# Add missing flags
#
# Inserts every flag value present in df['FLAG'] but absent from the `flags`
# table, then loads the full table into `flags` as {flag: id} for the ship
# insert further down. On any error the transaction is rolled back and the
# error is printed; in that case `flags` is NOT (re)assigned by this cell.

try:
  with conn.cursor() as cursor:
    cursor.execute("SELECT flag FROM flags")
    # A set makes the membership test below O(1) per candidate flag.
    existing_flags = {row[0] for row in cursor.fetchall()}
    missing_flags = [
      {'flag': flag}
      for flag in df['FLAG'].unique()
      if flag is not None and flag not in existing_flags
    ]

    insert_query = """
      INSERT INTO flags (flag)
      VALUES (%(flag)s)
    """
    cursor.executemany(insert_query, missing_flags)
    conn.commit()
    print(f"Added {len(missing_flags)} flags ({len(existing_flags)} already existing)")

    # Load flags DB: flag value -> primary key.
    cursor.execute("SELECT id, flag FROM flags")
    flags = {flag: flag_id for flag_id, flag in cursor.fetchall()}
except Exception as e:
  conn.rollback()
  print(f"Error processing: {e}")

Added 0 flags (194 already existing)


In [233]:
# Add missing destinations
#
# Inserts every destination name present in df['DESTINATION'] but absent from
# the `destinations` table. On any error the transaction is rolled back and
# the error is printed.

try:
  with conn.cursor() as cursor:
    cursor.execute("SELECT name FROM destinations")
    # A set makes the membership test below O(1) per candidate name; with
    # thousands of stored destinations a list scan per name adds up.
    existing_destinations = {row[0] for row in cursor.fetchall()}
    missing_destinations = [
      {'name': dest}
      for dest in df['DESTINATION'].unique()
      if dest is not None and dest not in existing_destinations
    ]

    insert_query = """
      INSERT INTO destinations (name)
      VALUES (%(name)s)
    """
    cursor.executemany(insert_query, missing_destinations)
    conn.commit()
    print(f"Added {len(missing_destinations)} destinations ({len(existing_destinations)} already existing)")
except Exception as e:
  conn.rollback()
  print(f"Error processing: {e}")

Added 0 destinations (7373 already existing)


In [229]:
# Register parse event
#
# Writes one row to `parses` and keeps the generated primary key in
# `parses_id`; the positions insert below references it as parse_id.
# Both timestamps are taken from the local clock at insert time.

try:
  with conn.cursor() as cursor:
    parse_record = dict(
      start=datetime.now(),
      end=datetime.now(),
      description='TEST PARSER WS-MARK',
    )
    register_parse_sql = """
      INSERT INTO parses (start, "end", description)
      VALUES (%(start)s, %(end)s, %(description)s)
      RETURNING id
    """
    cursor.execute(register_parse_sql, parse_record)
    # RETURNING id yields a single-column row holding the new key.
    new_row = cursor.fetchone()
    parses_id = new_row[0]
    conn.commit()
except Exception as e:
  conn.rollback()
  print(f"Error processing: {e}")

In [234]:
# Add missing ships and record new ships positions
#
# For every row of `df`:
#   1. look the ship up in `ships` by the row's index value;
#   2. insert it (and commit) when it is not there yet;
#   3. insert one `positions` row pointing at the ship's primary key and at
#      the parse registered above (`parses_id`), then commit.
# Relies on `flags` ({flag: id}) and `parses_id` from the earlier cells.
# The first error rolls back the open transaction, prints the error and stops
# the loop; rows committed before that point stay in the database.
# NOTE(review): the lookup uses the DataFrame index while the insert uses
# row['SHIP_ID'] — they look identical in the sample output, confirm.

# The statements do not change between rows, so they are built once.
find_ship_sql = "SELECT id FROM ships WHERE ship_id = %s LIMIT 1"
insert_ship_sql = """
          INSERT INTO ships (ship_id, name, flag_id, width, l_fore, w_left, length)
          VALUES (%(ship_id)s, %(shipname)s, %(flag_id)s, %(width)s, %(l_fore)s, %(w_left)s, %(length)s)
          RETURNING id
        """
insert_position_sql = """
          INSERT INTO positions (ship_id, timestamp, location, speed, course, heading, rot, dwt, type, gt_type, parse_id, destination)
          VALUES (%(ship_id)s, %(timestamp)s, %(location)s, %(speed)s, %(course)s, %(heading)s, %(rot)s, %(dwt)s, %(type)s, %(gt_type)s, %(parse_id)s, %(destination)s)
        """

try:
  for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    with conn.cursor() as cursor:
      cursor.execute(find_ship_sql, (index,))
      ship = cursor.fetchone()
      if ship:
        # Known ship: reuse its primary key.
        ship_id = ship[0]
      else:
        # New ship: insert it and take the generated primary key. Reading
        # ship[0] here would fail because `ship` is None in this branch.
        d = {
            'ship_id': row['SHIP_ID'],
            'shipname': row['SHIPNAME'],
            'flag_id': flags[row['FLAG']] if row['FLAG'] is not None else None,
            'width': row['WIDTH'],
            'l_fore': row['L_FORE'],
            'w_left': row['W_LEFT'],
            'length': row['LENGTH']
        }
        cursor.execute(insert_ship_sql, d)
        ship_id = cursor.fetchone()[0]
        conn.commit()

      d = {
          'ship_id': ship_id,
          'timestamp': datetime.now(),
          # Sent as the text "(LAT, LON)".
          'location': f"({row['LAT']}, {row['LON']})",
          'speed': row['SPEED'],
          'course': row['COURSE'],
          'heading': row['HEADING'],
          'rot': row['ROT'],
          'dwt': row['DWT'],
          'type': row['SHIPTYPE'],
          'gt_type': row['GT_SHIPTYPE'],
          'parse_id': parses_id,
          'destination': row['DESTINATION']
      }
      cursor.execute(insert_position_sql, d)
      conn.commit()

except Exception as e:
  conn.rollback()
  print(f"Error processing: {e}")

100%|██████████| 43947/43947 [03:30<00:00, 208.56it/s]
