In [0]:
%sql
-- Materialise the vessel attributes used downstream, keeping only rows that
-- have a positive vessel_id and a populated IMO number.
CREATE OR REPLACE TABLE cleaned_vessels AS
SELECT
  v.vessel_id,
  v.imo,
  v.vessel_name,
  v.vessel_status,
  v.vessel_status_group,
  v.age_in_year
FROM rightship_test.default.qa_tht_sample_data_model_vessels AS v
WHERE v.imo IS NOT NULL
  -- A NULL vessel_id never satisfies "> 0", so those rows are dropped as well.
  AND v.vessel_id > 0;

-- Show the freshly built table.
SELECT * FROM cleaned_vessels;

vessel_id,imo,vessel_name,vessel_status,vessel_status_group,age_in_year
503072,2950038197,Sea Horizon,In Trading Fleet,In Service,46
489572,7645524261,Aqua Nova,In Trading Fleet,In Service,17
234602,6847158436,Starboard Spirit,Total Loss,Not In Service,38
125774,4807079427,Iron Gull,In Trading Fleet,In Service,4
175656,4312947302,Storm Petrel,Total Loss,Not In Service,75
146842,2438466352,Silver Dolphin,In Trading Fleet,In Service,34
122906,5779842756,Kraken's Shadow,In Trading Fleet,In Service,10
420160,2358534653,Poseidon's Arrow,In Trading Fleet,In Service,37
200557,8336538328,Neptune's Crown,In Trading Fleet,In Service,33


1. Unit testing

In [0]:
import pandas as pd

def standardize_vessel_timestamps(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Return a copy of ``df`` with ``col`` rendered as ISO-8601 UTC strings.

    Values in ``col`` are parsed with ``pd.to_datetime(..., utc=True)``, so
    offset-aware timestamps are converted to UTC, and then formatted as
    ``YYYY-MM-DDTHH:MM:SSZ`` (second precision; sub-second parts are dropped
    by the format string).

    The input frame is left untouched: the conversion is applied to a copy,
    which keeps the cell idempotent when re-run against the same raw frame.

    Args:
        df: Frame containing the timestamp column.
        col: Name of the column to convert.

    Returns:
        A new DataFrame with the same columns as ``df`` in which ``col`` holds
        the formatted UTC strings.

    Raises:
        KeyError: If ``col`` is not a column of ``df``.
    """
    utc_times = pd.to_datetime(df[col], utc=True)

    # Work on a copy so the caller's raw timestamps are not overwritten.
    standardized = df.copy()
    standardized[col] = utc_times.dt.strftime('%Y-%m-%dT%H:%M:%SZ')
    return standardized

In [0]:
import unittest

class TestVesselData(unittest.TestCase):
    """Unit tests for the vessel timestamp standardisation helper."""

    def test_timestamp_conversion(self):
        """Timestamps written with different offsets normalise to one UTC string."""
        # 10:00 at UTC-05:00 and 15:00 Zulu describe the same instant.
        raw_timestamps = ['2026-01-11 10:00:00-05:00', '2026-01-11 15:00:00Z']
        frame = pd.DataFrame({'timestamp': raw_timestamps})

        converted = standardize_vessel_timestamps(frame, 'timestamp')

        expected = '2026-01-11T15:00:00Z'
        # Check each row in order; the first mismatch fails the test.
        for row_label in (0, 1):
            self.assertEqual(converted['timestamp'][row_label], expected)

In [0]:
%sql
-- Latest crew-entered ETA recorded for each vessel, keyed by IMO number.
-- NOTE(review): if crew_entered_eta is stored as a string rather than a
-- timestamp, MAX compares values lexicographically — confirm the column type.
SELECT
  dv.imo,
  MAX(dv.crew_entered_eta) AS last_voyage_time
FROM rightship_test.default.qa_tht_sample_data_model_daily_voyage AS dv
GROUP BY dv.imo;

imo,last_voyage_time
1000069,2025-09-02T06:00:00.000Z
1000239,2025-09-15T17:30:00.000Z
1000253,2025-01-01T01:45:00.000Z
9990001,2026-01-10T08:00:00Z
10012026,2026-01-10T08:00:00Z
4807079427,2025-09-27T19:00:00.000Z
5779842756,2024-08-13T18:00:00.000Z
