In [8]:
# 🧩 1. Import necessary libraries
import requests
import json
import time
import pandas as pd
import os
from datetime import datetime

In [None]:
# 🔐 2. Set API endpoints and keys
# Each key is looked up in an environment variable of the same name so that
# secrets never need to be typed into (and saved with) the notebook. When a
# variable is unset the value falls back to "", which is what was previously
# hard-coded here.
GRAPHQL_URL = "https://api.kpler.marinetraffic.com/v2/vessels/graphql"
GRAPHQL_API_KEY = os.environ.get("GRAPHQL_API_KEY", "")
AIS_API_KEY = os.environ.get("AIS_API_KEY", "")
HISTORICAL_API_KEY = os.environ.get("HISTORICAL_API_KEY", "")
PORTCALLS_API_KEY = os.environ.get("PORTCALLS_API_KEY", "")

In [10]:
# 📦 3. Fetch vessels by beneficial owner name (GraphQL)
def fetch_vessels(after_cursor=None, owner_pattern="AASEN SHIPPING%", request_timeout=30):
    """Fetch one page of vessel IMO numbers from the vessels GraphQL endpoint.

    The query asks for up to 1000 vessels whose
    ``management.beneficialOwner.current.name`` matches ``owner_pattern`` with
    the ``LIKE`` operator, and requests each node's ``identifier.imo`` plus the
    ``pageInfo`` needed for cursor pagination.

    Args:
        after_cursor: ``endCursor`` of the previous page, or ``None`` to request
            the first page.
        owner_pattern: Pattern placed in the filter's ``values`` list. The
            default reproduces the previously hard-coded value.
        request_timeout: Seconds passed as ``timeout`` to ``requests.post`` so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON response body, or ``None`` when the HTTP status is not
        200 (the status code and response text are printed in that case).
    """
    # json.dumps renders None as `null` and quotes/escapes strings, so both
    # interpolated values land in the query as properly delimited literals.
    query = f"""
    query Vessels {{
        vessels(
            first: 1000
            where: {{
                filters: [
                    {{
                        field: "management.beneficialOwner.current.name"
                        op: LIKE
                        values: [{json.dumps(owner_pattern)}]
                    }}
                ]
                operator: OR
            }}
            after: {json.dumps(after_cursor)}
        ) {{
            nodes {{
                identifier {{
                    imo
                }}
            }}
            pageInfo {{
                hasNextPage
                endCursor
            }}
        }}
    }}
    """

    headers = {
        "Authorization": f"Basic {GRAPHQL_API_KEY}",
        "Content-Type": "application/json"
    }

    response = requests.post(
        GRAPHQL_URL,
        json={"query": query},
        headers=headers,
        timeout=request_timeout,
    )

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    return response.json()

In [11]:
# 📥 4. Loop through pages and gather IMO list
imo_list = []
after_cursor = None

while True:
    data = fetch_vessels(after_cursor)
    if not data:
        break

    # A GraphQL endpoint may answer HTTP 200 with an "errors" list and a null
    # "data" payload; report it and fall through to the empty-page handling
    # below instead of failing on a None subscript.
    if data.get("errors"):
        print(f"GraphQL errors: {data['errors']}")
    connection = (data.get("data") or {}).get("vessels") or {}

    for vessel in connection.get("nodes") or []:
        # "identifier" may be null for a node; treat that as "no IMO".
        imo = (vessel.get("identifier") or {}).get("imo")
        if imo:
            imo_list.append(imo)

    page_info = connection.get("pageInfo") or {}
    next_cursor = page_info.get("endCursor")
    # Stop when there is no further page, or when the cursor is missing or did
    # not advance (which would otherwise request the same page forever).
    if not page_info.get("hasNextPage") or not next_cursor or next_cursor == after_cursor:
        break
    after_cursor = next_cursor

print(f"‚úÖ Found {len(imo_list)} vessels. First 5 IMOs: {imo_list[:5]}")

‚úÖ Found 7 vessels. First 5 IMOs: [9060778, 9101546, 9147136, 9321407, 9433389]


In [12]:
# 📡 5. Fetch live AIS positions from the MarineTraffic Export API
def fetch_ais_data(api_key, imo_list, timespan=1440, buffer_time=1, request_timeout=30):
    """Fetch AIS position records for each IMO, one HTTP request per vessel.

    Failures for an individual vessel (non-OK status, unexpected payload shape,
    or any exception) are printed and skipped so the remaining vessels are
    still processed.

    Args:
        api_key: Key interpolated into the ``exportvessel`` URL path.
        imo_list: Sequence of IMO numbers to query.
        timespan: Value interpolated into the ``timespan:`` URL segment.
        buffer_time: Seconds to sleep between consecutive requests (no sleep
            after the last one).
        request_timeout: Seconds passed as ``timeout`` to ``requests.get`` so a
            stalled connection cannot block the loop indefinitely.

    Returns:
        A DataFrame of all collected records, each tagged with an ``IMO``
        column. When the columns IMO, SHIPNAME, LAT, LON and TIMESTAMP are all
        present only those are returned (in that order); otherwise the full
        frame is returned. An empty DataFrame is returned when nothing was
        fetched.
    """
    url_template = f'https://services.marinetraffic.com/api/exportvessel/{api_key}/v:6/timespan:{timespan}/imo:{{imo}}/protocol:jsono'
    all_ais_data = []

    for idx, imo in enumerate(imo_list, start=1):
        print(f"[{idx}/{len(imo_list)}] Fetching AIS for IMO: {imo}")
        try:
            response = requests.get(url_template.format(imo=imo), timeout=request_timeout)
            if response.ok:
                data = response.json()
                if isinstance(data, list):
                    for record in data:
                        # Tag every record with the IMO it was requested for.
                        record['IMO'] = imo
                        all_ais_data.append(record)
                else:
                    print(f"‚ö†Ô∏è Unexpected format for IMO {imo}")
            else:
                print(f"‚ùå Failed for IMO {imo}: {response.status_code}")
        except Exception as e:
            # Deliberately broad: one bad vessel must not abort the whole run.
            print(f"‚ùå Exception for IMO {imo}: {e}")

        if idx < len(imo_list):
            time.sleep(buffer_time)

    if all_ais_data:
        df = pd.DataFrame(all_ais_data)
        if {'IMO', 'SHIPNAME', 'LAT', 'LON', 'TIMESTAMP'}.issubset(df.columns):
            return df[['IMO', 'SHIPNAME', 'LAT', 'LON', 'TIMESTAMP']]
        else:
            return df
    else:
        print("‚ö†Ô∏è No AIS data fetched.")
        return pd.DataFrame()

In [13]:
# 🔍 6. Call the function and show results
# Query every IMO collected above, report the row count, then preview the frame.
df_ais = fetch_ais_data(api_key=AIS_API_KEY, imo_list=imo_list)
print(f"‚úÖ Retrieved {df_ais.shape[0]} AIS position records.")
df_ais.head(5)

[1/7] Fetching AIS for IMO: 9060778
[2/7] Fetching AIS for IMO: 9101546
[3/7] Fetching AIS for IMO: 9147136
[4/7] Fetching AIS for IMO: 9321407
[5/7] Fetching AIS for IMO: 9433389
[6/7] Fetching AIS for IMO: 9904766
[7/7] Fetching AIS for IMO: 9904869
‚úÖ Retrieved 7 AIS position records.


Unnamed: 0,IMO,SHIPNAME,LAT,LON,TIMESTAMP
0,9060778,AASLI,54.613209,-5.916578,2025-05-08T08:17:37
1,9101546,AASNES,54.391495,18.670176,2025-05-08T08:20:34
2,9147136,AASTIND,53.680603,2.923402,2025-05-08T08:24:05
3,9321407,AASTUN,59.640179,-0.178685,2025-05-08T08:22:38
4,9433389,AASVIK,51.290115,-3.485885,2025-05-08T08:24:17


In [14]:
# 💾 7. Save to CSV file
# Nothing is written when the fetch produced no rows; otherwise the snapshot
# goes to a file whose name carries the current local time to the minute.
if df_ais.empty:
    print("‚ö†Ô∏è No data to save.")
else:
    timestamp = f"{datetime.now():%Y-%m-%d_%H-%M}"
    output_filename = "ais_positions_pdvsa_" + timestamp + ".csv"
    df_ais.to_csv(output_filename, index=False)
    print(f"üìÅ Data saved to: {output_filename}")

üìÅ Data saved to: ais_positions_pdvsa_2025-05-08_16-25.csv


In [15]:
# 📡 8. Fetch and append historical AIS per IMO
def fetch_historical_ais_and_save(api_key, imo_list, output_path, days=180, period="daily", msgtype="simple", buffer_time=60, request_timeout=30):
    """Fetch the historical track of each IMO and append it to a CSV file.

    One request is made per vessel and its rows are appended to
    ``output_path`` immediately, so rows already written survive a later
    failure. Per-vessel problems are printed and skipped.

    Args:
        api_key: Key interpolated into the ``exportvesseltrack`` URL path.
        imo_list: Sequence of IMO numbers to query.
        output_path: CSV file to append to. It is created with the header
            ``IMO,LAT,LON,TIMESTAMP`` if it does not exist yet; an existing
            file is appended to as-is.
        days: Value interpolated into the ``days:`` URL segment.
        period: Value interpolated into the ``period:`` URL segment.
        msgtype: Value interpolated into the ``msgtype:`` URL segment.
        buffer_time: Seconds to sleep between consecutive requests (no sleep
            after the last one).
        request_timeout: Seconds passed as ``timeout`` to ``requests.get`` so a
            stalled connection cannot block the loop indefinitely.

    Returns:
        The full contents of ``output_path`` read back as a DataFrame.
    """
    columns = ['IMO', 'LAT', 'LON', 'TIMESTAMP']
    url_template = f'https://services.marinetraffic.com/api/exportvesseltrack/{api_key}/v:3/days:{days}/period:{period}/imo:{{imo}}/msgtype:{msgtype}/protocol:jsono'

    # Create the CSV with only a header row if it doesn't exist yet.
    if not os.path.exists(output_path):
        pd.DataFrame(columns=columns).to_csv(output_path, index=False)

    for idx, imo in enumerate(imo_list, start=1):
        print(f"[{idx}/{len(imo_list)}] Fetching and saving AIS for IMO: {imo}")
        try:
            response = requests.get(url_template.format(imo=imo), timeout=request_timeout)
            if response.ok:
                data = response.json()
                if isinstance(data, list) and data:
                    for record in data:
                        record['IMO'] = imo
                    df = pd.DataFrame(data)

                    # Append only when every essential column is present, so
                    # the file never receives misaligned rows.
                    if set(columns).issubset(df.columns):
                        df[columns].to_csv(output_path, mode='a', header=False, index=False)
                        print(f"‚úÖ Saved {len(df)} records for IMO {imo}")
                    else:
                        print(f"‚ö†Ô∏è Skipped saving due to missing fields for IMO {imo}")
                else:
                    print(f"‚ö†Ô∏è No data returned for IMO {imo}")
            else:
                print(f"‚ùå Failed request for IMO {imo}: {response.status_code}")
        except Exception as e:
            # Deliberately broad: one bad vessel must not abort the whole run.
            print(f"‚ùå Exception for IMO {imo}: {e}")

        if idx < len(imo_list):
            time.sleep(buffer_time)

    # Return everything accumulated in the file, including rows that were
    # already there before this call.
    return pd.read_csv(output_path)

In [16]:
# 🔍 9. Call the historical AIS fetcher and preview
# The output file name carries the current local time to the minute.
output_hist_file = "historical_ais_pdvsa_{:%Y-%m-%d_%H-%M}.csv".format(datetime.now())
df_hist_ais = fetch_historical_ais_and_save(
    HISTORICAL_API_KEY,
    imo_list,
    output_hist_file,
    days=180,
    period="daily",
    buffer_time=60,
)

print(f"‚úÖ Retrieved {len(df_hist_ais)} historical AIS position records.")

# 💾 10. The fetcher has already written the file; this only reports the outcome.
if df_hist_ais.empty:
    print("‚ö†Ô∏è No historical data to save.")
else:
    print(f"üìÅ Historical AIS data confirmed saved to: {output_hist_file}")

[1/7] Fetching and saving AIS for IMO: 9060778
‚úÖ Saved 181 records for IMO 9060778
[2/7] Fetching and saving AIS for IMO: 9101546
‚úÖ Saved 180 records for IMO 9101546
[3/7] Fetching and saving AIS for IMO: 9147136
‚úÖ Saved 181 records for IMO 9147136
[4/7] Fetching and saving AIS for IMO: 9321407
‚úÖ Saved 181 records for IMO 9321407
[5/7] Fetching and saving AIS for IMO: 9433389
‚úÖ Saved 181 records for IMO 9433389
[6/7] Fetching and saving AIS for IMO: 9904766
‚úÖ Saved 181 records for IMO 9904766
[7/7] Fetching and saving AIS for IMO: 9904869
‚úÖ Saved 180 records for IMO 9904869
‚úÖ Retrieved 1265 historical AIS position records.
üìÅ Historical AIS data confirmed saved to: historical_ais_pdvsa_2025-05-08_16-25.csv


In [17]:
# 📆 11. Derive dynamic date range from historical AIS DataFrame
def get_dynamic_date_range(df_hist_ais):
    """Return the earliest and latest ``TIMESTAMP`` in the frame as strings.

    Args:
        df_hist_ais: DataFrame with a ``TIMESTAMP`` column. Values that cannot
            be parsed as datetimes are ignored. The frame is not modified.

    Returns:
        A ``(fromdate, todate)`` tuple of strings formatted as
        ``YYYY-MM-DD HH:MM``.

    Raises:
        ValueError: If the frame is empty, or if no ``TIMESTAMP`` value can be
            parsed as a datetime.
    """
    if df_hist_ais.empty:
        raise ValueError("üö´ Historical AIS DataFrame is empty. Cannot derive dynamic date range.")

    # Parse into a local Series instead of assigning back to the column, so the
    # caller's DataFrame keeps its original TIMESTAMP values and dtype.
    timestamps = pd.to_datetime(df_hist_ais['TIMESTAMP'], errors='coerce').dropna()
    if timestamps.empty:
        # Without this check min()/max() would be NaT and strftime would fail
        # with a message that does not point at the real cause.
        raise ValueError("No parseable TIMESTAMP values in historical AIS DataFrame. Cannot derive dynamic date range.")

    fromdate = timestamps.min().strftime("%Y-%m-%d %H:%M")
    todate = timestamps.max().strftime("%Y-%m-%d %H:%M")
    print(f"üìÜ Dynamic Date Range ‚Äî From: {fromdate} | To: {todate}")
    return fromdate, todate

In [18]:
# 🧭 12. Fetch and append Port Calls using Port Calls API
def fetch_port_calls_and_save(
    api_key,
    imo_list,
    output_path,
    df_hist_ais,
    msgtype="simple",
    buffer_time=60,
    request_timeout=30,
):
    """Fetch port calls for each IMO and append them to a CSV file.

    The query window is taken from ``get_dynamic_date_range(df_hist_ais)``.
    One request is made per vessel and its rows are appended to
    ``output_path`` immediately. Per-vessel problems are printed and skipped.

    Args:
        api_key: Key interpolated into the ``portcalls`` URL path.
        imo_list: Sequence of IMO numbers to query.
        output_path: CSV file to append to. It is created with a header row of
            the required columns if it does not exist yet.
        df_hist_ais: DataFrame handed to ``get_dynamic_date_range`` to obtain
            the ``fromdate``/``todate`` URL segments.
        msgtype: Value interpolated into the ``msgtype:`` URL segment.
        buffer_time: Seconds to sleep between consecutive requests (no sleep
            after the last one).
        request_timeout: Seconds passed as ``timeout`` to ``requests.get`` so a
            stalled connection cannot block the loop indefinitely.

    Returns:
        None. Results are only written to ``output_path``.
    """
    # Date window shared by every request in this run.
    fromdate, todate = get_dynamic_date_range(df_hist_ais)

    # Columns that must all be present in a response for it to be saved;
    # also the column order of the CSV.
    required_columns = ['IMO', 'SHIPNAME', 'TIMESTAMP_UTC', 'MOVE_TYPE', 'PORT_ID', 'PORT_NAME']

    # Ensure the output CSV exists with a header row.
    if not os.path.exists(output_path):
        pd.DataFrame(columns=required_columns).to_csv(output_path, index=False)
        print(f"üìÑ Created new CSV file with headers: {output_path}")

    url_template = (
        f'https://services.marinetraffic.com/api/portcalls/{api_key}/v:6/'
        f'fromdate:{fromdate}/todate:{todate}/imo:{{imo}}/msgtype:{msgtype}/protocol:jsono'
    )

    for idx, imo in enumerate(imo_list, start=1):
        print(f"[{idx}/{len(imo_list)}] Fetching Port Calls for IMO: {imo}")

        try:
            response = requests.get(url_template.format(imo=imo), timeout=request_timeout)
            if response.ok:
                data = response.json()
                if isinstance(data, list) and data:
                    # Tag every record with the IMO it was requested for.
                    for record in data:
                        record['IMO'] = imo

                    df = pd.DataFrame(data)

                    # Append only when every required column is present, so
                    # the file never receives misaligned rows.
                    if set(required_columns).issubset(df.columns):
                        df[required_columns].to_csv(output_path, mode='a', header=False, index=False)
                        print(f"‚úÖ Saved {len(df)} port call records for IMO {imo}")
                    else:
                        print(f"‚ö†Ô∏è Missing required fields ‚Äî skipped saving for IMO {imo}")
                else:
                    print(f"‚ö†Ô∏è No port call data for IMO {imo}")
            else:
                print(f"‚ùå Failed request for IMO {imo}: {response.status_code}")

        except Exception as e:
            # Deliberately broad: one bad vessel must not abort the whole run.
            print(f"‚ùå Exception for IMO {imo}: {e}")

        # Pause between calls to respect rate limits.
        if idx < len(imo_list):
            time.sleep(buffer_time)


In [19]:
# 📁 13. Output file path (stamped with the current local time to the minute)
output_csv = "port_calls_pdvsa_{:%Y-%m-%d_%H-%M}.csv".format(datetime.now())

# 📡 14. Run the fetcher with dynamic dates
# The historical AIS frame supplies the date window; buffer_time is the
# adjustable delay between requests.
fetch_port_calls_and_save(
    PORTCALLS_API_KEY,
    imo_list,
    output_csv,
    df_hist_ais,
    buffer_time=60,
)

üìÜ Dynamic Date Range ‚Äî From: 2024-11-09 08:42 | To: 2025-05-08 02:33
üìÑ Created new CSV file with headers: port_calls_pdvsa_2025-05-08_16-31.csv
[1/7] Fetching Port Calls for IMO: 9060778
‚úÖ Saved 193 port call records for IMO 9060778
[2/7] Fetching Port Calls for IMO: 9101546
‚úÖ Saved 177 port call records for IMO 9101546
[3/7] Fetching Port Calls for IMO: 9147136
‚úÖ Saved 158 port call records for IMO 9147136
[4/7] Fetching Port Calls for IMO: 9321407
‚úÖ Saved 272 port call records for IMO 9321407
[5/7] Fetching Port Calls for IMO: 9433389
‚úÖ Saved 316 port call records for IMO 9433389
[6/7] Fetching Port Calls for IMO: 9904766
‚úÖ Saved 150 port call records for IMO 9904766
[7/7] Fetching Port Calls for IMO: 9904869
‚úÖ Saved 199 port call records for IMO 9904869


In [None]:
# 🧪 15. Combine and Enrich All Datasets
from glob import glob
from datetime import datetime

def combine_and_enrich_datasets():
    """Merge the newest live and historical AIS CSVs and annotate port calls.

    The newest ``ais_positions_pdvsa_*.csv``, ``historical_ais_pdvsa_*.csv``
    and ``port_calls_pdvsa_*.csv`` in the working directory are loaded. Live
    and historical positions are concatenated and sorted by IMO and
    TIMESTAMP. Each position row then receives:

    * ``LAST_PORT`` / ``LAST_PORT_ID`` - the latest port call of the same IMO
      with ``MOVE_TYPE == 1`` (departure) at or before the position time;
    * ``NEXT_PORT`` / ``NEXT_PORT_ID`` - the earliest port call of the same IMO
      with ``MOVE_TYPE == 0`` (arrival) at or after the position time.

    Rows without a match keep ``None`` in those columns. The result is written
    to ``combined_ais_pdvsa_<YYYY-MM-DD_HH-MM>.csv`` in the working directory.

    Returns:
        None.

    Raises:
        FileNotFoundError: If any of the three input file patterns has no
            match in the working directory.
    """
    def _latest(pattern):
        # Newest match for `pattern`, assuming names sort chronologically
        # (the writers in this notebook use a zero-padded date-time stamp).
        matches = sorted(glob(pattern))
        if not matches:
            raise FileNotFoundError(
                f"No file matching '{pattern}' found in the working directory."
            )
        return matches[-1]

    latest_ais = _latest("ais_positions_pdvsa_*.csv")
    latest_hist = _latest("historical_ais_pdvsa_*.csv")
    latest_ports = _latest("port_calls_pdvsa_*.csv")

    print(f"üîÑ Combining files:\n- {latest_ais}\n- {latest_hist}\n- {latest_ports}")

    # Load datasets with their timestamp columns parsed as datetimes.
    df_live = pd.read_csv(latest_ais, parse_dates=["TIMESTAMP"])
    df_hist = pd.read_csv(latest_hist, parse_dates=["TIMESTAMP"])
    df_ports = pd.read_csv(latest_ports, parse_dates=["TIMESTAMP_UTC"])

    # Combine live + historical AIS. ignore_index gives every row a unique
    # label, which the .at[] assignments below rely on.
    df_all_positions = pd.concat([df_live, df_hist], ignore_index=True)
    df_all_positions = df_all_positions.sort_values(by=["IMO", "TIMESTAMP"])

    for column in ("LAST_PORT", "LAST_PORT_ID", "NEXT_PORT", "NEXT_PORT_ID"):
        df_all_positions[column] = None

    for imo in df_all_positions["IMO"].unique():
        vessel_times = df_all_positions.loc[df_all_positions["IMO"] == imo, "TIMESTAMP"]
        vessel_ports = df_ports[df_ports["IMO"] == imo]

        # Split and sort this vessel's port calls once, instead of once per
        # position row; filtering a sorted frame keeps it sorted.
        departures = vessel_ports[vessel_ports["MOVE_TYPE"] == 1].sort_values(by="TIMESTAMP_UTC")
        arrivals = vessel_ports[vessel_ports["MOVE_TYPE"] == 0].sort_values(by="TIMESTAMP_UTC")
        if departures.empty and arrivals.empty:
            continue

        for idx, position_time in vessel_times.items():
            # Latest departure at or before this position -> LAST_PORT.
            earlier = departures[departures["TIMESTAMP_UTC"] <= position_time]
            if not earlier.empty:
                last_dep = earlier.iloc[-1]
                df_all_positions.at[idx, "LAST_PORT"] = last_dep["PORT_NAME"]
                df_all_positions.at[idx, "LAST_PORT_ID"] = last_dep["PORT_ID"]

            # Earliest arrival at or after this position -> NEXT_PORT.
            later = arrivals[arrivals["TIMESTAMP_UTC"] >= position_time]
            if not later.empty:
                next_arr = later.iloc[0]
                df_all_positions.at[idx, "NEXT_PORT"] = next_arr["PORT_NAME"]
                df_all_positions.at[idx, "NEXT_PORT_ID"] = next_arr["PORT_ID"]

    # Save enriched data under a name stamped with the current local time.
    output_name = f"combined_ais_pdvsa_{datetime.now().strftime('%Y-%m-%d_%H-%M')}.csv"
    df_all_positions.to_csv(output_name, index=False)
    print(f"‚úÖ Enriched dataset saved to: {output_name}")

# 🧠 Run the combining process: loads the newest CSV of each kind from the
# working directory and writes a new combined, port-annotated CSV there.
combine_and_enrich_datasets()


üîÑ Combining files:
- ais_positions_pdvsa_2025-05-08_16-25.csv
- historical_ais_pdvsa_2025-05-08_16-25.csv
- port_calls_pdvsa_2025-05-08_16-31.csv
‚úÖ Enriched dataset saved to: combined_ais_pdvsa_2025-05-08_16-37.csv
