In [None]:
import pandas as pd
import random
import json
from typing import List, Dict, Tuple, Optional
import re
from datetime import datetime, timedelta
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from tqdm import tqdm
import numpy as np
from collections import defaultdict

import requests
from huggingface_hub import configure_http_backend

import warnings
warnings.filterwarnings("ignore")

# -------------------------------------------------------------------
# Backend Factory to Avoid Certificate Issues
# -------------------------------------------------------------------
def backend_factory() -> requests.Session:
    """Build a fresh ``requests.Session`` with TLS certificate checks turned off.

    Returns:
        requests.Session: a new session whose ``verify`` attribute is ``False``.

    NOTE(review): with verification disabled, every request made through this
    session accepts any server certificate. If the underlying problem is a
    corporate proxy / private CA, pointing ``verify`` at that CA bundle would
    be the safer fix — confirm whether that is an option here.
    """
    insecure_session = requests.Session()
    insecure_session.verify = False  # accept any certificate on this session
    return insecure_session

# Hand the factory above to huggingface_hub so the sessions it creates come from
# backend_factory (i.e. with certificate verification disabled).
# NOTE(review): newer huggingface_hub releases may no longer provide
# configure_http_backend — confirm against the installed version.
configure_http_backend(backend_factory=backend_factory)

In [None]:
# -------------------------------------------------------------------
# Data Loading
# -------------------------------------------------------------------
def _read_csv_column(csv_path, column):
    """Read the CSV at ``csv_path`` and return only ``column`` as a Series."""
    return pd.read_csv(csv_path)[column]


# Candidate names for the generator.
df_locations = _read_csv_column("../data/port_data/unlocode/unlocode_ports_only_20250604_144152.csv", "Name")
df_vessels = _read_csv_column("../data/ships_data/imo/imo_vessel_data_cleaned.csv", "name")

# Names to exclude from the candidates (see the cleaning step below).
df_locations_blacklisted = _read_csv_column("../data/port_data/shipping_ports_around_the_world/port_data.csv", "Port Name")
# NOTE(review): the vessel blacklist is taken from the "Company_Name" column —
# confirm this column really holds the names that must not leak into vessels.
df_vessels_blacklisted = _read_csv_column("../data/ships_data/global_cargo_ships/ships_data.csv", "Company_Name")

# -------------------------------------------------------------------
# Data Cleaning to Avoid Leakage of Data
# -------------------------------------------------------------------
def normalize_text(text):
    """Return ``text`` as a lowercase string without surrounding whitespace.

    Missing values (anything ``pd.isna`` reports as NA) map to the empty
    string; every other value is passed through ``str`` first.
    """
    return "" if pd.isna(text) else str(text).lower().strip()

def _filter_blacklisted(candidates, blacklist):
    """Drop candidates whose normalized text appears in the normalized blacklist.

    Returns a pair: the set of normalized blacklist entries, and the array of
    unique, non-null candidates that survived the filter.
    """
    banned = set(blacklist.apply(normalize_text))
    is_banned = candidates.apply(normalize_text).isin(banned)
    return banned, candidates[~is_banned].dropna().unique()


blacklisted_locations_normalized, clean_locations = _filter_blacklisted(df_locations, df_locations_blacklisted)
blacklisted_vessels_normalized, clean_vessels = _filter_blacklisted(df_vessels, df_vessels_blacklisted)

# "removed" is raw row count minus kept unique names, so it covers blacklist
# hits as well as rows dropped for being null or duplicated.
print(f"Clean locations: {len(clean_locations)} (removed {len(df_locations) - len(clean_locations)})")
print(f"Clean vessels: {len(clean_vessels)} (removed {len(df_vessels) - len(clean_vessels)})")

In [None]:
import random
import re
from datetime import datetime, timedelta
from collections import defaultdict
from tqdm import tqdm
import json

# -------------------------------------------------------------------
# Entity Lists and Constants
# -------------------------------------------------------------------

# Incoterms
# NOTE(review): "CRF" may be a transposition of "CFR", and FOB / CIF / CFR /
# DAP / DDP do not appear at all — confirm both are intentional (e.g. terms
# deliberately held out of this generator).
INCOTERMS = [
    "EXW", "FCA", "CPT", "CIP", "DPU", "FAS",
    "DAT", "DEQ", "DES", "DDU",
    "CNF", "C&F", "CRF",
]

# Commodities, keyed by cargo category. COMMODITIES below is the flat list,
# in category order and then in the order written inside each category.
# NOTE(review): staples such as iron ore, coal, wheat or crude oil are not
# listed — confirm that is intentional.
_COMMODITIES_BY_CATEGORY = {
    "Dry bulk": [
        "grain", "barley", "salt", "fertilizer", "cement", "bauxite", "alumina", "manganese ore",
        "nickel ore", "copper concentrate", "zinc concentrate", "lead concentrate",
        "limestone", "clinker", "gypsum", "aggregates", "sand", "gravel",
    ],
    "Minor bulk": [
        "steel products", "steel coils", "steel plates", "steel billets", "rebar",
        "aluminum ingots", "copper cathodes", "zinc slabs", "lead ingots",
        "timber", "logs", "plywood", "wood chips", "wood pellets", "paper products",
        "scrap metal", "pig iron", "DRI/HBI", "ferro alloys", "coke", "pet coke",
    ],
    "Liquid bulk": [
        "gasoline", "diesel", "jet fuel", "fuel oil", "naphtha",
        "LPG", "condensate", "bitumen", "lubricants", "base oil",
        "methanol", "ethanol", "ammonia", "caustic soda", "sulphuric acid",
        "soybean oil", "sunflower oil", "vegetable oil", "molasses",
    ],
    "Containers/General": [
        "machinery", "equipment", "auto parts", "vehicles", "electronics",
        "textiles", "garments", "footwear", "furniture", "appliances",
        "chemicals", "plastics", "rubber", "pharmaceuticals", "medical supplies",
        "frozen meat", "frozen fish", "frozen food", "fresh produce", "fruits",
    ],
    "Agricultural": [
        "coffee beans", "cocoa beans", "tea", "tobacco", "nuts",
        "pulses", "lentils", "chickpeas", "beans", "seeds", "animal feed",
        "hay", "straw", "palm kernel", "copra", "tapioca", "cassava",
    ],
    "Specialized": [
        "project cargo", "wind turbines", "transformers", "generators",
        "reactors", "pressure vessels", "pipes", "modules", "heavy machinery",
        "yachts", "boats", "containers", "dangerous goods", "military cargo",
        "humanitarian aid", "relief supplies", "personal effects", "household goods",
    ],
}

COMMODITIES = [
    commodity
    for category_items in _COMMODITIES_BY_CATEGORY.values()
    for commodity in category_items
]

# Vessel prefixes; the three trailing empty strings are extra "no prefix" entries.
# Note that "MT" also appears in QUANTITY_UNITS below.
VESSEL_PREFIXES = ["MV", "MT", "MS", "SS", "M/V", "M/T"] + [""] * 3

# Quantity units
QUANTITY_UNITS = [
    "MT", "metric tons", "tons", "tonnes",
    "CBM", "TEU", "FEU",
    "barrels", "gallons", "liters", "cubic meters", "units",
]

# Countries
# NOTE(review): "Mauretania" — confirm the spelling is intended (vs. "Mauritania").
COUNTRIES = [
    "China", "USA", "Japan", "Germany", "UK", "France", "Italy",
    "Spain", "South Korea", "India", "Brazil", "Russia", "Canada", "Australia",
    "Netherlands", "Belgium", "Norway", "Denmark", "Sweden", "Finland", "Guernsey",
    "Mauretania", "Liberia", "Mexico", "Chile", "Peru", "Guinea",
]

# Timing
TIME_TERMS = [
    "prompt", "early", "mid", "late",
    "end", "beginning", "first half", "second half",
]
MONTHS = [
    "January", "February", "March", "April",
    "May", "June", "July", "August",
    "September", "October", "November", "December",
]

# Quality
QUALITY_TERMS = [
    "prime", "standard", "off-spec", "contaminated",
    "wet", "dry", "clean", "dirty",
]

# Companies
SHIPPING_COMPANIES = [
    "Charterer A", "Shipper B", "Trader C", "Operator D", "Owner E",
    "Freight Co", "Global Shipping", "Maritime Corp", "Ocean Lines", "Pacific Traders",
]

# Terminals
TERMINAL_OPERATORS = [
    "Terminal 1", "Berth A", "Wharf B", "Pier C", "Jetty D",
    "Container Terminal", "Bulk Terminal", "Oil Terminal",
]

# Chartering Related Terms
CHARTER_TYPES = [
    "voyage charter", "time charter", "bareboat charter", "CoA", "spot fixture",
]
LOADING_TERMS = [
    "SHINC", "SHEX", "SSHEX", "WWD", "WIBON", "WIPON", "WCCON",
]
PAYMENT_TERMS = [
    "LC at sight", "30 days net", "CAD",
    "TT prepayment", "60 days credit", "DP terms",
]
VESSEL_TYPES = [
    "Capesize", "Panamax", "Supramax", "Handysize",
    "VLCC", "Suezmax", "Aframax",
    "MR tanker", "LR1", "LR2",
]
WEATHER_CONDITIONS = [
    "favorable weather", "monsoon conditions", "winter storms",
    "calm seas", "force majeure weather",
]
MARKET_CONDITIONS = [
    "firm market", "softening rates", "volatile conditions",
    "steady demand", "oversupplied tonnage",
]
INSPECTION_TYPES = [
    "SGS inspection", "independent surveyor", "P&I survey",
    "draft survey", "quality inspection",
]
DOCUMENT_TYPES = [
    "B/L", "charter party", "LOI", "NOR", "SOF",
    "cargo manifest", "phytosanitary certificate",
]

# -------------------------------------------------------------------
# Generator Class
# -------------------------------------------------------------------
class EnhancedMaritimeGenerator:
    def __init__(self, vessels_list, locations_list):
        """Initialize generator with custom vessel and location lists"""
        self.vessels = vessels_list
        self.locations = locations_list
        
        # -------------------------------------------------------------------
        # Template Categories
        # -------------------------------------------------------------------
        self.template_categories = {
            'incoterm_focused':      self._incoterm_focused_templates(),
            'commodity_focused':     self._commodity_focused_templates(),
            'trade_terms':           self._trade_terms_templates(),
            'multi_commodity':       self._multi_commodity_templates(),
            'voyage':                self._voyage_templates(),
            'commercial':            self._commercial_templates(),
            'operations':            self._operations_templates(),
            'logistics':             self._logistics_templates(),
            'market_reports':        self._market_report_templates(),
            'contract_negotiations': self._contract_negotiation_templates(),
            'risk_management':       self._risk_management_templates(),
            'regulatory':            self._regulatory_templates(),
            'paragraph_reports':     self._paragraph_report_templates(),
            'complex_scenarios':     self._complex_scenario_templates()
        }
        
    def _incoterm_focused_templates(self):
        """Extended templates specifically designed to highlight incoterms"""
        return [
            "The {commodity} shipment was sold {incoterm} {location} with buyer arranging onward transport.",
            "Under {incoterm} terms, seller delivers {commodity} to {vessel} at {location} port.",
            "The {incoterm} price for {commodity} includes freight to {location} but not insurance.",
            "{company} quoted {quantity} {unit} of {commodity} at ${price} per ton {incoterm} {location}.",
            "Risk transfer for {commodity} occurs when goods pass ship's rail under {incoterm} terms at {location}.",
            
            "Delivery of {commodity} under {incoterm} {location} completed when cargo crosses ship's rail on {vessel}.",
            "{incoterm} obligations fulfilled as {commodity} loaded aboard {vessel} with ocean freight paid to {location}.",
            "The {incoterm} contract stipulates {company} arranges {commodity} transport but not discharge at {location}.",
            "Buyer assumes {commodity} risk under {incoterm} once goods delivered to carrier at {location} port.",
            "{incoterm} {location} terms require seller to clear {commodity} for export and pay loading costs onto {vessel}.",
            
            "Buyer preferred {incoterm1} {location1} over {incoterm2} {location2} for {commodity} purchase.",
            "The {commodity} deal shifted from {incoterm1} to {incoterm2} terms affecting price by ${price} per {unit}.",
            "Insurance responsibility differs between {incoterm1} and {incoterm2} for {commodity} shipments to {location}.",
            "{vessel} carries {commodity} under both {incoterm1} and {incoterm2} terms from {location}.",
            "Cost comparison shows {incoterm1} saves ${price} versus {incoterm2} for {commodity} to {location}.",
            "Converting from {incoterm1} to {incoterm2} adds ${price} per {unit} for {commodity} including insurance to {location}.",
            "Risk allocation changes significantly between {incoterm1} and {incoterm2} for {commodity} shipped on {vessel}.",
            
            "The {incoterm} contract requires seller to load {commodity} on {vessel} and pay freight to {location}.",
            "Under {incoterm} {location}, buyer bears all risks once {commodity} is alongside {vessel}.",
            "{company} fulfilled {incoterm} obligations by delivering {commodity} to carrier at {location} terminal.",
            "The {incoterm} terms make seller responsible for {commodity} until delivery at {location} buyer's premises.",
            "Export clearance for {commodity} under {incoterm} terms completed by seller at {location}.",
            "{incoterm} delivery point for {commodity} agreed as {terminal} at {location} with buyer collecting via {vessel}.",
            "Seller's {incoterm} responsibility ends when {commodity} placed at buyer's disposal on {vessel} at {location}.",
            "{company} bears {commodity} transport risk under {incoterm} until crossing {vessel} rail at {location} port.",
            
            "{company} offers {commodity} {incoterm1} {location1} at ${price1} or {incoterm2} {location2} at ${price2}.",
            "Split shipment of {commodity}: {quantity1} {unit} {incoterm1} and {quantity2} {unit} {incoterm2} via {location}.",
            "The {commodity} tender allows bidding on {incoterm1}, {incoterm2}, or {incoterm3} basis to {location}.",
            "{vessel} manifest shows {commodity} parcels under {incoterm1}, {incoterm2}, and {incoterm3} terms.",
            "Flexible {commodity} contract permits switching between {incoterm1} and {incoterm2} delivery to {location} on {vessel}.",
            "Portfolio approach uses {incoterm1} for {commodity1} and {incoterm2} for {commodity2} optimizing {location} logistics.",
            
            "Urgent {commodity} shipment requires {incoterm} {location} delivery by {date} on {vessel}.",
            "The {incoterm} contract for {commodity} stipulates Q{quarter} delivery to {location}.",
            "Seller must ship {commodity} {incoterm} within {days} days of L/C opening for {location} delivery.",
            "{company} exercised option for additional {commodity} at same {incoterm} terms before {date}.",
            "Laycan for {commodity} under {incoterm} {location} terms set as {date} with {vessel} nominated.",
            "{month} delivery of {commodity} {incoterm} {location} requires fixing {vessel} by end of {month}.",
            
            "The {incoterm} pricing structure for {commodity} to {location} includes ${price} freight component on {vessel}.",
            "Working capital improves using {incoterm} as {company} receives payment upon {commodity} shipment at {location}.",
            "Letter of credit terms specify {incoterm} {location} delivery of {commodity} with {payment_term} settlement.",
            "Currency hedging strategy differs for {commodity} sold {incoterm1} versus {incoterm2} to {location} market.",
            
            "Asian {commodity} buyers typically prefer {incoterm} terms while European buyers favor {incoterm2} for {location} delivery.",
            "The {location} port traditionally trades {commodity} on {incoterm} basis reflecting local market practices.",
            "Middle Eastern {commodity} importers shifting from {incoterm1} to {incoterm2} terms through {location} hub.",
            "Latin American {commodity} exports predominantly use {incoterm} terms from {location} reflecting infrastructure constraints."
        ]
        
    def _commodity_focused_templates(self):
        """Extended templates emphasizing commodities"""
        return [
            "{vessel} loaded {quantity} {unit} of {commodity} at {location} for discharge at {location2}.",
            "The {commodity} cargo requires special handling due to its {quality_term} condition at {location}.",
            "{company} trades {commodity1}, {commodity2}, and {commodity3} from {location} globally.",
            "Premium {quality_term} {commodity} commanded ${price} per {unit} above standard grade at {location}.",
            "{vessel} carries specialty grade {commodity} requiring temperature control from {location} to {location2}.",
            "Break bulk {commodity} loaded using ship's gear onto {vessel} at {location} for {location2} delivery.",
            "Containerized {commodity} shipment totaling {quantity} {unit} distributed across {number} TEUs on {vessel}.",
            
            "{vessel} carries combination of {commodity1} in holds 1-3 and {commodity2} in holds 4-5 to {location}.",
            "Cargo manifest lists {commodity1} ({quantity1} {unit}), {commodity2} ({quantity2} {unit}), and {commodity3} for {location}.",
            "The {commodity1}/{commodity2} ratio optimizes {vessel} stability on voyage to {location}.",
            "Compatible stowage allows {commodity1} adjacent to {commodity2} on {vessel} to {location}.",
            "Segregation plan keeps {commodity1} separated from {commodity2} aboard {vessel} per IMO requirements.",
            "{vessel} loads {commodity1} as base cargo with {commodity2} and {commodity3} as supplementary parcels to {location}.",
            "Parcel tanker {vessel} carries {commodity1}, {commodity2}, and {commodity3} in segregated tanks to {location}.",
            
            "The {commodity} shipment meets specification with moisture content below {number}% for {location} delivery.",
            "{quality_term} grade {commodity} with high purity loaded on {vessel} at {location}.",
            "{company} guarantees {commodity} quality for {incoterm} {location} delivery.",
            "Laboratory analysis confirms {commodity} meets contract specs before loading on {vessel} at {location}.",
            "{commodity} requires fumigation certificate for import into {location} with treatment completed before {vessel} departure.",
            "API gravity of {commodity} tested at {number} degrees meeting {location} refinery requirements for {vessel} cargo.",
            "The {commodity} shipment includes certificate of origin and quality from {inspection_type} at {location} load port.",
            
            "Spot {commodity} trades at ${price} {incoterm} {location}, up ${price_change} from last week.",
            "The {commodity} arbitrage between {location1} and {location2} widened to ${price} per {unit}.",
            "Forward sales of {commodity} for {month} delivery {incoterm} {location} closed at ${price}.",
            "Backwardation in {commodity} futures encourages prompt lifting ex {location} using vessels like {vessel}.",
            "The {commodity} basis at {location} strengthened to +${price} over futures reflecting tight physical supply.",
            "Options on {commodity} {incoterm} {location} for Q{quarter} delivery traded actively around ${price} strike.",
            "{company} hedged {commodity} price risk for {vessel} cargo using swaps linked to {location} benchmark.",
            
            "Peak {commodity} season drives demand for {vessel_type} tonnage at {location}.",
            "Off-season {commodity} shipments from {location} offer freight rate advantages on {vessel}.",
            "The {commodity} harvest at {location} region begins {month}, affecting shipping schedules.",
            "Counter-seasonal {commodity} trade between {location1} and {location2} provides year-round employment for {vessel}.",
            "Weather delays impacting {commodity} harvest at {location} create logistics bottlenecks for vessels like {vessel}.",
            "The {commodity} crushing season at {location} generates demand for {commodity2} imports via {vessel_type} vessels.",
            
            "The {commodity} supply chain from {location1} involves transport to {location2} for {vessel} loading.",
            "{commodity} requires fumigation at {location} before loading on {vessel} for {location2}.",
            "Dedicated {commodity} terminal at {location} handles {quantity} {unit} annually via vessels like {vessel}.",
            "Just-in-time delivery of {commodity} coordinates inland logistics with {vessel} arrival at {location} berth.",
            "{commodity} storage at {location} expanded to {quantity} {unit} capacity supporting larger {vessel_type} loadings.",
            "Rail constraints limit {commodity} delivery to {location} port affecting {vessel} loading rates.",
            "The {commodity} value chain from mine to {vessel} at {location} involves {number} handling stages.",
            
            "{commodity} cargo on {vessel} rejected at {location} due to {quality_term} contamination requiring disposal.",
            "Insurance claim filed for {commodity} quality deterioration during {vessel} voyage to {location}.",
            "The {commodity} cargo required reconditioning at {location} before acceptance following {inspection_type}.",
            "Moisture damage to {commodity} in hold {number} of {vessel} discovered during discharge at {location}.",
            
            "Reefer {commodity} maintained at -{number}°C throughout {vessel} voyage from {location1} to {location2}.",
            "Project {commodity} weighing {quantity} {unit} loaded using floating cranes onto {vessel} at {location}.",
            "Dangerous goods declaration required for {commodity} shipment on {vessel} under IMO class {number} to {location}.",
            "The {commodity} requires inert gas blanketing during {vessel} voyage to prevent oxidation en route {location}."
        ]
        
    def _trade_terms_templates(self):
        """Extended templates combining incoterms with commodity trades"""
        return [
            "The {commodity} sale involves {incoterm1} {location1} for main cargo and {incoterm2} {location2} for optional quantity.",
            "{company} structured {commodity} deal: buy {incoterm1} {location1}, sell {incoterm2} {location2} capturing ${price} margin.",
            "Three-way trade: {commodity} purchased {incoterm1} {location1}, shipped via {location2}, sold {incoterm2} {location3}.",
            "Back-to-back {commodity} contracts use {incoterm1} purchase at {location1} and {incoterm2} sale to {location2} via {vessel}.",
            "String deal for {commodity} involves multiple parties with title passing {incoterm} {location} while cargo on {vessel}.",
            "The {commodity} book-transfer at {location} avoids physical movement while changing from {incoterm1} to {incoterm2} terms.",
            
            "The {incoterm} terms for {commodity} specify seller provides documentation at {location}.",
            "Under {incoterm} {location}, {commodity} quality inspection occurs before risk transfer to buyer.",
            "{commodity} sold {incoterm} requires seller to arrange inspection at {location} load port.",
            "Documentary requirements for {commodity} under {incoterm} include {document_type} for customs clearance at {location}.",
            "The {incoterm} obligations for {commodity} extend to securing export license from {location} authorities.",
            "{company} responsible for {commodity} weighing under {incoterm} terms using {inspection_type} at {location}.",
            
            "Asian buyers prefer {incoterm} terms for {commodity} imports through {location} distribution hub.",
            "The {location} {commodity} market standard is {incoterm} pricing with {payment_term} payment terms.",
            "European Union {commodity} imports typically use {incoterm} terms reflecting uniform customs procedures at {location}.",
            "African {commodity} exports predominantly traded {incoterm} due to limited insurance availability at {location} ports.",
            "The {location} free zone allows {commodity} storage under {incoterm1} terms before conversion to {incoterm2} for re-export.",
            
            "LC terms for {commodity} specify {incoterm} {location} with {number} days usance from B/L date.",
            "The {incoterm} price includes {commodity} financing cost during {days}-day voyage to {location}.",
            "Prepayment discount available for {commodity} purchased {incoterm} {location} reducing price by {number}%.",
            "Bank financing for {commodity} requires {incoterm} terms ensuring title documents pass through {location} bank.",
            "The {commodity} factoring arrangement works best with {incoterm} sales providing clean payment points at {location}.",
            "Credit insurance for {commodity} trades covers {percentage}% of {incoterm} value including freight to {location}.",
            
            "The {commodity} contract allows switching between {incoterm1} and {incoterm2} based on freight market at {location}.",
            "Force majeure clause in {commodity} {incoterm} contract activated due to {location} port congestion affecting {vessel}.",
            "Optional origin clause permits {commodity} sourcing from {location1} or {location2} under same {incoterm} terms.",
            "The {commodity} pricing formula adjusts {incoterm} value based on quality at {location} discharge port.",
            
            "Laytime calculations for {commodity} differ under {incoterm1} versus {incoterm2} terms at {location} port.",
            "Demurrage responsibility for {commodity} shipment depends on whether {incoterm1} or {incoterm2} terms apply at {location}.",
            "The {incoterm} terms determine who arranges {commodity} surveys and sampling aboard {vessel} at {location}.",
            "Customs clearance delays for {commodity} at {location} impact differ under {incoterm1} versus {incoterm2} terms."
        ]
        
    def _multi_commodity_templates(self):
        """Extended templates featuring multiple commodities"""
        return [
            "{vessel} loads {commodity1} ({quantity1} {unit}), {commodity2} ({quantity2} {unit}), and {commodity3} at {location}.",
            "The {location} terminal simultaneously handles {commodity1} imports and {commodity2}/{commodity3} exports.",
            "Backhaul opportunity: {vessel} discharges {commodity1} at {location1}, loads {commodity2} for {location2}.",
            "Geared {vessel} handles both {commodity1} break bulk and {commodity2} in containers at {location} terminal.",
            "The {vessel} carries {commodity1} in lower holds with {commodity2} as deck cargo from {location} to {location2}.",
            "Combination carrier {vessel} alternates between {commodity1} and {commodity2} trades based on {location} market.",
            
            "{company} swaps {commodity1} at {location1} for {commodity2} at {location2} balancing regional needs.",
            "The commodity exchange involves {quantity1} {unit} {commodity1} for {quantity2} {unit} {commodity2} {incoterm} {location}.",
            "Physical swap of {commodity1} for {commodity2} executed at {location} storage facility avoiding transport costs.",
            "The {commodity1}-for-{commodity2} barter deal eliminates currency exposure for {company} trading through {location}.",
            "Exchange ratio of {number}:{number} established for {commodity1} to {commodity2} swap at {location} terminal.",
            
            "{location} refinery converts {commodity1} feedstock to {commodity2} and {commodity3} for export on {vessel}.",
            "The {commodity1} delivered {incoterm} {location} yields {quantity} {unit} of {commodity2} product.",
            "Integrated facility at {location} processes {commodity1} into {commodity2}, with {commodity3} as byproduct shipped via {vessel}.",
            "The {commodity1} crushing at {location} produces {commodity2} meal and {commodity3} oil for export markets.",
            "Chemical plant at {location} combines {commodity1} and {commodity2} creating {commodity3} for shipment on {vessel}.",
            "Value-added processing transforms raw {commodity1} into finished {commodity2} and {commodity3} at {location} for global distribution.",
            
            "{vessel} switches from {commodity1} in summer to {commodity2} in winter serving {location} route.",
            "The {location} port transitions from {commodity1} exports to {commodity2} imports as seasons change.",
            "Counter-cyclical trading of {commodity1} and {commodity2} through {location} optimizes {vessel} utilization year-round.",
            "Harvest patterns create {commodity1} exports from {location1} coinciding with {commodity2} imports at {location2}.",
            
            "The {location} logistics hub consolidates {commodity1}, {commodity2}, and {commodity3} for efficient {vessel} loading.",
            "Multi-modal transport brings {commodity1} by rail, {commodity2} by truck, and {commodity3} by barge to {location} for {vessel}.",
            "Storage constraints at {location} require coordinating {commodity1} and {commodity2} arrivals with {vessel} loading schedule.",
            "The {commodity1}, {commodity2}, and {commodity3} supply chains converge at {location} creating synergies for {vessel_type} operators.",
            
            "IMO regulations permit {commodity1} and {commodity2} in adjacent holds but require {commodity3} segregation on {vessel}.",
            "The {vessel} stowage plan accommodates {commodity1} aft, {commodity2} midships, and {commodity3} forward for {location} voyage.",
            "Compatible cargoes {commodity1} and {commodity2} share common areas while {commodity3} requires separate handling at {location}.",
            "Dangerous goods {commodity1} cannot be loaded with {commodity2} on {vessel} requiring separate departures from {location}."
        ]
        
    def _voyage_templates(self):
        """Extended voyage-related templates"""
        return [
            "{vessel} departed from {location1} bound for {location2} carrying {quantity} {unit} of {commodity}.",
            "The {vessel} is scheduled to arrive at {location} on {date} with a cargo of {commodity}.",
            "{vessel} completed loading {quantity} {unit} of {commodity} at {location} and sailed for {location2}.",
            "After {days} days at sea, {vessel} arrived at {location} with {commodity} cargo from {location2}.",
            "{vessel} transited via {location1} en route from {location2} to {location3} with {commodity} cargo.",
            "Weather routing diverted {vessel} carrying {commodity} {number} miles north of direct {location1}-{location2} route.",
            
            "{vessel} voyage plan optimizes fuel consumption carrying {commodity} {incoterm} from {location1} to {location2}.",
            "The {commodity}-laden {vessel} routes via {location1} to avoid weather, adding {days} days to {incoterm} delivery.",
            "Bunker strategy for {vessel} includes stops at {location1} and {location2} during {commodity} voyage to {location3}.",
            "The {vessel} master selected northern route to {location} saving {days} days despite longer distance with {commodity} cargo.",
            "ETA {location} for {vessel} carrying {commodity} adjusted to {date} following slow steaming for fuel economy.",
            "Voyage instructions for {vessel} specify maximum speed of {number} knots to ensure {commodity} arrives {location} within laycan.",
            
            "{vessel} triangular voyage: ballast to {location1}, load {commodity1} for {location2}, then {commodity2} to {location3}.",
            "Backhaul cargo of {commodity2} from {location2} improves {vessel} economics after {commodity1} discharge at {location1}.",
            "The {vessel} undertakes milk run calling {location1}, {location2}, and {location3} collecting {commodity} parcels.",
            "Tramping pattern takes {vessel} from {location1} with {commodity1} seeking best freight for next leg from {location2}.",
            "The {vessel} combines liner service to {location1} with tramp voyage to {location2} carrying {commodity}.",
            
            "Bunker consumption for {vessel} totals {quantity} {unit} fuel oil on {commodity} voyage from {location1} to {location2}.",
            "Time charter equivalent for {vessel} reaches ${price}/day on {commodity} voyage from {location1} to {location2}.",
            "The {vessel} generates ${price} daily profit carrying {commodity} on {location1}-{location2} route at current rates.",
            "Voyage estimate shows {vessel} nets ${price} carrying {commodity} {incoterm} from {location1} to {location2}.",
            
            "{vessel} port rotation includes {location1} (load {commodity1}), {location2} (part discharge), {location3} (final discharge).",
            "Multi-port discharge of {commodity} from {vessel} scheduled for {location1} ({quantity1} {unit}) and {location2} ({quantity2} {unit}).",
            "The {vessel} calls {location1} for bunkers only before proceeding to {location2} to load {commodity}.",
            "Deviation to {location1} added to {vessel} voyage for {commodity} cargo pickup increasing total distance by {number} miles.",
            
            "Piracy threat diverts {vessel} carrying {commodity} around {location1} adding {days} days to {location2} voyage.",
            "Main engine problems force {vessel} with {commodity} cargo to seek refuge at {location1} delaying {location2} arrival.",
            "The {vessel} avoided {location1} strait congestion by routing via {location2} with {commodity} cargo for {location3}.",
            "Quarantine requirements at {location} delay {vessel} carrying {commodity} by {days} days pending clearance.",
            
            "The {vessel} switches to low-sulfur fuel entering {location} ECA zone while carrying {commodity} to {location2}.",
            "Ballast water exchange conducted by {vessel} {number} miles offshore before loading {commodity} at {location}.",
            "Green shipping corridor from {location1} to {location2} offers {vessel} carrying {commodity} reduced port fees.",
            "The {vessel} slow steams at {number} knots reducing emissions on {commodity} voyage from {location1} to {location2}."
        ]
        
    def _commercial_templates(self):
        """Extended commercial and trading templates"""
        return [
            "{company} arbitrages {commodity} between {location1} {incoterm1} and {location2} {incoterm2} markets using {vessel}.",
            "Physical {commodity} premium over paper drives {company} to fix {vessel} for prompt {location} loading.",
            "Contango in {commodity} market encourages {company} to store at {location} using floating storage on {vessel}.",
            "The {commodity} squeeze at {location} forces {company} to charter {vessel} at premium rates for urgent lifting.",
            "Spread trading between {commodity1} and {commodity2} at {location} generates profits for {company} using {vessel_type} tonnage.",
            
            "{company} negotiates {commodity} volume discount: {quantity1} {unit} at ${price1}, {quantity2} {unit} at ${price2} {incoterm} {location}.",
            "Take-or-pay {commodity} contract requires {quantity} {unit} monthly liftings {incoterm} {location} on vessels like {vessel}.",
            "The {commodity} tender from {company} awards {quantity} {unit} split between {location1} and {location2} delivery.",
            "Long-term {commodity} contract includes price escalation clause linked to {location} freight index for {vessel_type} vessels.",
            "Evergreen {commodity} agreement automatically renews unless {company} gives {number} days notice for {location} deliveries.",
            "{company} exercises option to extend {commodity} contract for additional {quantity} {unit} at same {incoterm} {location} terms.",
            
            "{vessel} fixtures for {commodity} indicate tightening freight market ex {location} for {month} positions.",
            "The {commodity} cargo flow data shows {percentage}% increase from {location1} to {location2} route.",
            "Satellite tracking reveals {number} {vessel_type} vessels queuing at {location} to load {commodity}.",
            "Port congestion at {location} averages {days} days for {commodity} vessels impacting {incoterm} delivery schedules.",
            "The {commodity} stock levels at {location} dropped to {quantity} {unit}, lowest in {number} months, supporting firm prices.",
            
            "{company} undercuts competitor by ${price}/ton on {commodity} {incoterm} {location} capturing {vessel} cargo.",
            "Market share battle for {commodity} at {location} intensifies as {company} offers better payment terms than rivals.",
            "The {commodity} pricing war at {location} benefits ship operators like {vessel} owner with increased cargo volumes.",
            "Vertical integration allows {company} to control {commodity} from source through {vessel} transport to {location} delivery.",
            
            "{company} hedges {commodity} price risk using {location} futures while fixing {vessel} for physical delivery.",
            "Portfolio approach combines spot and term {commodity} purchases {incoterm} {location} optimizing price and availability.",
            "The {commodity} supply disruption at {location1} triggers force majeure allowing {company} to source from {location2}.",
            "Insurance coverage for {commodity} includes both price and freight risk on {vessel} voyage to {location}.",
            
            "Structured finance deal uses {commodity} cargo on {vessel} as collateral for {company} working capital at {location}.",
            "The {commodity} prepayment deal provides {percentage}% discount for {company} paying before {vessel} loading at {location}.",
            "Credit terms extended from {days} to {number} days for {commodity} {incoterm} {location} reflecting market competition.",
            "Factoring arrangement monetizes {commodity} receivables immediately upon {vessel} loading at {location} for {company}."
        ]
        
    def _operations_templates(self):
        """Extended operational templates"""
        return [
            "{terminal} at {location} handled {vessel} carrying {commodity}.",
            "Stevedoring operations for {commodity} on {vessel} commenced at {location}.",
            "{vessel} required {days} days to discharge {commodity} at {location}.",
            "Port congestion at {location} delays {vessel} berthing with {commodity} cargo by {days} days.",
            "Night shift approved for {commodity} operations on {vessel} at {location} to meet deadline.",
            "The {location} port authority prioritized {vessel} carrying essential {commodity} for immediate berthing.",
            
            "The {commodity} discharge rate from {vessel} at {location} reached {quantity} {unit} per day.",
            "{vessel} uses shore cranes at {location} to load {commodity}.",
            "Night operations for {commodity} on {vessel} approved at {location} terminal.",
            "Grab discharge of {commodity} from {vessel} achieves {quantity} {unit} per hour at {location} terminal.",
            "Ship's gear sufficient for {commodity} operations as {location} lacks suitable shore equipment for {vessel}.",
            "Conveyor belt system loads {commodity} onto {vessel} at {quantity} {unit} per hour at {location} terminal.",
            "The {commodity} trimming in {vessel} holds required {days} extra days at {location} affecting schedule.",
            
            "The {location} terminal's new equipment reduces {commodity} loading time for {vessel_type} vessels by {percentage}%.",
            "Automated {commodity} handling at {location} allows {vessel} to complete operations in {days} days versus {number} previously.",
            "The {terminal} invested ${price} million upgrading {commodity} handling for vessels like {vessel} at {location}.",
            "Berth productivity for {commodity} at {location} averages {quantity} {unit} per day for {vessel_type} vessels.",
            
            "Rain stopped {commodity} loading on {vessel} at {location} for {days} days per charter party terms.",
            "Force majeure declared as cyclone prevents {vessel} from loading {commodity} at {location} terminal.",
            "High winds exceed safe working limits halting {commodity} operations on {vessel} at {location}.",
            "The {location} terminal's covered storage allows {commodity} operations to continue during monsoon for {vessel}.",
            
            "The {vessel} loads {commodity} to draft of {number}m considering {location} port restrictions.",
            "Tidal window restricts {vessel} carrying {commodity} to high water departure from {location}.",
            "Draft survey before and after {commodity} loading confirms {quantity} {unit} loaded on {vessel} at {location}.",
            "The {location} channel depth limits {vessel} to {percentage}% of maximum {commodity} cargo capacity.",
            
            "The {vessel} receives free pratique at {location} allowing {commodity} discharge to commence.",
            "Customs clearance delay for {commodity} on {vessel} at {location} incurs ${price} per day storage charges.",
            "Phytosanitary inspection of {commodity} cargo passes enabling {vessel} discharge at {location}.",
            "Documentation discrepancy delays {commodity} discharge from {vessel} at {location} pending amendment.",
            
            "Stevedore damage to {commodity} during {vessel} discharge at {location} triggers insurance claim.",
            "The {commodity} spillage during loading onto {vessel} at {location} requires environmental cleanup.",
            "Cargo gear failure on {vessel} necessitates shore crane hire for {commodity} discharge at {location}.",
            "The {commodity} contamination discovered during {vessel} loading at {location} results in cargo rejection."
        ]
        
    def _logistics_templates(self):
        """Extended logistics templates"""
        return [
            "Rail delivery of {commodity} to {location} port timed for direct loading onto {vessel} minimizing storage.",
            "The {commodity} moves by barge to {location1} for transshipment to ocean vessel {vessel} bound {location2}.",
            "Truck arrivals of {commodity} coordinated with {vessel} loading windows at {location} reducing congestion.",
            "Pipeline delivery of {commodity} directly to {vessel} at {location} eliminates intermediate handling.",
            "The {commodity} arrives via {number} rail cars daily matching {vessel} loading rate at {location} port.",
            
            "JIT {commodity} delivery coordinates suppliers with {vessel} loading schedule at {location}.",
            "The {vessel} arrival triggers {commodity} release from storage avoiding demurrage at {location}.",
            "Zero-inventory approach times {commodity} production with {vessel} loading at {location} eliminating storage costs.",
            "The {commodity} supply chain synchronizes mine output with {vessel} arrivals at {location} export terminal.",
            "Cross-docking operation transfers {commodity} directly from rail to {vessel} at {location} without warehousing.",
            
            "End-to-end visibility tracks {commodity} from origin through {vessel} voyage to {location} final delivery.",
            "The {commodity} supply chain integrates {number} suppliers for consolidated shipment on {vessel} from {location}.",
            "Vendor-managed inventory ensures continuous {commodity} supply for {vessel} loadings at {location}.",
            "Collaborative planning optimizes {commodity} flow from multiple origins to {vessel} at {location} hub.",
            
            "Blockchain tracks {commodity} from origin through {vessel} voyage to final {location} delivery {incoterm}.",
            "IoT sensors monitor {commodity} condition during inland transport and {vessel} voyage to {location}.",
            "Digital twin of {commodity} supply chain predicts optimal {vessel} scheduling at {location} port.",
            "AI optimizes {commodity} stowage plan for {vessel} considering {location} discharge sequence.",
            "Real-time tracking app shows {commodity} location from warehouse through {vessel} voyage to {location} buyer.",
            "Smart contracts execute payment when {commodity} loaded on {vessel} reaches agreed milestone at {location}.",
            
            "Road weight limits restrict {commodity} truck deliveries to {location} port affecting {vessel} loading rate.",
            "Single rail track creates bottleneck for {commodity} delivery to {location} limiting {vessel_type} vessel loadings.",
            "The {location} port's shallow draft requires lightering {commodity} from {vessel} to reach berth.",
            "Storage capacity shortage at {location} forces direct {commodity} loading onto {vessel} from arriving transport.",
            
            "Electric trucks deliver {commodity} to {location} supporting green port initiative for vessels like {vessel}.",
            "The {commodity} carbon footprint reduced by {percentage}% using rail instead of road to {location} for {vessel} loading.",
            "Shore power at {location} allows {vessel} to shut down engines during {commodity} loading reducing emissions.",
            "Biofuel-powered {vessel} carries {commodity} on carbon-neutral voyage from {location1} to {location2}.",
            
            "Force majeure logistics plan activates moving {commodity} via {location1} after {location2} closure affecting {vessel}.",
            "Emergency {commodity} stockpile at {location} ensures {vessel} loading continues despite supply disruption.",
            "Contingency routing diverts {commodity} through {location1} when primary route to {location2} blocked for {vessel}.",
            "The {commodity} crisis response mobilizes additional {vessel_type} tonnage to clear backlog at {location}."
        ]
        
    def _market_report_templates(self):
        """Market report style templates"""
        return [
            "Freight rates for {vessel_type} vessels carrying {commodity} from {location1} to {location2} firmed to ${price}/ton amid tight tonnage availability. {company} fixed {vessel} for {month} laycan at ${price} daily hire reflecting {market_condition}.",
            
            "{commodity} freight market sentiment remains {market_condition} with {location} exports driving demand for {vessel_type} tonnage. Recent fixtures include {vessel} at ${price}/ton basis {incoterm} delivery to {location2}, representing a {percentage}% increase week-on-week.",
            
            "The {location1} to {location2} {commodity} route saw increased activity with {number} fixtures concluded this week. {vessel} fixed at ${price} per ton {incoterm} {location2}, while period business remains firm with {company} taking {vessel_type} tonnage for {number} months.",
            
            "{commodity} exports from {location} expected to reach {quantity} {unit} in {month}, up {percentage}% year-on-year. The increased volume supports {vessel_type} demand with {company} and competitors actively seeking tonnage. {vessel} fixed for {incoterm} {location2} delivery at firm rates reflecting supply tightness.",
            
            "Seasonal demand for {commodity} shipments from {location1} materializing earlier than expected with {company} securing {vessel} for {quantity} {unit} cargo to {location2}. Market participants anticipate further rate increases as {commodity2} export season overlaps creating vessel shortage.",
            
            "The {location} {commodity} export market faces logistics constraints with rail deliveries {percentage}% below normal. Despite this, {vessel} secured full cargo of {quantity} {unit} for {location2} discharge. Terminal congestion averaging {days} days affects vessel turnaround with implications for {incoterm} delivery schedules.",
            
            "{location1} emerged as alternative source for {commodity} following production issues at traditional supplier {location2}. {company} redirected {vessel} to load {quantity} {unit} at competitive {incoterm} terms. This shift in trade flows expected to support {vessel_type} employment through {month}.",
            
            "{commodity} arbitrage between {location1} and {location2} widened to ${price}/ton creating opportunities for physical traders. {company} capitalized by fixing {vessel} for prompt loading with {incoterm1} purchase and {incoterm2} sale. The {quantity} {unit} cargo expected to generate ${price} profit after freight costs."
        ]
        
    def _contract_negotiation_templates(self):
        """Contract negotiation scenarios"""
        return [
            "During protracted negotiations for {commodity} supply, {company} initially proposed {incoterm1} {location1} terms at ${price1}/ton. After several rounds, parties agreed to {incoterm2} {location2} at ${price2}/ton with {payment_term} and quality specifications allowing {percentage}% tolerance. {vessel} nominated for first lifting of {quantity} {unit} in {month}.",
            
            "The {commodity} tender evaluation by {company} compared multiple offers: {incoterm1} {location1} at ${price1}, {incoterm2} {location2} at ${price2}, and {incoterm3} {location3} at ${price}. Final award went to lowest evaluated bid considering freight to final destination with {vessel_type} vessels like {vessel} suitable for the route.",
            
            "Multi-year {commodity} offtake agreement between producer at {location1} and {company} includes volume flexibility of {quantity1}-{quantity2} {unit} annually. Pricing mechanism links to {location2} benchmark with {incoterm} delivery terms. First cargo of {quantity} {unit} scheduled on {vessel} for {month} lifting.",
            
            "Commercial dispute arose when {commodity} quality on {vessel} failed specifications at {location} discharge port. After {inspection_type} confirmed {percentage}% moisture versus {number}% contract maximum, parties negotiated ${price} price reduction. {company} accepted delivery under protest preserving rights while maintaining relationship for future {incoterm} {location2} business."
        ]
        
    def _risk_management_templates(self):
        """Risk management focused templates"""
        return [
            "{company} required letter of credit for {commodity} sale {incoterm} {location} after buyer's credit rating downgrade. The ${price} transaction for {quantity} {unit} on {vessel} proceeded with confirmed LC from first-class bank providing payment security. Additional credit enhancement included {percentage}% performance bond.",
            
            "Risk assessment for {commodity} shipment identified potential delays at {location1} load port and {location2} discharge. Mitigation measures included {vessel} with self-gearing capability, {loading_term} charter terms, and alternative discharge option at {location3}. {company} also secured cargo insurance covering {percentage}% of {incoterm} value.",
            
            "Hedging strategy for {commodity} cargo on {vessel} combined futures and options to protect ${price} margin on {incoterm} {location} sale. {company} bought put options at ${price1} strike while selling calls at ${price2} creating collar structure. Physical delivery of {quantity} {unit} matched hedge volume reducing basis risk.",
            
            "Sanctions compliance check revealed {vessel} previous call at restricted {location1} requiring enhanced due diligence. {company} obtained legal clearance before loading {commodity} at {location2} for {location3} discharge. Documentation included certificates confirming {commodity} origin and beneficial ownership satisfying {incoterm} obligations."
        ]
        
    def _regulatory_templates(self):
        """Regulatory and compliance templates"""
        return [
            "IMO 2020 sulfur regulations impact {commodity} shipping costs from {location1} to {location2} as {vessel} burns compliant fuel. The additional ${price}/ton reflects in {incoterm} pricing with {company} absorbing partial cost to remain competitive. Alternative using scrubber-fitted {vessel_type} vessels under consideration for future fixtures.",
            
            "Ballast water management compliance required {vessel} to install treatment system before loading {commodity} at {location}. The ${price} investment by shipowner reflected in higher freight rates for {incoterm} deliveries. {company} evaluating fleet modernization impact on long-term {commodity} transportation costs.",
            
            "New import regulations at {location} require additional {commodity} documentation including {document_type} and certificate of origin. {company} adjusted {incoterm} procedures ensuring compliance for {vessel} cargo of {quantity} {unit}. The regulatory change adds {days} days to clearance but provides better supply chain transparency.",
            
            "Export license requirements for {commodity} from {location1} tightened affecting {vessel} loading schedule. {company} secured permits for {quantity} {unit} but future shipments face uncertainty. Contingency plans include alternative sourcing from {location2} under similar {incoterm} terms to maintain supply reliability.",
            
            "SOLAS requirements for {commodity} weight verification implemented at {location} affecting loading procedures. {vessel} received verified gross mass documentation for {quantity} {unit} cargo before departure. {company} invested in certified weighing equipment to comply with {incoterm} delivery obligations and avoid shipment delays."
        ]
        
    def _paragraph_report_templates(self):
        """Longer paragraph-style templates for more complex scenarios"""
        return [
            "The {commodity} market dynamics at {location1} reflect broader supply chain challenges facing the industry. Recent fixtures including {vessel} for {quantity} {unit} at ${price} {incoterm} {location2} demonstrate firming rates amid {market_condition}. {company} strategically positioned tonnage ahead of peak season, securing {vessel_type} vessels for {month} liftings. Port congestion averaging {days} days creates additional pressure on vessel supply. Market participants expect rates to remain supported through {month} as {commodity2} exports compete for same vessel capacity. The {location1} to {location3} route particularly affected with {percentage}% increase in freight costs impacting delivered prices under {incoterm2} terms.",
            
            "{vessel} arrived {location} to load {commodity} facing multiple operational challenges requiring careful coordination. Initial draft survey revealed berth limitations necessitating partial loading of {quantity1} {unit} before shifting to deeper berth for remaining {quantity2} {unit}. Weather delays during {loading_term} period extended port stay by {days} days impacting laycan compliance. {company} arranged {inspection_type} throughout operations ensuring {commodity} quality met {incoterm} {location2} contractual specifications. Despite complications, vessel completed loading {quantity} {unit} total cargo maintaining {quality_term} grade through proper handling procedures. The experience highlighted importance of operational flexibility when executing {commodity} shipments from infrastructure-constrained ports.",
            
            "Traditional {commodity} trade flows from {location1} to {location2} face disruption as new suppliers emerge and infrastructure develops. {company} previously relied exclusively on {location1} for {commodity} requirements under long-term {incoterm1} contracts. However, recent developments at {location3} including new {terminal} with {quantity} {unit} annual capacity create alternatives. Test shipment of {quantity1} {unit} on {vessel} proved viable with competitive quality and freight rates. The shift allows {company} to diversify supply sources, negotiating better {incoterm2} terms through competition. Market impact includes {percentage}% reduction in {location1} export premiums and increased {vessel_type} vessel demand on emerging routes. Long-term implications suggest permanent alteration of established {commodity} trade patterns affecting vessel deployment and port investments.",
            
            "Charter party negotiations for {vessel} to carry {commodity} from {location1} revealed divergent interests requiring creative solutions. Owners insisted on {loading_term} terms protecting against port delays while {company} sought {number} hours loading time given terminal constraints. Compromise reached with scaled demurrage rates: ${price1} daily for first {days} days, increasing to ${price2} thereafter. Additional clauses addressed {commodity} moisture content, authorizing rejection if exceeding {percentage}% at load port. Force majeure provisions expanded beyond standard language recognizing {location1} infrastructure vulnerabilities. The {incoterm} nature of underlying sale required careful alignment between charter party and sales contract terms. Final agreement balanced risk allocation enabling profitable {quantity} {unit} shipment to {location2} while protecting both parties' commercial interests.",
            
            "Integration of {commodity} supply chain from mine to market demonstrates modern logistics complexity requiring sophisticated coordination. {company} manages flow from extraction at inland {location1} through rail transport to {location2} port for ocean shipment to {location3}. Each stage presents unique challenges: rail capacity constraints limit daily tonnage to {quantity1} {unit}, while port storage of {quantity2} {unit} provides buffer for vessel arrivals. {vessel} loading depends on tide conditions and terminal availability, typically achieving {quantity} {unit} daily rate. The {incoterm} sale terms to end buyers require precise timing as delays cascade through system. Technology deployment including real-time tracking and predictive analytics helps optimize throughput. Recent performance improved {percentage}% through better integration reducing delivered costs despite rising freight markets. Success attracts competitors seeking to replicate model on other {commodity2} routes.",
            
            "Comprehensive risk analysis for {commodity} shipments from {location1} via {vessel_type} vessels reveals multiple vulnerability points requiring mitigation strategies. Political instability at {location1} threatens supply continuity prompting {company} to maintain strategic inventory at {location2}. Weather patterns particularly during {month} create {percentage}% probability of port closure exceeding {days} days. {vessel} mechanical reliability statistics show {number}% breakdown risk on long voyages necessitating newer tonnage selection. Price volatility between {incoterm1} purchase and {incoterm2} sale exposes margin to market movements. Quality degradation during {weather_condition} requires moisture monitoring and ventilation management. Credit exposure to buyers reaches ${price} requiring insurance or LC coverage. Successful traders like {company} develop playbooks addressing each risk through contractual provisions, operational procedures, and financial instruments. The complexity justifies {percentage}% risk premium built into {commodity} pricing models.",
            
            "The journey toward environmental compliance in {commodity} shipping illustrates industry transformation challenges and opportunities. {vessel} represents new generation of eco-friendly tonnage meeting IMO emissions targets while maintaining commercial viability on routes like {location1} to {location2}. Investments include scrubber technology, ballast water treatment, and energy efficiency measures totaling ${price} per vessel. {company} committed to carbon neutrality by partnering with green operators despite {percentage}% higher freight costs. The {commodity} cargo of {quantity} {unit} generates detailed emissions reporting throughout supply chain. Customers increasingly demand sustainability credentials affecting {incoterm} pricing negotiations. Port authorities at {location2} offer green incentives reducing fees for compliant vessels. Market differentiates between standard and green logistics creating two-tier structure. Forward-thinking participants position for regulatory changes while maintaining competitiveness. The transition period creates opportunities for prepared operators while challenging traditional business models.",
            
            "Sophisticated market intelligence drives profitable {commodity} trading decisions in increasingly transparent markets. {company} deploys multiple information sources tracking vessel positions, port congestion, weather patterns, and commodity flows. Recent analysis identified emerging shortage of {vessel_type} tonnage suitable for {commodity} from {location1} before market consensus formed. Early positioning secured {vessel} at ${price} daily rate before {percentage}% spike within weeks. Satellite imagery revealed {commodity2} stockpile buildups at {location2} indicating future shipping demand. Port agent networks provide real-time updates on berth availability and loading rates. Weather routing services optimize voyage planning reducing costs and improving {incoterm} delivery reliability. Algorithmic analysis of freight fixtures identifies patterns predicting rate movements. Information advantage translates to commercial success with {company} outperforming market benchmarks. Investment in intelligence capabilities provides returns multiples of cost through better timing and risk management.",
            
            "Major infrastructure upgrades at {location} transform {commodity} logistics creating opportunities and disrupting established patterns. New deepwater berth accommodates {vessel_type} vessels up to {number} meters draft compared to previous {number} meter limitation. Automated handling systems increase loading rates to {quantity} {unit} daily reducing port stays and improving vessel utilization. {company} leverages improvements renegotiating {incoterm} terms with buyers reflecting lower freight costs. Expanded storage capacity of {quantity2} {unit} enables stockpiling during low freight markets. Rail connections upgraded to deliver {quantity1} {unit} daily matching vessel loading capabilities. The ${price} million investment attracts new market participants increasing competition for traditional players. {vessel} represents first of larger vessels calling regularly carrying {percentage}% more {commodity} per voyage. Efficiency gains reduce delivered costs to {location2} by ${price} per ton affecting regional pricing dynamics. Success prompts similar developments at competing ports as facilities race to capture growing {commodity2} trade volumes.",
            
            "Digital transformation reshapes {commodity} trading and logistics challenging traditional intermediary roles while creating new opportunities. Blockchain platform connects {location1} producers directly with {location2} consumers eliminating multiple transaction layers. Smart contracts execute automatically when {vessel} loading confirmed by IoT sensors reducing documentation delays and payment risks. {company} adapted business model from pure trading to technology-enabled logistics provider. Digital twin of supply chain simulates scenarios optimizing decisions like {vessel_type} selection and routing via {location3}. Artificial intelligence predicts equipment failures preventing costly {commodity} contamination incidents. Real-time visibility attracts customers willing to pay premium for supply chain transparency under {incoterm} terms. Traditional players struggle with technology adoption creating market share opportunities for digital natives. Investment requirements reach ${price} for full platform deployment but returns justify expenditure through efficiency gains. The transformation continues accelerating as stakeholders demand instant information and seamless execution. Early adopters like {company} establish competitive advantages difficult for followers to overcome."
        ]
        
    def _complex_scenario_templates(self):
        """Very complex, multi-faceted scenario templates"""
        return [
            "{company} orchestrated complex {commodity} transaction involving multiple parties across {number} jurisdictions. Initial purchase from {location1} producer at {incoterm1} terms required bridging finance of ${price} while organizing ocean transportation. {vessel} chartered from Greek owners faced detention at {location2} requiring diplomatic intervention. Simultaneously, {commodity2} backhaul arranged from {location3} optimizing voyage economics. Final delivery to {location4} buyer under {incoterm2} terms complicated by new import regulations requiring additional {document_type}. Payment structure involved three banks with standby LC covering performance risk. Despite challenges, transaction generated ${price} profit demonstrating value of integrated logistics capabilities. Experience gained positions {company} for similar opportunities in growing {commodity3} trade between regions.",
            
            "When {vessel} suffered major casualty carrying {commodity} cargo worth ${price} million, crisis management protocols activated immediately. Initial distress call from {location1} waters triggered emergency response involving salvage tugs, coast guard, and P&I club representatives. {company} as cargo owner faced multiple challenges: securing alternative vessel for replacement {commodity} shipment to honor {incoterm} {location2} contract, managing insurance claims for {quantity} {unit} cargo, coordinating with environmental authorities regarding potential pollution, and maintaining customer relationships despite force majeure event. Salvage operation successfully prevented total loss but {percentage}% of {commodity} damaged by seawater ingress. Legal complexities included General Average declaration requiring securities from all cargo interests. Alternative shipment via {vessel2} arranged within {days} days minimizing supply disruption. Insurance recovery expected to cover {percentage}% of losses after deductibles. Incident highlighted importance of comprehensive risk management and emergency preparedness in maritime trade.",
            
            "Unusual {commodity} price movements at {location} prompted investigation revealing sophisticated market manipulation scheme. Analysis showed coordinated buying by related entities artificially inflating spot prices {percentage}% above fundamentals. {company} identified pattern through freight fixture analysis noting same parties chartering multiple {vessel_type} vessels for phantom cargoes. Regulatory authorities alerted leading to formal investigation freezing {quantity} {unit} at {location} terminal. Legitimate traders like {company} faced windfall losses on {incoterm} contracts priced before manipulation discovered. {vessel} already loaded found cargo unmarketable at destination {location2} requiring expensive deviation to {location3}. Legal proceedings continue with potential claims exceeding ${price} million. Market confidence shaken affecting forward liquidity and increasing risk premiums. Episode demonstrates vulnerability of physical markets to manipulation and importance of robust compliance procedures. Industry associations reviewing rules to prevent recurrence while maintaining market efficiency.",
            
            "Catastrophic IT system failure at {location} port paralyzed operations affecting global {commodity} supply chains. The ransomware attack encrypted critical databases preventing vessel berthing assignments, cargo documentation processing, and equipment allocation. {vessel} among {number} ships anchored awaiting resolution while {commodity} inventories accumulated onshore. {company} faced cascading impacts: {incoterm} delivery obligations at risk, demurrage claims mounting at ${price} daily per vessel, quality degradation of moisture-sensitive {commodity} in storage, and force majeure negotiations with counterparties. Manual workarounds enabled limited operations prioritizing perishable {commodity2} over dry bulk. Recovery took {days} days with permanent data loss affecting historical records. Financial impact exceeded ${price} million across supply chain participants. Incident accelerated cybersecurity investments and offline backup procedures. {company} diversified port exposure reducing single point failure risks in future operations. Industry-wide lessons improved resilience against similar attacks.",
            
            "Sudden geopolitical realignment disrupted established {commodity} trade routes forcing rapid adaptation by market participants. Traditional flow from {location1} to {location2} severed by sanctions requiring {company} to restructure entire supply chain. Alternative sourcing from {location3} involved longer voyages, different {commodity} specifications, and unfamiliar counterparties. {vessel} already en route faced prohibition entering {location2} waters necessitating urgent cargo resale. New routing via {location4} added {days} days and ${price} per ton transport costs. {incoterm1} contracts required renegotiation to {incoterm2} terms reflecting changed risk allocation. Insurance markets repriced coverage with war risk premiums increasing {percentage}%. Quality differences between {location1} and {location3} {commodity} required downstream process adjustments. Financial hedges proved ineffective as correlation assumptions broke down. {company} successfully navigated transition maintaining market share through agility and relationship management. Long-term implications include permanently altered trade flows and new regional pricing benchmarks affecting {vessel_type} deployment patterns.",
            
            "Environmental catastrophe at {location1} following {commodity} terminal explosion required coordinated industry response protecting ecological and commercial interests. Initial blast destroyed {quantity} {unit} storage capacity and damaged {vessel} during loading operations. Toxic plume threatened nearby {location2} forcing evacuation and halting all port operations. {company} activated emergency protocols: diverting incoming {vessel2} carrying {commodity} to alternative {location3}, arranging environmental cleanup specialists, coordinating with authorities on damage assessment, managing media communications protecting reputation, and supporting affected communities through relief funds. Investigation revealed safety violations prompting industry-wide review of {commodity} handling procedures. Insurance complexities included determining coverage between marine and onshore policies. Business interruption lasted {days} days with total losses exceeding ${price} million. Recovery required rebuilding physical infrastructure and community trust. New regulations mandate additional safety measures increasing operational costs but improving risk profile. {company} emerged with enhanced reputation through responsible crisis management while competitors faced litigation."
        ]
        
    def _generate_random_values(self):
        """Build one substitution context covering every template placeholder.

        All placeholder names are populated on every call, whether or not the
        template being filled uses them. Values are drawn in the same order in
        which the keys are inserted into the returned dict.

        Returns:
            dict: placeholder name -> randomly drawn value. Numeric fields
            (quantities, prices, percentages, counts) are ints.
        """
        values = {}

        # Numbered entity placeholders: one independent draw per key.
        entity_sources = (
            (('vessel', 'vessel1', 'vessel2'), self._get_random_vessel),
            (('location', 'location1', 'location2', 'location3', 'location4'),
             self._get_random_location),
            (('commodity', 'commodity1', 'commodity2', 'commodity3'),
             self._get_random_commodity),
            (('incoterm', 'incoterm1', 'incoterm2', 'incoterm3'),
             self._get_random_incoterm),
        )
        for keys, draw in entity_sources:
            for key in keys:
                values[key] = draw()

        # Cargo quantities share a lower bound; only the upper bound differs.
        for key, upper in (('quantity', 200000), ('quantity1', 100000), ('quantity2', 100000)):
            values[key] = random.randint(5000, upper)
        values['unit'] = random.choice(QUANTITY_UNITS)

        # Remaining integer fields as (key, low, high), both bounds inclusive.
        integer_ranges = (
            ('price', 10, 1000),
            ('price1', 10, 500),
            ('price2', 10, 500),
            ('price_change', 1, 50),
            ('percentage', 1, 100),
            ('number', 1, 100),
            ('days', 1, 90),
            ('months', 1, 12),
        )
        for key, low, high in integer_ranges:
            values[key] = random.randint(low, high)

        values['date'] = self._generate_random_date()
        values['month'] = random.choice(MONTHS)
        values['quarter'] = random.randint(1, 4)

        # Free-text terms picked from the module-level vocabularies.
        vocabularies = (
            ('company', SHIPPING_COMPANIES),
            ('terminal', TERMINAL_OPERATORS),
            ('quality_term', QUALITY_TERMS),
            ('vessel_type', VESSEL_TYPES),
            ('payment_term', PAYMENT_TERMS),
            ('country', COUNTRIES),
            ('charter_type', CHARTER_TYPES),
            ('loading_term', LOADING_TERMS),
            ('weather_condition', WEATHER_CONDITIONS),
            ('market_condition', MARKET_CONDITIONS),
            ('inspection_type', INSPECTION_TYPES),
            ('document_type', DOCUMENT_TYPES),
        )
        for key, vocabulary in vocabularies:
            values[key] = random.choice(vocabulary)

        return values
        
    def _get_random_vessel(self):
        """Get random vessel with prefix"""
        vessel = random.choice(self.vessels)
        if random.random() < 0.7:
            prefix = random.choice(VESSEL_PREFIXES)
            return f"{prefix} {vessel}"
        return vessel
        
    def _get_random_location(self):
        """Get random location"""
        return random.choice(self.locations)
        
    def _get_random_commodity(self):
        """Pick one entry of the module-level ``COMMODITIES`` uniformly at random."""
        chosen_commodity = random.choice(COMMODITIES)
        return chosen_commodity
        
    def _get_random_incoterm(self):
        """Pick one entry of the module-level ``INCOTERMS`` uniformly at random."""
        chosen_incoterm = random.choice(INCOTERMS)
        return chosen_incoterm
        
    def _generate_random_date(self):
        """Generate random date"""
        days_ahead = random.randint(1, 180)
        future_date = datetime.now() + timedelta(days=days_ahead)
        return future_date.strftime("%Y-%m-%d")
        
    def _fill_template(self, template):
        """Fill template with random values"""
        values = self._generate_random_values()
        
        try:
            sentence = template.format(**values)
            return sentence, values
        except KeyError as e:
            print(f"Missing key in template: {e}")
            return None, None
            
    def _extract_entities_from_sentence(self, sentence, context):
        """Extract ONLY entities that actually appear in the sentence"""
        entities = []
        
        entity_mapping = {
            'vessel': 'vessel',
            'vessel1': 'vessel', 
            'vessel2': 'vessel',
            'location': 'location',
            'location1': 'location',
            'location2': 'location',
            'location3': 'location',
            'location4': 'location',
            'commodity': 'commodity',
            'commodity1': 'commodity',
            'commodity2': 'commodity',
            'commodity3': 'commodity',
            'incoterm': 'incoterm',
            'incoterm1': 'incoterm',
            'incoterm2': 'incoterm',
            'incoterm3': 'incoterm',
            'company': 'company',
            'terminal': 'company'
        }
        
        seen_entities = set()
        
        for key, value in context.items():
            if key in entity_mapping and isinstance(value, str):
                if value in sentence:
                    entity_text = str(value)
                    entity_type = entity_mapping[key]
                    entity_key = f"{entity_text} <> {entity_type}"
                    
                    if entity_key not in seen_entities:
                        seen_entities.add(entity_key)
                        entities.append(entity_key)
                        
        return list(seen_entities)
        
    def generate_dataset(self, num_samples=20000, min_entities_per_sample=2, max_attempts=None):
        """Generate labelled samples by filling randomly chosen templates.

        A category is drawn according to ``category_weights`` (restricted to
        categories that have templates in ``self.template_categories``), one
        of its templates is filled, and the result is kept when it has at
        least ``min_entities_per_sample`` entities and either contains an
        incoterm/commodity entity, belongs to a paragraph-style category, or
        passes a 30% random draw.

        Args:
            num_samples: Number of samples to collect.
            min_entities_per_sample: Minimum entity count for a sample to be kept.
            max_attempts: Upper bound on template draws. ``None`` selects
                ``max(10000, 100 * num_samples)``. When the bound is hit a
                warning is printed and the samples collected so far are
                returned, instead of looping forever.

        Returns:
            list[dict]: records with keys ``input``, ``output``, ``category``
            and ``length``.

        Raises:
            ValueError: if samples are requested but no weighted category has
                any templates.
        """
        dataset = []
        category_counts = defaultdict(int)
        failed_attempts = 0

        category_weights = {
            'incoterm_focused': 20,
            'commodity_focused': 20,
            'trade_terms': 15,
            'multi_commodity': 10,
            'voyage': 5,
            'commercial': 5,
            'operations': 5,
            'logistics': 5,
            'market_reports': 5,
            'contract_negotiations': 3,
            'risk_management': 3,
            'regulatory': 2,
            'paragraph_reports': 2,
            'complex_scenarios': 1
        }

        # Drop categories without templates up front. Re-drawing on an empty
        # category (as a rejection loop would) yields the same distribution
        # but spins forever when every category is empty.
        categories = [c for c in category_weights if self.template_categories.get(c)]
        weights = [category_weights[c] for c in categories]
        if num_samples > 0 and not categories:
            raise ValueError("No templates available for any weighted category")

        if max_attempts is None:
            max_attempts = max(10000, num_samples * 100)

        # Match on the entity *type* suffix, not anywhere in the entity text.
        priority_suffixes = (' <> incoterm', ' <> commodity')
        paragraph_categories = {'paragraph_reports', 'complex_scenarios', 'market_reports'}

        print(f"Generating {num_samples} maritime sentences...")
        print(f"Using {len(self.vessels)} vessels and {len(self.locations)} locations")
        print(f"Categories available: {len(self.template_categories)}")

        attempts = 0
        with tqdm(total=num_samples) as pbar:
            while len(dataset) < num_samples:
                if attempts >= max_attempts:
                    print(f"\nWarning: stopping after {attempts} attempts with "
                          f"{len(dataset)}/{num_samples} samples collected")
                    break
                attempts += 1

                category = random.choices(categories, weights=weights, k=1)[0]
                template = random.choice(self.template_categories[category])
                sentence, context = self._fill_template(template)

                if not sentence:
                    failed_attempts += 1
                    continue

                entities = self._extract_entities_from_sentence(sentence, context)
                if len(entities) < min_entities_per_sample:
                    continue

                has_priority_entity = any(e.endswith(priority_suffixes) for e in entities)
                is_paragraph = category in paragraph_categories

                # The random draw is only consumed when neither rule applies.
                if has_priority_entity or is_paragraph or random.random() < 0.3:
                    dataset.append({
                        'input': sentence,
                        'output': entities,
                        'category': category,
                        'length': len(sentence)
                    })

                    category_counts[category] += 1
                    pbar.update(1)

        print(f"\nGeneration complete! Failed attempts: {failed_attempts}")

        # The statistics report divides by the sample count, so skip it when
        # nothing was generated.
        if dataset:
            self._print_statistics(dataset, category_counts)

        return dataset
        
    def _print_statistics(self, dataset, category_counts):
        """Print comprehensive dataset statistics"""
        print("\n" + "="*60)
        print("DATASET STATISTICS")
        print("="*60)
        print(f"Total samples: {len(dataset)}")
        
        print("\nCategory distribution:")
        total_samples = len(dataset)
        for category, count in sorted(category_counts.items(), key=lambda x: x[1], reverse=True):
            percentage = (count / total_samples) * 100
            print(f"  {category}: {count} ({percentage:.1f}%)")
            
        lengths = [item['length'] for item in dataset]
        print(f"\nText length statistics:")
        print(f"  Average length: {sum(lengths)/len(lengths):.0f} characters")
        print(f"  Min length: {min(lengths)} characters")
        print(f"  Max length: {max(lengths)} characters")
        print(f"  Samples >500 chars: {len([l for l in lengths if l > 500])} ({len([l for l in lengths if l > 500])/total_samples*100:.1f}%)")
        print(f"  Samples >1000 chars: {len([l for l in lengths if l > 1000])} ({len([l for l in lengths if l > 1000])/total_samples*100:.1f}%)")
            
        entity_counts = defaultdict(int)
        total_entities = 0
        samples_with_incoterm = 0
        samples_with_commodity = 0
        samples_with_both = 0
        entity_per_sample = []
        
        for item in dataset:
            has_incoterm = False
            has_commodity = False
            
            entity_per_sample.append(len(item['output']))
            
            for entity in item['output']:
                total_entities += 1
                entity_type = entity.split(' <> ')[1]
                entity_counts[entity_type] += 1
                
                if entity_type == 'incoterm':
                    has_incoterm = True
                elif entity_type == 'commodity':
                    has_commodity = True
                    
            if has_incoterm:
                samples_with_incoterm += 1
            if has_commodity:
                samples_with_commodity += 1
            if has_incoterm and has_commodity:
                samples_with_both += 1
                
        print(f"\nTotal entities: {total_entities}")
        print(f"Average entities per sample: {total_entities/len(dataset):.2f}")
        print(f"Min entities per sample: {min(entity_per_sample)}")
        print(f"Max entities per sample: {max(entity_per_sample)}")
        
        print("\nEntity type distribution:")
        for entity_type, count in sorted(entity_counts.items(), key=lambda x: x[1], reverse=True):
            percentage = (count / total_entities) * 100
            print(f"  {entity_type}: {count} ({percentage:.1f}%)")
            
        print(f"\nIncoterm/Commodity coverage:")
        print(f"  Samples with incoterms: {samples_with_incoterm} ({samples_with_incoterm/total_samples*100:.1f}%)")
        print(f"  Samples with commodities: {samples_with_commodity} ({samples_with_commodity/total_samples*100:.1f}%)")
        print(f"  Samples with both: {samples_with_both} ({samples_with_both/total_samples*100:.1f}%)")
        
        print("\n" + "="*60)
        print("SAMPLE OUTPUTS")
        print("="*60)
        
        short_examples = [item for item in dataset if item['length'] < 200][:2]
        medium_examples = [item for item in dataset if 200 <= item['length'] < 500][:2]
        long_examples = [item for item in dataset if item['length'] >= 500][:2]
        
        print("\nSHORT examples (<200 chars):")
        for i, example in enumerate(short_examples):
            print(f"\n{i+1}. {example['input']}")
            print(f"   Category: {example['category']}")
            print(f"   Entities: {example['output']}")
            
        print("\nMEDIUM examples (200-500 chars):")
        for i, example in enumerate(medium_examples):
            print(f"\n{i+1}. {example['input'][:200]}...")
            print(f"   Category: {example['category']}")
            print(f"   Length: {example['length']} chars")
            print(f"   Entities: {example['output']}")
            
        print("\nLONG examples (>500 chars):")
        for i, example in enumerate(long_examples):
            print(f"\n{i+1}. {example['input'][:300]}...")
            print(f"   Category: {example['category']}")
            print(f"   Length: {example['length']} chars")
            print(f"   Entities: {example['output'][:5]}... ({len(example['output'])} total)")

# -------------------------------------------------------------------
# Main Execution
# -------------------------------------------------------------------
def generate_enhanced_maritime_dataset(num_samples=30000, output_path='enhanced_maritime_ner_dataset.json',
                                     vessels_list=None, locations_list=None):
    """
    Generate the enhanced maritime NER dataset and write it with a 90/10 split.

    Three JSON files are written: the full dataset at ``output_path`` and a
    shuffled train/validation split next to it, named by inserting ``_train``
    / ``_val`` before the file extension (``x.json`` -> ``x_train.json``).

    Args:
        num_samples: Number of samples to generate
        output_path: Path to save the dataset; missing parent directories
            are created
        vessels_list: List of vessel names passed to the generator
        locations_list: List of locations passed to the generator

    Returns:
        The generated dataset, in the same order as written to ``output_path``.
    """
    import os  # local import: `os` is not part of the notebook's import cell

    output_path = os.fspath(output_path)

    # Derive split paths from the extension. A plain str.replace('.json', ...)
    # leaves a path without ".json" unchanged, so the train and val files
    # would overwrite the full dataset.
    root, ext = os.path.splitext(output_path)
    train_path = f"{root}_train{ext}"
    val_path = f"{root}_val{ext}"

    # Create the target directory before the expensive generation step so a
    # missing folder cannot discard the finished dataset.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    def _write_json(path, records):
        """Serialize ``records`` to ``path`` as indented UTF-8 JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(records, f, indent=2, ensure_ascii=False)

    generator = EnhancedMaritimeGenerator(
        vessels_list=vessels_list,
        locations_list=locations_list
    )

    dataset = generator.generate_dataset(num_samples=num_samples)

    _write_json(output_path, dataset)
    print(f"\nDataset saved to: {output_path}")

    # Shuffle a copy so the returned list keeps the order of the saved file.
    shuffled = random.sample(dataset, len(dataset))
    split_idx = int(0.9 * len(shuffled))

    train_data = shuffled[:split_idx]
    val_data = shuffled[split_idx:]

    _write_json(train_path, train_data)
    _write_json(val_path, val_data)

    print(f"Train set ({len(train_data)} samples): {train_path}")
    print(f"Validation set ({len(val_data)} samples): {val_path}")

    return dataset

In [None]:
# Seed the stdlib RNG so repeated runs draw the same random sequence for
# template selection, value filling and the train/val shuffle. Generated
# dates are still relative to the day the cell is run.
SEED = 42
random.seed(SEED)

# NOTE(review): the input CSVs are read from ../data/ while this writes under
# ./data/ relative to the working directory; confirm the intended output folder.
dataset = generate_enhanced_maritime_dataset(
    num_samples=40000,
    output_path='data/enhanced_maritime_ner_dataset.json',
    vessels_list=list(clean_vessels),
    locations_list=list(clean_locations)
)