# KMST Verdict Decisions Downloader

This notebook downloads and processes verdict decisions published by the Korea Maritime Safety Tribunal (KMST). It performs the following tasks:

1. Loads a CSV file containing vessel names and URLs
2. Downloads verdict decision files from the provided URLs
3. Renames the downloaded files based on vessel names
4. Handles duplicate filenames and error cases

The notebook uses pandas for data handling and the requests library for downloading files.


In [4]:
import ast
import logging
import os
from datetime import datetime
from pathlib import Path

import pandas as pd
import requests

In [3]:
# Load the CSV of vessel names and download URLs into a DataFrame
decisions_df = pd.read_csv(filepath_or_buffer='../data/extracted_vessel_names.csv')

In [11]:
# Characters replaced in generated filenames: path separators plus the
# characters that common filesystems (notably Windows) refuse in file names.
_UNSAFE_FILENAME_CHARS = frozenset('<>:"/\\|?*')


def _parse_vessel_names(raw):
    """Normalize one 'vessel_name' cell into a list of non-empty name strings.

    Accepts an actual list/tuple, the string representation of a list
    (e.g. "['A', 'B']"), a plain single name, or a missing value.
    """
    if isinstance(raw, (list, tuple)):
        candidates = list(raw)
    elif isinstance(raw, str):
        # ast.literal_eval only parses Python literals, so text coming from
        # the CSV can never execute code (unlike eval).
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            parsed = None
        # Anything that is not a list literal (a plain name, or a name that
        # happens to look like a number) is kept verbatim as a single name.
        if isinstance(parsed, (list, tuple)):
            candidates = list(parsed)
        else:
            candidates = [raw]
    elif raw is None or (pd.api.types.is_scalar(raw) and pd.isna(raw)):
        # Missing cell: no names available.
        candidates = []
    else:
        candidates = [raw]

    cleaned = (str(item).strip() for item in candidates)
    return [name for name in cleaned if name]


def _safe_filename(name):
    """Replace characters that are unsafe in a file name with underscores."""
    return ''.join(
        '_' if ch in _UNSAFE_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in name
    )


def download_and_rename_decisions(df, output_dir='downloaded_decisions'):
    """
    Downloads decision files and renames them based on vessel names

    Each row is processed independently: a failure is printed and logged via
    ``logging.error`` and the loop moves on to the next row. Files are saved
    as ``<vessel names joined by '-'>.hwp``; repeated names within one call
    get a ``-1``, ``-2``, ... suffix. Rows with no usable vessel name are
    saved as ``unnamed-<row index>.hwp``.

    Args:
        df: DataFrame containing 'url' and 'vessel_name' columns
        output_dir: Directory to save downloaded files

    Returns:
        None
    """
    # Create output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Track filenames used during this call to handle duplicates
    existing_names = set()

    for idx, row in df.iterrows():
        try:
            url = row['url']
            vessel_names = _parse_vessel_names(row['vessel_name'])

            # Create filename from vessel names, falling back to the row
            # index when the cell was empty or missing
            base_filename = _safe_filename('-'.join(vessel_names)) or f"unnamed-{idx}"
            filename = base_filename

            # Handle duplicate filenames
            counter = 1
            while filename in existing_names:
                filename = f"{base_filename}-{counter}"
                counter += 1

            # Reserved before the download so a failed row keeps its name
            existing_names.add(filename)

            print(f"Downloading {filename} from {url}")

            # Download the file
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            # Save the file with .hwp extension
            output_path = os.path.join(output_dir, f"{filename}.hwp")
            with open(output_path, 'wb') as f:
                f.write(response.content)

            print(f"Successfully downloaded: {filename}")

        except Exception as e:
            # Best-effort batch: report the failure and keep going
            error_msg = f"Error processing row {idx}: {str(e)}"
            print(error_msg)
            logging.error(error_msg)

In [7]:
# Route ERROR-level log records to a file whose name carries the current timestamp
log_filename = f'download_errors_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'
logging.basicConfig(
    level=logging.ERROR,
    format='%(asctime)s - %(message)s',
    filename=log_filename,
)

In [12]:
# Download every decision listed in decisions_df into the default output directory
download_and_rename_decisions(df=decisions_df)


Downloading Myungyoonho-Daeyangho from https://www.kmst.go.kr/web/atch/atchFileDownload.do?atchId=100553&fileSn=1
Successfully downloaded: Myungyoonho-Daeyangho
Downloading Geumoh 7-Woogukti 5 from https://www.kmst.go.kr/web/atch/atchFileDownload.do?atchId=100499&fileSn=1
Successfully downloaded: Geumoh 7-Woogukti 5
Downloading 26 Namseongho from https://www.kmst.go.kr/web/atch/atchFileDownload.do?atchId=100496&fileSn=1
Successfully downloaded: 26 Namseongho
Downloading Gwangjeong 8-Gwangjeong 88 from https://www.kmst.go.kr/web/atch/atchFileDownload.do?atchId=100493&fileSn=1
Successfully downloaded: Gwangjeong 8-Gwangjeong 88
Downloading Yeonheungho 2007-Sing from https://www.kmst.go.kr/web/atch/atchFileDownload.do?atchId=100447&fileSn=1
Successfully downloaded: Yeonheungho 2007-Sing
Downloading 101 Tongyeongho from https://www.kmst.go.kr/web/atch/atchFileDownload.do?atchId=100444&fileSn=1
Successfully downloaded: 101 Tongyeongho
Downloading Seong-ho-cargo from https://www.kmst.go.kr/w