In [16]:
import os
import re
import json
import kml2geojson as k2g
import zipfile
import requests
import shutil
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import xml.etree.ElementTree as ET
from lxml import etree
from datetime import datetime
import subprocess


In [17]:
# Define paths
base_directory = r"C:\Users\CrudeIntern\OneDrive - Hengli Petrochemical International Pte Ltd\Market Analysis\Current Projects\Hurricane"
def webscraping_kmz(base_directory: str) -> None:
    driver = webdriver.Chrome()
    try:
        url = "https://www.metoc.navy.mil/jtwc/jtwc.html"
        driver.get(url)
        driver.implicitly_wait(5)
        kmz_link_elements = WebDriverWait(driver, 5).until(
            EC.presence_of_all_elements_located((By.LINK_TEXT, "Google Earth Overlay"))
        )
        if not kmz_link_elements:
            print("No Google Earth Overlay links found.")
            return
        file_names, kmz_file_paths = [], []
        for index, kmz_link_element in enumerate(kmz_link_elements):
            kmz_url = kmz_link_element.get_attribute('href')
            print(f"KMZ File URL {index + 1}: {kmz_url}")
            file_name = kmz_url.split('/')[-1].split('.')[0]
            kmz_file = requests.get(kmz_url) 
            kmz_file_path = os.path.join(base_directory, f'{file_name}.kmz')
            kmz_file_paths.append(kmz_file_path)
            file_names.append(file_name)
            with open(kmz_file_path, 'wb') as f:
                f.write(kmz_file.content)
            print(f"KMZ file {index + 1} saved to {kmz_file_path}")
    except Exception as e:
        print("Error occurred while scraping KMZ files:", e)
    finally:
        driver.quit()
    return kmz_file_paths, file_names
kmz_file_paths, file_names = webscraping_kmz(base_directory)
print(kmz_file_paths)
print(file_names)





KMZ File URL 1: https://www.metoc.navy.mil/jtwc/products/wp2124.kmz
KMZ file 1 saved to C:\Users\CrudeIntern\OneDrive - Hengli Petrochemical International Pte Ltd\Market Analysis\Current Projects\Hurricane\wp2124.kmz
KMZ File URL 2: https://www.metoc.navy.mil/jtwc/products/ep9924.kmz
KMZ file 2 saved to C:\Users\CrudeIntern\OneDrive - Hengli Petrochemical International Pte Ltd\Market Analysis\Current Projects\Hurricane\ep9924.kmz
['C:\\Users\\CrudeIntern\\OneDrive - Hengli Petrochemical International Pte Ltd\\Market Analysis\\Current Projects\\Hurricane\\wp2124.kmz', 'C:\\Users\\CrudeIntern\\OneDrive - Hengli Petrochemical International Pte Ltd\\Market Analysis\\Current Projects\\Hurricane\\ep9924.kmz']
['wp2124', 'ep9924']


In [18]:

def extract_kml(kmz_file_paths: list, file_names: list, base_directory: str) -> None:
    kml_file_paths = []
    for kmz_file_path, file_name in zip(kmz_file_paths, file_names):
        output_directory = os.path.join(base_directory, file_name)
        with zipfile.ZipFile(kmz_file_path, 'r') as zip_ref:
            zip_ref.extractall(output_directory)
        print(f"KMZ file {kmz_file_path} has been extracted to {output_directory}")
        ld = os.listdir(output_directory)
        kml_file = [file for file in ld if file.endswith('.kml')]
        kml_file_path = os.path.join(output_directory, kml_file[0])
        try:
            new_kml_path = os.path.join(base_directory, f'{file_name}.kml')
            print(new_kml_path)
            os.rename(kml_file_path, new_kml_path)
            print(f"KML file has been moved and renamed to {new_kml_path}")
            kml_file_paths.append(new_kml_path)
        except Exception as e:
            print(f"Error moving KML file: {e}")
        try:
            shutil.rmtree(output_directory)
            print(f"Folder {output_directory} has been deleted.")
        except Exception as e:
            print(f"Error deleting folder {output_directory}: {e}")
        try:
            os.remove(kmz_file_path)
            print(f"KMZ file {kmz_file_path} has been deleted.")
        except Exception as e:
            print(f"Error deleting KMZ file {kmz_file_path}: {e}")
    return kml_file_paths
kml_file_paths = extract_kml(kmz_file_paths, file_names, base_directory)
print(kml_file_paths)

KMZ file C:\Users\CrudeIntern\OneDrive - Hengli Petrochemical International Pte Ltd\Market Analysis\Current Projects\Hurricane\wp2124.kmz has been extracted to C:\Users\CrudeIntern\OneDrive - Hengli Petrochemical International Pte Ltd\Market Analysis\Current Projects\Hurricane\wp2124
C:\Users\CrudeIntern\OneDrive - Hengli Petrochemical International Pte Ltd\Market Analysis\Current Projects\Hurricane\wp2124.kml
Error moving KML file: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\CrudeIntern\\OneDrive - Hengli Petrochemical International Pte Ltd\\Market Analysis\\Current Projects\\Hurricane\\wp2124\\doc.kml' -> 'C:\\Users\\CrudeIntern\\OneDrive - Hengli Petrochemical International Pte Ltd\\Market Analysis\\Current Projects\\Hurricane\\wp2124.kml'
Folder C:\Users\CrudeIntern\OneDrive - Hengli Petrochemical International Pte Ltd\Market Analysis\Current Projects\Hurricane\wp2124 has been deleted.
KMZ file C:\Users\CrudeIntern\OneDrive - Hengli Petrochemical 

In [19]:
def parse_kml_file(kml_file):
    """
    Parse a KML file and return its root element.
    Args:
        kml_file (str): The path to the KML file.
    Returns:
        root (Element): The root element of the parsed KML file.
    """
    try:
        tree = etree.parse(kml_file)
        return tree.getroot()
    except etree.XMLSyntaxError as e:
        print(f"Error parsing file {kml_file}: {e}")
        return None

def adjust_ids(element, id_suffix):
    """
    Adjust IDs of elements to ensure uniqueness in the merged KML.
    Args:
        element (Element): The root element whose child IDs need adjustment.
        id_suffix (int): A unique suffix to append to IDs.
    """
    for elem in element.iter():
        if 'id' in elem.attrib:
            elem.attrib['id'] = f"{elem.attrib['id']}_{id_suffix}"

def merge_kml_files(selected_files, output_file):
    """
    Merge the selected KML files into a single KML file.
    Args:
        selected_files (list): List of KML files selected for merging.
        output_file (str): The path for the output merged KML file.
    """
    # Define the KML namespace
    KML_NAMESPACE = "http://www.opengis.net/kml/2.2"
    NSMAP = {None: KML_NAMESPACE}

    # Create a root for the new KML file
    merged_root = etree.Element('kml', nsmap=NSMAP)
    merged_document = etree.SubElement(merged_root, 'Document')

    # Keep track of unique IDs to avoid conflicts
    id_counter = 0

    for kml_file in selected_files:
        root = parse_kml_file(kml_file)
        if root is not None:
            # Find the <Document> element inside each KML file
            document = root.find(f'.//{{{KML_NAMESPACE}}}Document')
            if document is not None:
                # Adjust IDs to avoid conflicts
                adjust_ids(document, id_counter)
                id_counter += 1

                # Append child elements of the <Document> to the merged document
                for elem in document:
                    merged_document.append(elem)
            else:
                # If <Document> is not found, check for <Folder> elements
                folders = root.findall(f'.//{{{KML_NAMESPACE}}}Folder')
                for folder in folders:
                    adjust_ids(folder, id_counter)
                    id_counter += 1
                    merged_document.append(folder)
        else:
            print(f"Failed to parse file {kml_file}.")

    if len(merged_document) > 0:
        # Write the merged KML content into the output file
        with open(output_file, 'wb') as f:
            f.write(etree.tostring(merged_root, pretty_print=True, xml_declaration=True, encoding='UTF-8'))
        print(f"Merged KML file saved as: {output_file}")
    else:
        print("No valid KML content was merged.")
print(file_names)
output_combine_file = os.path.join(base_directory, 'hurricane_combined.kml')
merge_kml_files(kml_file_paths, output_file=output_combine_file)

['wp2124', 'ep9924']
No valid KML content was merged.


Transform the Geojson


In [20]:
def kml_to_geojson(output_geojson_path: str, output_combine_file: str) -> None:
    file = k2g.main.convert(kml_path_or_buffer=output_combine_file, feature_collection_name='hurricane_combined.geojson')
    with open(output_geojson_path, 'w') as f:
        f.write(str(file[0]))
    with open(output_geojson_path, 'r') as f:
        raw_geojson_content = f.read()
    while True:
        try:
            raw_geojson_content = raw_geojson_content.replace("'", '"')
            geojson_obj = json.loads(raw_geojson_content)
            formatted_geojson_content = json.dumps(geojson_obj, indent=4)
            with open(output_geojson_path, 'w') as f:
                f.write(formatted_geojson_content)
            print("GeoJSON cleaned and saved successfully.")
            break
        except json.JSONDecodeError as e:
            print(f"Error processing the GeoJSON file: {e}")
            escape_index = e.colno - 2
            raw_geojson_content = raw_geojson_content[:escape_index] + "\\" + raw_geojson_content[escape_index:]
            for i, val in enumerate(raw_geojson_content[escape_index+2:]):
                if val == '"':
                    raw_geojson_content = raw_geojson_content[:escape_index+i+2] + "\\" + raw_geojson_content[escape_index+i+2:] 
                    break

output_geojson_path = os.path.join(base_directory, 'hurricane_combined.geojson')
output_combine_file = os.path.join(base_directory, 'hurricane_combined.kml')
kml_to_geojson(output_geojson_path, output_combine_file)

Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 424 (char 423)
Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 440 (char 439)
Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 457 (char 456)
Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 5224 (char 5223)
Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 5240 (char 5239)
Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 5257 (char 5256)
Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 10372 (char 10371)
Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 10388 (char 10387)
Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 10405 (char 10404)
Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 15536 (char 15535)
Error processing the GeoJSON file: Expecting ',' delimiter: line 1 column 15552 (char 15

Upload to github

In [24]:
import subprocess

# Path to Git Bash executable
bash_path = r"C:\Windows\System32\bash.exe"  # Update this path based on where Git Bash is installed

# Path to your shell script (.sh file)
script_path = r"C:/Users/CrudeIntern/OneDrive - Hengli Petrochemical International Pte Ltd/Market Analysis/Current Projects/Hurricane/auto_upload.sh"

# Prepare the command to run the script using Git Bash
cmd = [bash_path, '-c', script_path]

# Execute the script
shellscript = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE, text=True)

# Read and print the output line by line
for line in shellscript.stdout:
    print(line.strip())

# Wait for the process to complete and get the return code
returncode = shellscript.wait()
print(f"Process ended with the return code of {returncode}.")


FileNotFoundError: [WinError 2] The system cannot find the file specified