# Convert GeoJson File (10T Resample)

In [1]:
import json

def load_geojson(filename):
    geojson_data_path = '../server/testdata/' + filename + '.geojson'
    try:
        with open(geojson_data_path, 'r') as file:
            data = json.load(file)
        print("GeoJSON data loaded successfully.")
        return data
    except Exception as e:
        raise ValueError(f"An error occurred while loading the GeoJSON data: {e}")

In [2]:
import geopandas as gpd

def load_geojson_gpd(filename):
    # Path to the GeoJSON file
    geojson_data_path = '../server/testdata/' + filename + '.geojson'

    # Read the GeoJSON file
    try:
        gdf_geojson = gpd.read_file(geojson_data_path)
        print("GeoJSON data loaded successfully.")
        return gdf_geojson
    
    except Exception as e:
        print(f"An error occurred while loading the GeoJSON data: {e}")

In [3]:
myjsonfile = load_geojson('passenger_data_resample10T')
myjsonfile

GeoJSON data loaded successfully.


{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'SHIP_ID': '123456788',
    'RECPTN_DT': '2022-08-12T01:30:00',
    'SEQUENCE_ID': 1.0,
    'LA': 31.468265,
    'LO': 126.542485,
    'SOG': 10.7,
    'COG': 96.8,
    'TYPE': 'passenger',
    'TON': 11.0,
    'dist': 148.67964567511444,
    'sea_lv': 182.0,
    'port_name': '이어도'},
   'geometry': {'type': 'Point', 'coordinates': [125.16667, 32.11667]}},
  {'type': 'Feature',
   'properties': {'SHIP_ID': '123456788',
    'RECPTN_DT': '2022-08-24T22:00:00',
    'SEQUENCE_ID': 2.0,
    'LA': 31.409738333333333,
    'LO': 126.31339666666666,
    'SOG': 5.3,
    'COG': 60.5,
    'TYPE': 'passenger',
    'TON': 11.0,
    'dist': 133.91085080078506,
    'sea_lv': 228.0,
    'port_name': '이어도'},
   'geometry': {'type': 'Point', 'coordinates': [125.16667, 32.11667]}},
  {'type': 'Feature',
   'properties': {'SHIP_ID': '123456788',
    'RECPTN_DT': '2022-08-24T22:10:00',
    'SEQUENCE_ID': 2.0,
    'LA': 31.43278

In [4]:
mygdffile = load_geojson_gpd('passenger_data_resample10T')
mygdffile

GeoJSON data loaded successfully.


Unnamed: 0,SHIP_ID,RECPTN_DT,SEQUENCE_ID,LA,LO,SOG,COG,TYPE,TON,dist,sea_lv,port_name,geometry
0,123456788,2022-08-12 01:30:00,1.0,31.468265,126.542485,10.700000,96.800000,passenger,11.0,148.679646,182.0,이어도,POINT (125.16667 32.11667)
1,123456788,2022-08-24 22:00:00,2.0,31.409738,126.313397,5.300000,60.500000,passenger,11.0,133.910851,228.0,이어도,POINT (125.16667 32.11667)
2,123456788,2022-08-24 22:10:00,2.0,31.432782,126.316425,9.400000,357.700000,passenger,11.0,132.646465,227.0,이어도,POINT (125.16667 32.11667)
3,123456788,2022-08-24 22:20:00,2.0,31.451929,126.311978,6.216667,338.833333,passenger,11.0,131.080876,225.0,이어도,POINT (125.16667 32.11667)
4,123456788,2022-08-25 14:30:00,3.0,31.560967,126.595317,9.000000,106.100000,passenger,11.0,148.427475,74.0,이어도,POINT (125.16667 32.11667)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1626311,667002108,2022-11-22 19:40:00,20.0,38.159287,131.734440,8.700000,27.600000,passenger,7653.0,97.656542,9.0,쌍정초,POINT (130.93333 37.55000)
1626312,667002108,2022-11-22 19:50:00,20.0,38.181743,131.750356,8.266667,31.366667,passenger,7653.0,100.390809,10.0,쌍정초,POINT (130.93333 37.55000)
1626313,667002108,2022-11-22 20:00:00,20.0,38.202060,131.764880,8.800000,29.300000,passenger,7653.0,102.877772,10.0,쌍정초,POINT (130.93333 37.55000)
1626314,667002108,2022-11-22 20:10:00,20.0,38.223666,131.779747,8.800000,29.166667,passenger,7653.0,105.491824,11.0,쌍정초,POINT (130.93333 37.55000)


In [9]:
import json

def transform_geojson(input_filename, output_filename):
    geojson_data_path = '../server/testdata/' + input_filename + '.geojson'
    
    try:
        # Load the existing GeoJSON data
        with open(geojson_data_path, 'r') as file:
            data = json.load(file)
        print("GeoJSON data loaded successfully.")
        
        # Transform the data
        transformed_features = []
        for feature in data['features']:
            # Extract properties
            properties = feature['properties']
            
            # Create new geometry for the ship's coordinates
            ship_geometry = {
                'type': 'Point',
                'coordinates': [properties['LO'], properties['LA']]
            }
            
            # Add the port coordinates separately
            port_geometry = feature['geometry']  # This is the port's coordinates
            
            # Construct the new feature
            new_feature = {
                'type': 'Feature',
                'properties': {
                    'SHIP_ID': properties['SHIP_ID'],
                    'RECPTN_DT': properties['RECPTN_DT'],
                    'SEQUENCE_ID': int(properties['SEQUENCE_ID']),  # Convert SEQUENCE_ID to int
                    'SOG': properties['SOG'],
                    'COG': properties['COG'],
                    'TYPE': properties['TYPE'],
                    'TON': properties['TON'],
                    'dist': properties['dist'],
                    'sea_lv': properties['sea_lv'],
                    'port_name': properties['port_name'],
                    'port_geometry': port_geometry
                },
                'geometry': ship_geometry
            }
            
            # Add the transformed feature to the list
            transformed_features.append(new_feature)
        
        # Construct the new GeoJSON structure
        transformed_geojson = {
            'type': 'FeatureCollection',
            'features': transformed_features
        }
        
        # Save the transformed GeoJSON data to a new file
        output_path = '../server/testdata/' + output_filename + '.geojson'
        with open(output_path, 'w') as outfile:
            json.dump(transformed_geojson, outfile, indent=4)
        print(f"Transformed GeoJSON data saved successfully to {output_filename}.geojson")
        
    except Exception as e:
        print(f"An error occurred while processing the GeoJSON data: {e}")

In [11]:
# Example usage
transform_geojson('passenger_data_resample10T', 'passenger_data_resample10T_ver02')

GeoJSON data loaded successfully.
Transformed GeoJSON data saved successfully to passenger_data_resample10T_ver02.geojson


In [3]:
myjsonfile2 = load_geojson('passenger_data_resample10T_ver02')
myjsonfile2

GeoJSON data loaded successfully.


{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'SHIP_ID': '123456788',
    'RECPTN_DT': '2022-08-12T01:30:00',
    'SEQUENCE_ID': 1,
    'SOG': 10.7,
    'COG': 96.8,
    'TYPE': 'passenger',
    'TON': 11.0,
    'dist': 148.67964567511444,
    'sea_lv': 182.0,
    'port_name': '이어도',
    'port_geometry': {'type': 'Point', 'coordinates': [125.16667, 32.11667]}},
   'geometry': {'type': 'Point', 'coordinates': [126.542485, 31.468265]}},
  {'type': 'Feature',
   'properties': {'SHIP_ID': '123456788',
    'RECPTN_DT': '2022-08-24T22:00:00',
    'SEQUENCE_ID': 2,
    'SOG': 5.3,
    'COG': 60.5,
    'TYPE': 'passenger',
    'TON': 11.0,
    'dist': 133.91085080078506,
    'sea_lv': 228.0,
    'port_name': '이어도',
    'port_geometry': {'type': 'Point', 'coordinates': [125.16667, 32.11667]}},
   'geometry': {'type': 'Point',
    'coordinates': [126.31339666666666, 31.409738333333333]}},
  {'type': 'Feature',
   'properties': {'SHIP_ID': '123456788',
    'REC

In [4]:
mygdffile2 = load_geojson_gpd('passenger_data_resample10T_ver02')
mygdffile2

GeoJSON data loaded successfully.


Unnamed: 0,SHIP_ID,RECPTN_DT,SEQUENCE_ID,SOG,COG,TYPE,TON,dist,sea_lv,port_name,port_geometry,geometry
0,123456788,2022-08-12 01:30:00,1,10.700000,96.800000,passenger,11.0,148.679646,182.0,이어도,"{'type': 'Point', 'coordinates': [125.16667, 3...",POINT (126.54248 31.46826)
1,123456788,2022-08-24 22:00:00,2,5.300000,60.500000,passenger,11.0,133.910851,228.0,이어도,"{'type': 'Point', 'coordinates': [125.16667, 3...",POINT (126.31340 31.40974)
2,123456788,2022-08-24 22:10:00,2,9.400000,357.700000,passenger,11.0,132.646465,227.0,이어도,"{'type': 'Point', 'coordinates': [125.16667, 3...",POINT (126.31642 31.43278)
3,123456788,2022-08-24 22:20:00,2,6.216667,338.833333,passenger,11.0,131.080876,225.0,이어도,"{'type': 'Point', 'coordinates': [125.16667, 3...",POINT (126.31198 31.45193)
4,123456788,2022-08-25 14:30:00,3,9.000000,106.100000,passenger,11.0,148.427475,74.0,이어도,"{'type': 'Point', 'coordinates': [125.16667, 3...",POINT (126.59532 31.56097)
...,...,...,...,...,...,...,...,...,...,...,...,...
1626311,667002108,2022-11-22 19:40:00,20,8.700000,27.600000,passenger,7653.0,97.656542,9.0,쌍정초,"{'type': 'Point', 'coordinates': [130.93333, 3...",POINT (131.73444 38.15929)
1626312,667002108,2022-11-22 19:50:00,20,8.266667,31.366667,passenger,7653.0,100.390809,10.0,쌍정초,"{'type': 'Point', 'coordinates': [130.93333, 3...",POINT (131.75036 38.18174)
1626313,667002108,2022-11-22 20:00:00,20,8.800000,29.300000,passenger,7653.0,102.877772,10.0,쌍정초,"{'type': 'Point', 'coordinates': [130.93333, 3...",POINT (131.76488 38.20206)
1626314,667002108,2022-11-22 20:10:00,20,8.800000,29.166667,passenger,7653.0,105.491824,11.0,쌍정초,"{'type': 'Point', 'coordinates': [130.93333, 3...",POINT (131.77975 38.22367)


# Length Feature Mapping

In [7]:
import json
import pandas as pd
import numpy as np
from tqdm import tqdm

# Load the vessel_static.csv data
vessel_static = pd.read_csv('vessel_static.csv')

# Define the function to predict the length
def predict_length(ship_type, ton):
    # Filter the vessel_static data to match the ship type (case-insensitive)
    type_filtered = vessel_static[vessel_static['shiptype'].str.contains(ship_type, case=False, na=False)]
    
    if not type_filtered.empty:
        # Filter to match the TON
        ton_filtered = type_filtered[type_filtered['ton'] == ton]
        
        if not ton_filtered.empty:
            # If exact match, return the median length
            return ton_filtered['length'].median()
        else:
            # If no exact match, perform linear interpolation
            sorted_filtered = type_filtered.sort_values(by='ton')
            
            # Group by 'ton' and get the median length for interpolation
            grouped_median = sorted_filtered.groupby('ton')['length'].median().reset_index()
            
            # Find the closest values for interpolation
            lower_bound = grouped_median[grouped_median['ton'] <= ton].tail(1)
            upper_bound = grouped_median[grouped_median['ton'] >= ton].head(1)
            
            if not lower_bound.empty and not upper_bound.empty:
                if lower_bound['ton'].values[0] == upper_bound['ton'].values[0]:
                    return lower_bound['length'].values[0]
                else:
                    # Perform linear interpolation
                    x0, y0 = lower_bound['ton'].values[0], lower_bound['length'].values[0]
                    x1, y1 = upper_bound['ton'].values[0], upper_bound['length'].values[0]
                    return y0 + (ton - x0) * (y1 - y0) / (x1 - x0)
            elif not lower_bound.empty:
                return lower_bound['length'].values[0]
            elif not upper_bound.empty:
                return upper_bound['length'].values[0]
            else:
                return np.nan
    else:
        return np.nan

def predict_length_mapping(geojson_data, output_filename):
    # Add the LEN_PRED feature to each feature in the GeoJSON data with progress bar
    for feature in tqdm(geojson_data['features'], desc="Processing features"):
        properties = feature['properties']
        ship_type = properties['TYPE']
        ton = properties['TON']
        
        # Predict the length
        len_pred = predict_length(ship_type, ton)
        
        # Insert LEN_PRED into properties dictionary
        properties['LEN_PRED'] = len_pred

        # Reorder properties to insert LEN_PRED after TON
        ordered_properties = {k: properties[k] for k in list(properties)[:6]}
        ordered_properties['LEN_PRED'] = len_pred
        for k in list(properties)[6:]:
            ordered_properties[k] = properties[k]
        
        feature['properties'] = ordered_properties

    # Save the modified GeoJSON data to a new file
    output_geojson_path = '../server/testdata/' + output_filename + '.geojson'
    with open(output_geojson_path, 'w') as outfile:
        json.dump(geojson_data, outfile, indent=4)

    print(f"Transformed GeoJSON data with LEN_PRED saved successfully to {output_geojson_path}")

In [8]:
# Run the function with progress logging
predict_length_mapping(myjsonfile2, 'passenger_resample10T_ver03')

Processing features:   0%|          | 3256/1626316 [00:40<5:33:52, 81.02it/s]

In [None]:
myjsonfile3 = load_geojson('passenger_resample10T_ver03')
myjsonfile3

In [None]:
mygdffile3 = load_geojson_gpd('passenger_resample10T_ver03')
mygdffile3

---

In [8]:
transform_geojson(myjsonfile)

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'SHIP_ID': '123456788',
    'RECPTN_DT': '2022-08-12T01:30:00',
    'SEQUENCE_ID': 1.0,
    'SOG': 10.7,
    'COG': 96.8,
    'TYPE': 'passenger',
    'TON': 11.0,
    'dist': 148.67964567511444,
    'sea_lv': 182.0,
    'port_name': '이어도',
    'port_geometry': {'type': 'Point', 'coordinates': [125.16667, 32.11667]}},
   'geometry': {'type': 'Point', 'coordinates': [126.542485, 31.468265]}},
  {'type': 'Feature',
   'properties': {'SHIP_ID': '123456788',
    'RECPTN_DT': '2022-08-24T22:00:00',
    'SEQUENCE_ID': 2.0,
    'SOG': 5.3,
    'COG': 60.5,
    'TYPE': 'passenger',
    'TON': 11.0,
    'dist': 133.91085080078506,
    'sea_lv': 228.0,
    'port_name': '이어도',
    'port_geometry': {'type': 'Point', 'coordinates': [125.16667, 32.11667]}},
   'geometry': {'type': 'Point',
    'coordinates': [126.31339666666666, 31.409738333333333]}},
  {'type': 'Feature',
   'properties': {'SHIP_ID': '123456788',
    

In [None]:
import json

def transform_geojson(input_filename, output_filename):
    geojson_data_path = '../server/testdata/' + input_filename + '.geojson'
    
    try:
        # Load the existing GeoJSON data
        with open(geojson_data_path, 'r') as file:
            data = json.load(file)
        print("GeoJSON data loaded successfully.")
        
        # Transform the data
        transformed_features = []
        for feature in data['features']:
            # Extract properties
            properties = feature['properties']
            
            # Create new geometry for the ship's coordinates
            ship_geometry = {
                'type': 'Point',
                'coordinates': [properties['LO'], properties['LA']]
            }
            
            # Add the port coordinates separately
            port_geometry = feature['geometry']  # This is the port's coordinates
            
            # Construct the new feature
            new_feature = {
                'type': 'Feature',
                'properties': {
                    'SHIP_ID': properties['SHIP_ID'],
                    'RECPTN_DT': properties['RECPTN_DT'],
                    'SEQUENCE_ID': properties['SEQUENCE_ID'],
                    'SOG': properties['SOG'],
                    'COG': properties['COG'],
                    'TYPE': properties['TYPE'],
                    'TON': properties['TON'],
                    'dist': properties['dist'],
                    'sea_lv': properties['sea_lv'],
                    'port_name': properties['port_name'],
                    'port_geometry': port_geometry
                },
                'geometry': ship_geometry
            }
            
            # Add the transformed feature to the list
            transformed_features.append(new_feature)
        
        # Construct the new GeoJSON structure
        transformed_geojson = {
            'type': 'FeatureCollection',
            'features': transformed_features
        }
        
        # Save the transformed GeoJSON data to a new file
        output_path = '../server/testdata/' + output_filename + '.geojson'
        with open(output_path, 'w') as outfile:
            json.dump(transformed_geojson, outfile, indent=4)
        print(f"Transformed GeoJSON data saved successfully to {output_filename}.geojson")
        
    except Exception as e:
        print(f"An error occurred while processing the GeoJSON data: {e}")

# Example usage
transform_geojson('input_geojson_filename', 'output_geojson_filename')