In [3]:
! pip install scikit-learn

Collecting scikit-learn
  Using cached scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl.metadata (13 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl (11.0 MB)
Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.5.2 threadpoolctl-3.5.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

# Function to normalize coordinates
def normalize_coordinates(df):
    """Min-max scale the 'y' and 'x' columns of ``df`` into [0, 1], in place.

    Adds two columns to ``df``: 'y_norm' (from 'y') and 'x_norm' (from 'x').

    A column whose values are all identical has a zero range; dividing by it
    would produce NaN (0 / 0). Such a column is mapped to 0.0 instead, which
    scales back to the constant value when the returned bounds are reapplied.

    Args:
        df: DataFrame with numeric 'y' and 'x' columns. Modified in place.

    Returns:
        Tuple ``(min_lat, max_lat, min_lon, max_lon)`` where the "lat" bounds
        come from 'y' and the "lon" bounds come from 'x'.
    """
    min_lat, max_lat = df['y'].min(), df['y'].max()
    min_lon, max_lon = df['x'].min(), df['x'].max()

    axes = (
        ('y', 'y_norm', min_lat, max_lat),
        ('x', 'x_norm', min_lon, max_lon),
    )
    for source_col, norm_col, low, high in axes:
        span = high - low
        if span == 0:
            # Degenerate axis: every value equals `low`, so avoid 0 / 0.
            df[norm_col] = 0.0
        else:
            df[norm_col] = (df[source_col] - low) / span

    return min_lat, max_lat, min_lon, max_lon

# Function to quantize normalized values to 8-bit
def quantize_coordinates(df, output_path='quantized_coordinates.csv'):
    """Quantize the normalized coordinates to 8-bit integer codes, in place.

    Adds 'y_quant' and 'x_quant' columns to ``df`` by scaling 'y_norm' and
    'x_norm' by 255 and rounding to the nearest integer.

    Args:
        df: DataFrame with 'y_norm' and 'x_norm' columns. Modified in place.
        output_path: Where to write the two quantized columns (with the
            index) as CSV. Defaults to 'quantized_coordinates.csv' in the
            working directory. Pass ``None`` to skip writing a file.

    Raises:
        ValueError: from the integer cast if a normalized value is NaN or
            infinite.
    """
    max_code = 255  # largest value representable in 8 bits (2**8 - 1)

    df['y_quant'] = np.round(df['y_norm'] * max_code).astype(int)
    df['x_quant'] = np.round(df['x_norm'] * max_code).astype(int)

    # Save the quantized values to a CSV file unless the caller opted out.
    if output_path is not None:
        df[['y_quant', 'x_quant']].to_csv(output_path)
    

# Function to inverse quantize to normalized values
def inverse_quantize_coordinates(df):
    """Convert the integer codes back to normalized values, in place.

    Divides 'y_quant' and 'x_quant' by 255 and stores the results in the new
    columns 'y_norm_inv' and 'x_norm_inv'. Returns nothing.
    """
    full_scale = 255
    column_pairs = (
        ('y_quant', 'y_norm_inv'),
        ('x_quant', 'x_norm_inv'),
    )
    for code_col, unit_col in column_pairs:
        df[unit_col] = df[code_col].div(full_scale)

# Function to inverse normalize to original values
def inverse_normalize_coordinates(df, min_lat, max_lat, min_lon, max_lon):
    """Scale the recovered normalized values back to coordinate units, in place.

    Computes ``value * (max - min) + min`` for each axis and rounds the result
    to 6 decimal places. 'y_norm_inv' uses the lat bounds and is stored in
    'y_inv'; 'x_norm_inv' uses the lon bounds and is stored in 'x_inv'.
    Returns nothing.
    """
    axes = (
        ('y_norm_inv', 'y_inv', min_lat, max_lat),
        ('x_norm_inv', 'x_inv', min_lon, max_lon),
    )
    for unit_col, restored_col, low, high in axes:
        rescaled = df[unit_col] * (high - low) + low
        # Rounded to 6 decimals for consistency, as in the original data flow.
        df[restored_col] = rescaled.round(6)

# Function to calculate RMSE
def calculate_rmse(df):
    """Return the root mean squared reconstruction error for each axis.

    Compares 'y' with 'y_inv' and 'x' with 'x_inv', taking the square root of
    the mean squared error of each pair.

    Returns:
        Tuple ``(y_rmse, x_rmse)``.
    """
    column_pairs = (('y', 'y_inv'), ('x', 'x_inv'))
    y_rmse, x_rmse = (
        np.sqrt(mean_squared_error(df[original], df[restored]))
        for original, restored in column_pairs
    )
    return y_rmse, x_rmse

# Main function to process the coordinates
def process_coordinates(csv_file):
    """Run the quantization round trip on a coordinate CSV and report the error.

    Steps: load the CSV (first column as index), min-max normalize 'y'/'x',
    quantize to 8-bit codes (which also writes 'quantized_coordinates.csv'),
    reverse both steps, then print the per-axis RMSE between the original and
    reconstructed coordinates, followed by the first and last rows of the
    working frame.

    Args:
        csv_file: Path to a CSV whose first column is the index and which
            contains 'y' and 'x' columns.
    """
    coords = pd.read_csv(csv_file, index_col=0)

    # Forward pass: scale into [0, 1], then snap to integer codes.
    bounds = normalize_coordinates(coords)
    quantize_coordinates(coords)

    # Reverse pass: codes -> normalized values -> coordinate units.
    inverse_quantize_coordinates(coords)
    inverse_normalize_coordinates(coords, *bounds)

    # Report how much precision the round trip lost.
    lat_rmse, lon_rmse = calculate_rmse(coords)
    print('Root Mean Squared Error (Latitude):', lat_rmse)
    print('Root Mean Squared Error (Longitude):', lon_rmse)

    # Show both ends of the frame with every intermediate column.
    for preview in (coords.head(), coords.tail()):
        print(preview)

    # Optional: persist the full frame.
    # coords.to_csv('transformed_coordinates.csv')

# Run the process on 'node_coordinates.csv' (resolved against the working
# directory). The file needs an index in its first column plus 'x' and 'y'
# columns; the run prints the RMSE values and the head/tail of the frame.
process_coordinates('node_coordinates.csv')

Root Mean Squared Error (Latitude): 0.00014178539075923808
Root Mean Squared Error (Longitude): 0.00018276550245638335
                  y         x    y_norm    x_norm  y_quant  x_quant  \
osmid                                                                 
10602396  49.750094  6.637228  0.313529  0.464594       80      118   
10602410  49.749518  6.640148  0.308863  0.482550       79      123   
10602412  49.753861  6.642500  0.344044  0.497012       88      127   
10602431  49.754348  6.644230  0.347984  0.507648       89      129   
10602432  49.753833  6.645809  0.343811  0.517359       88      132   

          y_norm_inv  x_norm_inv      y_inv     x_inv  
osmid                                                  
10602396    0.313725    0.462745  49.750119  6.636927  
10602410    0.309804    0.482353  49.749635  6.640116  
10602412    0.345098    0.498039  49.753991  6.642667  
10602431    0.349020    0.505882  49.754475  6.643943  
10602432    0.345098    0.517647  49.753991  6.