基本的には、まずkmlを読み込み、どのsemanticについてsegmentを生成するかを指定します。次に、生成したいsegmentの間隔を指定し、それに基づいて該当semantic上にsegmentを生成します。  
これで各segmentの経度緯度が分かります。その経度緯度を訓練セットとして、最近傍法(k＝1)の分類モデルをトレーニングします。
そして、ゼンリンのjsonファイルからデータを読み込み、先ほどトレーニングした分類モデルを使って、ゼンリンの各ポイントを各segmentに割り当てます。  

あるsemanticにおいて、segmentの経度緯度の密度とゼンリンデータのポイントの密度が異なるため、ポイントが一つも割り当てられないsegmentが生じます。その部分をデータの欠損と見なし、欠損点の前後にある欠損ではない点を使って、加重平均により欠損を埋めます。  
そして、補正されたデータをSQLサーバーに挿入します。 

---
<font color=red>下記はアルゴリズムのフローチャートです。</font>  
![img](img/chart.png)  
  
---
<font color=red>下記は欠損補正部分のアルゴリズムの説明です。</font>  
![img](img/加重平均の説明.png)  

In [13]:
import os
import re
from pathlib import Path
from os import path
import json
import time
import pyodbc
import webbrowser
import shutil
import csv
import pandas as pd
import folium
import datetime
import sklearn
from sklearn.neighbors import KNeighborsClassifier
import pyodbc
import numpy as np
import math
import datetime
import warnings
warnings.simplefilter('ignore')

# Constants for Hubeny's distance formula
# https://butter-tiger.hatenablog.com/entry/2020/08/20/222650
POLE_RADIUS = 6356752  # polar radius (semi-minor axis)
EQUATOR_RADIUS = 6378137  # equatorial radius (semi-major axis)
E = 0.081819191042815790  # eccentricity
E2 = 0.006694380022900788  # eccentricity squared

<font size = 6 color =red>ここからは必要な関数を定義する↓</font>  
  
<font size = 3>この部分はJSONファイルに関する計算やファイルの処理</font>  

In [2]:
# Path fragments used to locate the ZENRIN JSON input files.
# The "\"-prefixed fragments are Windows-style and are joined by plain string concatenation.
segment_inserter_path = rf"/segment_inserter/"

FOLDER_PATH_json = rf"\FILES"  
GIS_BOOL = 0  # 1 to also look up the 10M**** elevation for comparison, 0 otherwise

JSON_PATH = rf"\JSON"  # input folder
JSON_inserted_PATH = rf"\JSON_inserted"  # folder for files that were already inserted

def distance(_lat1, _long1, _lat2, _long2):
    """Return the surface distance between two points using Hubeny's formula.

    Args:
        _lat1, _long1: Latitude and longitude of the first point, in degrees.
        _lat2, _long2: Latitude and longitude of the second point, in degrees.

    Returns:
        float: Distance in the unit of ``EQUATOR_RADIUS`` (presumably metres).
        Altitude differences are not taken into account.
    """
    phi1, lam1 = math.radians(_lat1), math.radians(_long1)
    phi2, lam2 = math.radians(_lat2), math.radians(_long2)

    mean_phi = (phi1 + phi2) / 2  # mean latitude
    delta_phi = abs(phi1 - phi2)  # latitude difference
    delta_lam = abs(lam1 - lam2)  # longitude difference

    w = math.sqrt(1 - E2 * math.sin(mean_phi) ** 2)
    meridian_radius = EQUATOR_RADIUS * (1 - E2) / w ** 3  # meridian radius of curvature
    prime_vertical_radius = EQUATOR_RADIUS / w  # prime-vertical radius of curvature

    north_south = meridian_radius * delta_phi
    east_west = prime_vertical_radius * delta_lam * math.cos(mean_phi)
    return math.sqrt(north_south ** 2 + east_west ** 2)

def search_filelist():
    """List the JSON files found in the JSON input directory.

    The directory is the parent of the current working directory joined with
    ``"./segment_inserter/" + FOLDER_PATH + JSON_PATH``. Sub-directories are
    skipped. Every regular entry is reported on stdout, flagged as JSON or not.

    Returns:
        list[str]: Bare file names (no directory part) whose name ends in
        ``.json``.
    """
    # NOTE(review): this builds the directory from FOLDER_PATH, whereas
    # ADASListGenerator_to_df reads from FOLDER_PATH_json -- confirm both are
    # meant to resolve to the same folder.
    p = Path(path.join(path.dirname(os.getcwd()), "." + "/segment_inserter/" + FOLDER_PATH + JSON_PATH))

    filenames = []
    print("filelist:")

    for file in p.iterdir():
        if file.is_dir():
            continue

        # The previous pattern (".+" + ".json") left the dot unescaped and the
        # end unanchored, so names such as "xjson" or "a.json.bak" were
        # accepted as JSON files. Require a literal ".json" suffix instead.
        if re.fullmatch(r".+\.json", file.name):
            print("- ", file.name, ":JSON file")
            filenames.append(file.name)
        else:
            print(file.name, ":not JSON file:")
    print()
    return filenames

# SUB FUNC
## Read a UTF-8 encoded JSON file and return the parsed content
def load_json_to_dict_UTF8(input_jsonpath):
    """Parse the JSON file at *input_jsonpath* and return the decoded object.

    The file is opened with the ``utf-8_sig`` codec, so a leading byte-order
    mark is accepted and discarded.

    Args:
        input_jsonpath: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (dict, list, ...).
    """
    # Use a context manager so the handle is closed even if parsing fails;
    # the previous version never closed the file.
    with open(input_jsonpath, "r", encoding="utf-8_sig") as fp:
        return json.load(fp)

# --- 10M**** elevation lookup helpers start here ---
# Takes a latitude/longitude pair and returns the elevation looked up in the **** 10M**** elevation data.
# Provided for comparing ZENRIN with the Geospatial Information Authority of Japan (GSI).
def altDao(_lat, _long):
    """Look up the elevation of the grid cell that contains a point.

    Args:
        _lat: Latitude, as a number or a numeric string.
        _long: Longitude, as a number or a numeric string.

    Returns:
        float: ``ALTITUDE`` of the first matching row rounded to 3 decimals,
        or ``-99`` when no cell matches (or the stored value is NULL).

    Raises:
        ValueError: If *_lat* or *_long* cannot be converted to ``float``.
    """
    # Convert before connecting so bad input cannot leave a connection open.
    lat = float(_lat)
    lon = float(_long)

    # NOTE(review): SERVER, uid, pwd, DATABASE and trusted_connection are not
    # defined in the visible part of this file -- confirm they exist at module
    # level before this function is called.
    connect = pyodbc.connect(
        "DRIVER={SQL Server};SERVER="
        + SERVER
        + ";UID="
        + uid
        + ";PWD="
        + pwd
        + ";DATABASE="
        + DATABASE
        + ";Trusted_Connection="
        + trusted_connection
        + ";"
    )
    altitude = -99  # sentinel for "no elevation found"
    # Bound parameters replace the previous string concatenation, which also
    # glued the values directly onto the following "AND" keywords.
    sql = (
        "SELECT ALTITUDE FROM [****].[dbo].[ALTITUDE_10M_****] "
        "WHERE LOWER_LATITUDE <= ? AND UPPER_LATITUDE > ? "
        "AND LOWER_LONGITUDE <= ? AND UPPER_LONGITUDE > ?"
    )
    try:
        cursor = connect.cursor()
        try:
            cursor.execute(sql, (lat, lat, lon, lon))
            row = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        connect.close()

    # Read the column directly instead of parsing the row's string form.
    if row is not None and row[0] is not None:
        altitude = round(float(row[0]), 3)
    return altitude


def select_execute(con, sql):
    """Execute *sql* on *con* and return every fetched row.

    Args:
        con: An open DB-API connection (anything offering ``cursor()``).
        sql: The SELECT statement to run.

    Returns:
        The result of ``cursor.fetchall()``.
    """
    cursor = con.cursor()
    try:
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close the cursor on both the success and the error path.
        cursor.close()


# --- 10M**** elevation lookup helpers end here ---

# convert JSON to [Latitude, Longitude, ZENRIN_Elevation]

def ADASListGenerator_to_df(filename):
    """Flatten the ADAS road-elevation points of one ZENRIN JSON file.

    The file is read from
    ``<parent of cwd>/segment_inserter/ + FOLDER_PATH_json + JSON_PATH``.
    Every element of the top-level JSON list is processed; for each entry of
    ``result.path`` whose ``matchLink.adas`` carries ``roadelevation`` points,
    one row per point is produced.

    Args:
        filename: File name (not a full path) of the JSON file. The name with
            ".json" removed is stored in the ``SemanticLink`` column.

    Returns:
        pandas.DataFrame: Columns ``SemanticLink``, ``Latitude``,
        ``Longitude``, ``ZENRIN_Elevation`` (the ``elevation`` value exactly
        as delivered) and ``adas_null``; plus ``GIS`` (result of ``altDao``)
        when ``GIS_BOOL == 1``. The index runs 0..n-1. The frame is empty
        when the file holds no usable points.

    Raises:
        ValueError: If ``GIS_BOOL`` is neither 0 nor 1.
    """
    # Kept as a module-level flag because the original published it via `global`.
    global adas_null

    if GIS_BOOL not in (0, 1):
        # Previously this only printed the message and then failed later with
        # a NameError on the never-created frame.
        raise ValueError("GIS_BOOLは0か1にしてください")

    json_semanticlinkID_name = filename.replace(".json", "")
    curdir_dirpath = os.path.dirname(os.path.abspath(os.getcwd()))
    json_zenrin_filepath = (
        curdir_dirpath + "/segment_inserter/" + FOLDER_PATH_json + JSON_PATH + "\\" + filename
    )
    print("next_file：", json_zenrin_filepath)

    columns = ["SemanticLink", "Latitude", "Longitude", "ZENRIN_Elevation", "adas_null"]
    if GIS_BOOL == 1:
        columns.append("GIS")

    # Collect rows in a plain list and build the frame once; growing a
    # DataFrame with .loc one row at a time is quadratic.
    rows = []
    # The outermost JSON structure is a list of responses.
    for zenrin_response_json in load_json_to_dict_UTF8(json_zenrin_filepath):
        # `matched_path` (not `path`) so the imported os.path alias is not shadowed.
        for matched_path in zenrin_response_json["result"]["path"]:
            adas = matched_path["matchLink"].get("adas")
            # Links whose "adas" is missing/null or has no "roadelevation"
            # contribute no points. The old substring test against the dict
            # ('"adas": null' in matchLink) was a key lookup that could never
            # match, so adas_null was always 0 for emitted rows.
            if not isinstance(adas, dict) or "roadelevation" not in adas:
                continue
            adas_null = 0

            for adasPoint in adas["roadelevation"] or []:
                row = [
                    json_semanticlinkID_name,
                    adasPoint["lat"],
                    adasPoint["lon"],
                    adasPoint["elevation"],
                    adas_null,
                ]
                if GIS_BOOL == 1:
                    row.append(altDao(str(adasPoint["lat"]), str(adasPoint["lon"])))
                rows.append(row)

    # The return used to sit inside the outer loop, so only the first response
    # element was processed and an empty list returned None.
    return pd.DataFrame(rows, columns=columns)

<font size = 3>この部分はKMLファイルに関する計算やファイルの処理</font>  

In [3]:
# Timestamp taken when this cell runs. datetime.now() is naive local time, so
# the "jst" in the name only holds when the machine's timezone is JST.
now_jst = datetime.datetime.now()
now_jst_result = str(now_jst.strftime('%Y%m%d-%H%M%S'))
#FOLDER_PATH = "\\{}".format(now_jst_result)  # working folder
FOLDER_PATH = ""  # working folder
KML_PATH = rf"\KML_DrivingRoute"  # input folder
# Folders for KML files that were already inserted, one per segment interval
KML_inserted_PATH_10m = rf"\KML_DrivingRoute_inserted\10m"
KML_inserted_PATH_50m = rf"\KML_DrivingRoute_inserted\50m"
KML_inserted_PATH_100m = rf"\KML_DrivingRoute_inserted\100m"

KML_inserted_PATH_ELEVATION_null = rf"\KML_DrivingRoute_inserted\ELEVATION_null"


CSV_PATH = rf"\CSV_DrivingLog"  # output folder
CSV_FOLIUM_PATH = r"\CSV_FoliumMap"  # output folder for folium maps built from CSV
#ACCESS_NUMBER = 5  # number of "いつもNAVI" API accesses (up to 100 inputs per access)
#CONSTANT_SPEED = 100 # constant speed is meant to be detected from the file name (this value is the default)

# Constants for Hubeny's distance formula
# https://butter-tiger.hatenablog.com/entry/2020/08/20/222650
POLE_RADIUS = 6356752  # polar radius (semi-minor axis)
EQUATOR_RADIUS = 6378137  # equatorial radius (semi-major axis)
E = 0.081819191042815790  # eccentricity
E2 = 0.006694380022900788  # eccentricity squared

def distance(_lat1, _long1, _lat2, _long2):
    """Compute the distance between two lat/long points with Hubeny's formula.

    Args:
        _lat1, _long1: First point, latitude and longitude in degrees.
        _lat2, _long2: Second point, latitude and longitude in degrees.

    Returns:
        float: Distance expressed in the unit of ``EQUATOR_RADIUS``
        (presumably metres). Altitude is ignored.
    """
    lat_a, lon_a, lat_b, lon_b = map(
        math.radians, (_lat1, _long1, _lat2, _long2)
    )
    mean_lat = (lat_a + lat_b) / 2

    w = math.sqrt(1 - E2 * math.pow(math.sin(mean_lat), 2))
    # Meridian (north-south) and prime-vertical (east-west) radii of curvature.
    meridian_radius = EQUATOR_RADIUS * (1 - E2) / math.pow(w, 3)
    prime_vertical_radius = EQUATOR_RADIUS / w

    north_south = meridian_radius * abs(lat_a - lat_b)
    east_west = prime_vertical_radius * abs(lon_a - lon_b) * math.cos(mean_lat)
    return math.sqrt(math.pow(north_south, 2) + math.pow(east_west, 2))


def search_filelist_kml():
    """List the KML files in the KML input directory.

    The directory is the current working directory followed by ``KML_PATH``.
    Sub-directories are skipped and every regular entry is reported on stdout.

    Returns:
        list[str]: Full paths, built as ``<cwd><KML_PATH>\\<file name>``, of
        the files whose name ends in ``.kml``.
    """
    p = Path(rf"{os.getcwd()}{KML_PATH}")
    print(p)

    filenames = []

    for file in p.iterdir():
        if file.is_dir():
            continue

        # The previous pattern (".+" + ".kml") left the dot unescaped and the
        # end unanchored, so "xkml" or "route.kml.bak" counted as KML files.
        if re.fullmatch(r".+\.kml", file.name):
            print(file.name, ":KML file")
            # Path text is assembled by hand (not via str(file)) because
            # callers split the result on the backslash separator.
            filenames.append(
                rf"{os.getcwd()}{KML_PATH}\{file.name}"
            )
        else:
            print(file.name, ":not KML file:")
    return filenames


# Extract only the required coordinate sequence from a KML file and return it as a list
# One "lon,lat[,alt]" tuple; only longitude and latitude are captured.
_KML_COORD_TUPLE = re.compile(
    r"([-+]?\d+(?:\.\d+)?),([-+]?\d+(?:\.\d+)?)(?:,[-+]?\d+(?:\.\d+)?)?"
)


def readLinksKML(filename):
    """Return the coordinates found inside the ``<LineString>`` elements of a KML file.

    The file is scanned line by line. A line containing ``<LineString>``
    switches collection on and a line containing ``</LineString>`` switches it
    off; neither of those two lines is searched for coordinates.

    Args:
        filename: Path of the KML file (read as UTF-8).

    Returns:
        list[list[float]]: ``[latitude, longitude]`` pairs in file order.
        KML stores ``lon,lat[,alt]``, so the first two fields are swapped and
        any altitude is dropped.
    """
    GPSlist = []
    inside_line_string = False
    # Context manager: the previous version never closed the file.
    with open(filename, "r", encoding="UTF-8") as f:
        for data in f:
            if not inside_line_string:
                if "<LineString>" in data:
                    inside_line_string = True
                continue
            if "</LineString>" in data:
                inside_line_string = False
                continue
            # Parse each tuple separately. The old approach stripped spaces and
            # every ",0" substring, which glued multiple tuples on one line
            # together, corrupted values such as ",0.5", failed on non-zero
            # altitudes starting with 0, and rejected negative coordinates.
            for lon, lat in _KML_COORD_TUPLE.findall(data):
                GPSlist.append([float(lat), float(lon)])
    return GPSlist


def calcDistance(list_GPS):
    """Measure every consecutive pair of points along a route.

    Args:
        list_GPS: Iterable of ``[latitude, longitude]`` points in route order.

    Returns:
        tuple: ``(pairs, total)`` where ``pairs`` is a list of
        ``[lat1, long1, lat2, long2, dist]`` rows (``dist`` from
        ``distance``), one per consecutive pair, and ``total`` is the sum of
        all ``dist`` values (``0`` when there are fewer than two points).
    """
    list_lat_long_dist = []
    dist_sum = 0

    points = iter(list_GPS)
    previous = next(points, None)
    # Track the previous point explicitly. The old code used
    # "before_lat > 0" as the "have a previous point" flag, which silently
    # dropped every pair whose first latitude was zero or negative.
    for current in points:
        dist = distance(previous[0], previous[1], current[0], current[1])
        list_lat_long_dist.append(
            [previous[0], previous[1], current[0], current[1], dist]
        )
        dist_sum = dist_sum + dist
        previous = current

    return list_lat_long_dist, dist_sum


def normalizedCoordinatesGenerator(list_lat_long_dist, normalized_dist):
    """Place points at a fixed spacing along a polyline.

    Args:
        list_lat_long_dist: Rows of ``[lat1, long1, lat2, long2, dist]``
            describing consecutive route pieces. Must not be empty.
        normalized_dist: Spacing between generated points, in the unit of
            the ``dist`` column.

    Returns:
        tuple: ``(points, count)``. ``points`` starts with the header row
        ``["Latitude", "Longitude"]``, followed by the first point of the
        route and then one linearly interpolated point per spacing step.
        ``count`` is the number of coordinate rows (header excluded).
    """
    start = list_lat_long_dist[0]
    points = [["Latitude", "Longitude"], [start[0], start[1]]]

    # Distance still to travel, from the start of the current piece, before
    # the next point is due.
    carry = normalized_dist

    for piece in list_lat_long_dist:
        lat1, lon1, lat2, lon2, piece_len = piece[0], piece[1], piece[2], piece[3], piece[4]

        if carry > piece_len:
            # The next point lies beyond this piece; consume it and move on.
            carry = carry - piece_len
            continue

        placed = 0
        while True:
            offset = carry + normalized_dist * placed
            if not offset < piece_len:
                break
            points.append(
                [
                    (lat2 - lat1) * offset / piece_len + lat1,
                    (lon2 - lon1) * offset / piece_len + lon1,
                ]
            )
            placed = placed + 1

        # Remainder of the last step that spills into the next piece.
        carry = carry - piece_len + normalized_dist * placed

    return points, len(points) - 1

coordinatesNumber = 0.00  # number of coordinates produced by the latest GPSListGenerator_segment call


def GPSListGenerator_segment(filename):
    """Build evenly spaced coordinates along the route stored in a KML file.

    Reads the route with ``readLinksKML``, measures it with ``calcDistance``
    and resamples it with ``normalizedCoordinatesGenerator`` at the spacing
    given by the module-level ``distance_between_segments``.

    Args:
        filename: Path of the KML file.

    Returns:
        list: The first element returned by ``normalizedCoordinatesGenerator``
        (header row followed by ``[latitude, longitude]`` rows).

    Side effects:
        Stores the number of generated coordinates in the module-level
        ``coordinatesNumber``.
    """
    # Without this declaration the count was bound to a local name, so the
    # module-level value printed by output_lat_long always stayed 0.0.
    global coordinatesNumber

    print("try generate {}.".format(filename))
    # Extract only the required coordinate sequence, as (lat, long) rows.
    list_GPS = readLinksKML(filename)
    list_lat_long_dist, _dist_sum = calcDistance(list_GPS)
    print(' ')

    # Same arithmetic as before (spacing * 3.6, then * 1000 / 3600), kept in
    # this order so the resulting float is bit-for-bit unchanged.
    normalized_dist = distance_between_segments * 3.6 * 1000 / 3600

    list_normalized_lat_long, coordinatesNumber = normalizedCoordinatesGenerator(
        list_lat_long_dist, normalized_dist
    )

    return list_normalized_lat_long

# Write the output of the GPS list generator above to a CSV file
def output_lat_long(filename, list_normalized_lat_long):
    """Write a 2-D list of coordinates to the CSV counterpart of a KML path.

    The target path is *filename* with ``KML_PATH`` replaced by ``CSV_PATH``
    and ``.kml`` replaced by ``.csv``. The path and the module-level
    ``coordinatesNumber`` are reported on stdout before writing.

    Args:
        filename: Path of the source KML file.
        list_normalized_lat_long: Rows to write, one CSV line per row.
    """
    csv_target = filename.replace(KML_PATH, CSV_PATH)
    csv_target = csv_target.replace(r'.kml', r'.csv')
    print("output file:", csv_target, "    座標数:", str(coordinatesNumber), "\n")

    # 2-D list -> CSV: https://rikei-danshi.work/entry/python-2darray-csv
    with open(csv_target, "w") as out:
        csv.writer(out, lineterminator="\n").writerows(list_normalized_lat_long)


def move_file(filename):
    """Move a processed KML file into the "inserted" folder.

    The destination is *filename* with ``KML_PATH`` replaced by the
    module-level ``KML_inserted_PATH``, which must have been set by the caller.

    Args:
        filename: Current path of the KML file.
    """
    destination = filename.replace(KML_PATH, KML_inserted_PATH)
    shutil.move(filename, destination)


def display_folium_map_from_csv(data,path):
    """Plot the points of a DataFrame on a folium map, save it and open it.

    Args:
        data: DataFrame whose first two columns are labelled ``Latitude`` and
            ``Longitude``; the row with index label ``len(data) // 2`` is used
            as the map centre.
        path: Output path of the generated HTML file. (Shadows the imported
            ``os.path`` alias inside this function.)
    """
    coords = data.iloc[:, 0:2]
    middle = len(coords) // 2
    centre = [
        coords.at[middle, "Latitude"],
        coords.at[middle, "Longitude"],
    ]
    folium_map = folium.Map(location=centre, zoom_start=13)

    # One marker per row.
    for _, point in coords.iterrows():
        marker = folium.Marker(location=[point["Latitude"], point["Longitude"]])
        marker.add_to(folium_map)

    folium_map.save(path)
    # new=2 asks the browser for a new tab.
    webbrowser.open(path, new=2)

<font size = 3>この部分はSEGMENTS_****のテーブル群のインサーター関数</font>  

In [4]:
def table_name_SEGMENTS_****_10m():
    return "SEGMENTS_****_10M"

def table_name_SEGMENTS_****_50m():
    return "SEGMENTS_****_50M"

def table_name_SEGMENTS_****_100m():
    return "SEGMENTS_****_100M"

def column_list_SEGMENTS_****():
    return "(SEGMENT_ID,SEGMENT_LENGTH,START_LATITUDE,START_LONGITUDE,END_LATITUDE,END_LONGITUDE,START_ADAS_ELEVATION_MILLI_METER,END_ADAS_ELEVATION_MILLI_METER,SLOPE_ANGLE_THETA,COS_THETA,SIN_THETA,IS_ADAS_NULL,IS_GET_****_DATA)"

def insert_data_to_SEGMENTS_****_10M(data):
    driver='{SQL Server}'
    server = '****'
    database = '****'
    trusted_connection='yes'
    connect= pyodbc.connect('DRIVER='+driver+';SERVER='+server+';DATABASE='+database+';PORT=1433;Trusted_Connection='+trusted_connection+';')
    cursor = connect.cursor()
    for i in range(len(data)):
        list1 = data.loc[i].to_list()
        stmt = """
        INSERT INTO {} {} 
        VALUES {}
        """.format(table_name_SEGMENTS_****_10m(),column_list_SEGMENTS_****(),tuple(list1))
        try:
            cursor.execute(stmt)
            cursor.commit()
        except pyodbc.IntegrityError as err:
                        # 主キー違反の場合には読み飛ばす
                            continue
        except Exception as e:
                            print("---")
                            print(e)
                            print("HINT: トリップの最初のレコードだけエラーが出る様子.")
                            print("Excecuted SQL below.")
                            print(stmt)
                            print("---")
    cursor.close()
    connect.close()
    
def insert_data_to_SEGMENTS_****_50M(data):
    driver='{SQL Server}'
    server = '****'
    database = '****'
    trusted_connection='yes'
    connect= pyodbc.connect('DRIVER='+driver+';SERVER='+server+';DATABASE='+database+';PORT=1433;Trusted_Connection='+trusted_connection+';')
    cursor = connect.cursor()
    for i in range(len(data)):
        list1 = data.loc[i].to_list()
        stmt = """
        INSERT INTO {} {} 
        VALUES {}
        """.format(table_name_SEGMENTS_****_50m(),column_list_SEGMENTS_****(),tuple(list1))
        try:
            cursor.execute(stmt)
            cursor.commit()
        except pyodbc.IntegrityError as err:
                        # 主キー違反の場合には読み飛ばす
                            continue
        except Exception as e:
                            print("---")
                            print(e)
                            print("HINT: トリップの最初のレコードだけエラーが出る様子.")
                            print("Excecuted SQL below.")
                            print(stmt)
                            print("---")
    cursor.close()
    connect.close()
    
def insert_data_to_SEGMENTS_****_100M(data):
    driver='{SQL Server}'
    server = '****'
    database = '****'
    trusted_connection='yes'
    connect= pyodbc.connect('DRIVER='+driver+';SERVER='+server+';DATABASE='+database+';PORT=1433;Trusted_Connection='+trusted_connection+';')
    cursor = connect.cursor()
    for i in range(len(data)):
        list1 = data.loc[i].to_list()
        stmt = """
        INSERT INTO {} {} 
        VALUES {}
        """.format(table_name_SEGMENTS_****_100m(),column_list_SEGMENTS_****(),tuple(list1))
        try:
            cursor.execute(stmt)
            cursor.commit()
        except pyodbc.IntegrityError as err:
                        # 主キー違反の場合には読み飛ばす
                            continue
        except Exception as e:
                            print("---")
                            print(e)
                            print("HINT: トリップの最初のレコードだけエラーが出る様子.")
                            print("Excecuted SQL below.")
                            print(stmt)
                            print("---")
    cursor.close()
    connect.close()
#新しい間隔のセグメントの挿入関数はここで定義
#下のSEMANTIC_LINKS_SEGMENTSのテーブル群のインサーター関数も忘れずに

<font size = 3>この部分はSEMANTIC_LINKS_SEGMENTSのテーブル群のインサーター関数</font>  

In [5]:
def table_name_SEMANTIC_LINKS_SEGMENTS_10m():
    """Return the name of the 10 m semantic-link/segment mapping table."""
    return "SEMANTIC_LINKS_SEGMENTS_10M"


def table_name_SEMANTIC_LINKS_SEGMENTS_50m():
    """Return the name of the 50 m semantic-link/segment mapping table."""
    return "SEMANTIC_LINKS_SEGMENTS_50M"


def table_name_SEMANTIC_LINKS_SEGMENTS_100m():
    """Return the name of the 100 m semantic-link/segment mapping table."""
    return "SEMANTIC_LINKS_SEGMENTS_100M"


def column_list_SEMANTIC_LINKS_SEGMENTS():
    """Return the parenthesised column list used by the mapping INSERT statements."""
    return "(SEMANTIC_LINK_ID,SEMANTIC_LINK_SEGMENT_ID,SEGMENT_ID)"


def _insert_semantic_link_rows(table_name, data):
    """Insert every row of *data* into *table_name*, committing after each row.

    Shared implementation of the three public mapping inserters, which used
    to be copies of one another.

    Args:
        table_name: Target table.
        data: DataFrame whose columns follow ``column_list_SEMANTIC_LINKS_SEGMENTS()``.

    Rows rejected with ``pyodbc.IntegrityError`` (e.g. primary-key clash) are
    skipped silently; any other error is printed together with the statement
    and the loop continues with the next row.
    """
    driver = '{SQL Server}'
    server = '****'
    database = '****'
    trusted_connection = 'yes'
    connect = pyodbc.connect('DRIVER='+driver+';SERVER='+server+';DATABASE='+database+';PORT=1433;Trusted_Connection='+trusted_connection+';')
    try:
        cursor = connect.cursor()
        try:
            for i in range(len(data)):
                # Positional access: .loc[i] raised KeyError whenever the
                # frame's index was not exactly 0..n-1.
                values = tuple(data.iloc[i].to_list())
                # NOTE(review): values are spliced in through the tuple's
                # repr, so None/NaN produce invalid SQL (reported below).
                # Switching to "?" parameters would change which rows get
                # inserted, so it is flagged rather than changed here.
                stmt = "\n        INSERT INTO {} {} \n        VALUES {}\n        ".format(table_name, column_list_SEMANTIC_LINKS_SEGMENTS(), values)
                try:
                    cursor.execute(stmt)
                    cursor.commit()
                except pyodbc.IntegrityError:
                    # Skip rows that violate the primary key.
                    continue
                except Exception as e:
                    print("---")
                    print(e)
                    print("HINT: トリップの最初のレコードだけエラーが出る様子.")
                    print("Excecuted SQL below.")
                    print(stmt)
                    print("---")
        finally:
            cursor.close()
    finally:
        # Release the connection even if something unexpected is raised.
        connect.close()


def insert_data_to_SEMANTIC_LINKS_SEGMENTS_10M(data):
    """Insert the rows of *data* into the 10 m mapping table."""
    _insert_semantic_link_rows(table_name_SEMANTIC_LINKS_SEGMENTS_10m(), data)


def insert_data_to_SEMANTIC_LINKS_SEGMENTS_50M(data):
    """Insert the rows of *data* into the 50 m mapping table."""
    _insert_semantic_link_rows(table_name_SEMANTIC_LINKS_SEGMENTS_50m(), data)


def insert_data_to_SEMANTIC_LINKS_SEGMENTS_100M(data):
    """Insert the rows of *data* into the 100 m mapping table."""
    _insert_semantic_link_rows(table_name_SEMANTIC_LINKS_SEGMENTS_100m(), data)
# Define the insert function for any new segment interval here.
# Do not forget the functions below that fetch SEGMENT_ID from the SEMANTIC_LINKS_SEGMENTS and SEGMENTS_**** tables.

<font size = 3>この部分はSEMANTIC_LINKS_SEGMENTSのテーブル群とSEGMENTS_****のテーブル群から、</font>  
<font size = 3>SEGMENT_idを取得する関数</font> 

In [22]:
def get_segment_id_SEGMENTS_****_10M():
    driver='{SQL Server}'
    server = '****'
    database = '****'
    trusted_connection='yes'
    connect= pyodbc.connect('DRIVER='+driver+';SERVER='+server+';DATABASE='+database+';PORT=1433;Trusted_Connection='+trusted_connection+';')
    cursor = connect.cursor()
    cursor.execute( "select max(SEGMENT_ID) from SEGMENTS_****_10M") 
    rows = cursor.fetchall()
    
    cursor.close()
    connect.close()
    return rows

def get_segment_id_SEGMENTS_****_50M():
    driver='{SQL Server}'
    server = '****'
    database = '****'
    trusted_connection='yes'
    connect= pyodbc.connect('DRIVER='+driver+';SERVER='+server+';DATABASE='+database+';PORT=1433;Trusted_Connection='+trusted_connection+';')
    cursor = connect.cursor()
    cursor.execute( "select max(SEGMENT_ID) from SEGMENTS_****_50M") 
    rows = cursor.fetchall()
    
    cursor.close()
    connect.close()
    return rows

def get_segment_id_SEGMENTS_****_100M():
    driver='{SQL Server}'
    server = '****'
    database = '****'
    trusted_connection='yes'
    connect= pyodbc.connect('DRIVER='+driver+';SERVER='+server+';DATABASE='+database+';PORT=1433;Trusted_Connection='+trusted_connection+';')
    cursor = connect.cursor()
    cursor.execute( "select max(SEGMENT_ID) from SEGMENTS_****_100M") 
    rows = cursor.fetchall()
    
    cursor.close()
    connect.close()
    return rows
#新しい間隔のセグメントの挿入関数はここで定義

def get_semantic_id_from_tables(semantic_id,distance_between_segments):
    """Count the rows stored for one semantic link at one segment interval.

    Args:
        semantic_id: SEMANTIC_LINK_ID to look up (an integer, or anything
            ``int()`` accepts).
        distance_between_segments: segment interval in metres; it selects the
            table ``SEMANTIC_LINKS_SEGMENTS_<interval>M``.

    Returns:
        int: number of rows whose SEMANTIC_LINK_ID equals ``semantic_id``
        (0 when the link has not been inserted yet).

    Raises:
        ValueError / TypeError: if either argument cannot be converted to int.
    """
    # A table name cannot be bound as a query parameter, so the interval is
    # forced through int() to make sure only digits are spliced into the SQL.
    # Both conversions happen before connecting so a bad argument cannot leak
    # a connection.
    table = "SEMANTIC_LINKS_SEGMENTS_" + str(int(distance_between_segments)) + "M"
    link_id = int(semantic_id)
    sql = "select count(SEMANTIC_LINK_ID) from " + table + " where SEMANTIC_LINK_ID = ?"

    driver='{SQL Server}'
    server = '****'
    database = '****'
    trusted_connection='yes'
    connect= pyodbc.connect('DRIVER='+driver+';SERVER='+server+';DATABASE='+database+';PORT=1433;Trusted_Connection='+trusted_connection+';')
    # try/finally so the cursor and the connection are released even when the
    # query raises.
    try:
        cursor = connect.cursor()
        try:
            # The id is bound as a parameter instead of being concatenated.
            cursor.execute(sql, link_id)
            return int(cursor.fetchone()[0])
        finally:
            cursor.close()
    finally:
        connect.close()

<font size = 6 color =red> ここからは実行↓ </font>

In [None]:
if __name__ == "__main__":
    segment_id = 1
    print('セグメントの間隔を指定してください(m),範囲は10,50,100から一つです：')
    print('-------')
    distance_between_segments = input()
    distance_between_segments = int(distance_between_segments)
    for filename in search_filelist_kml():
        SEMANTIC_LINK_ID = int(filename.split('.')[-2].split('\\')[-1])
        if get_semantic_id_from_tables(SEMANTIC_LINK_ID,distance_between_segments) != 0:
            print('セマンティック',SEMANTIC_LINK_ID,'のデータはすでにSQLサーバーに存在しているので')
            print('セマンティック',SEMANTIC_LINK_ID,'に対する挿入を停止する')
            print('')
            if distance_between_segments == 10:
                KML_inserted_PATH = KML_inserted_PATH_10m
            elif distance_between_segments == 50:
                KML_inserted_PATH = KML_inserted_PATH_50m
            elif distance_between_segments == 100:
                KML_inserted_PATH = KML_inserted_PATH_100m
            print(filename,'を',KML_inserted_PATH_100m,'に移動する')
                
            move_file(filename)
            print('-------')
            continue
        
        #get segment_id from tables and check if it is >1 and  check if distance_between_segments is correct
        if distance_between_segments == 10:
            if list(get_segment_id_SEGMENTS_****_10M()[0]) == [None]:
                segment_id = 1
            elif int(list(get_segment_id_SEGMENTS_****_10M()[0])[0]) >= 1 :
                segment_id = int(list(get_segment_id_SEGMENTS_****_10M()[0])[0]) + 1
        elif distance_between_segments == 50:
            if list(get_segment_id_SEGMENTS_****_50M()[0]) == [None]:
                segment_id = 1
            elif int(list(get_segment_id_SEGMENTS_****_50M()[0])[0]) >= 1 :
                segment_id = int(list(get_segment_id_SEGMENTS_****_50M()[0])[0]) + 1
        elif distance_between_segments == 100:
            if list(get_segment_id_SEGMENTS_****_100M()[0]) == [None]:
                segment_id = 1
            elif int(list(get_segment_id_SEGMENTS_****_100M()[0])[0]) >= 1 :
                segment_id = int(list(get_segment_id_SEGMENTS_****_100M()[0])[0]) + 1
        else:
            print('セグメント間隔の指定に問題があります、必ず範囲内に指定してください')
            break 
        #read kml and convert it to a DataFrame
        #segmentを生成する
        df_segment = pd.DataFrame(GPSListGenerator_segment(filename)[1:],columns =GPSListGenerator_segment(filename)[0])
        segment_id_series = list(range(segment_id,segment_id+len(df_segment)))
        #segment_id = int(segment_id_series[-1])+1
        df_segment['segment_Coordinate_id'] = segment_id_series

        #To match link_id of roads, a Knn classifier is trained with datas extracted from the df_segment
        train_data = df_segment.iloc[:,0:2]
        train_label = df_segment.iloc[:,-1]
        Classifier = KNeighborsClassifier(n_neighbors=1)
        Classifier.fit(train_data,train_label)
        #Use trained classifier to match roads
        #you can trust this classifier cause of I set n_neighbors to 1,and it must overfitting
        #so its accuracy will be 100%
        if isinstance(SEMANTIC_LINK_ID, int):
            json_file_name = str(SEMANTIC_LINK_ID) + '.json'
            df_json = ADASListGenerator_to_df(json_file_name)
            #print(df_json)
        else:
            print('kmlファイル名をsemantic_id（整数）にしてください')
            print('例：455.kml')
            break
        
        #該当セマンティックのjsonに標高データが存在しない場合：
        if len(df_json) <1:
            print('ゼンリンデータ：',json_file_name,' には標高データが存在していないため、')
            print('セマンティック',SEMANTIC_LINK_ID,'に対する挿入を停止する')
            print('')
            print(filename,'を\KML_DrivingRoute_inserted\ELEVATION_nullに移動する')
            
            KML_inserted_PATH = KML_inserted_PATH_ELEVATION_null
            move_file(filename)
            
            continue
            
        Classifier_result_for_json = Classifier.predict(df_json.iloc[:,1:3])
        #The following DataFrame was created to aggregate the training results and to organize the data
        Data_for_inserting_SEGMENTS_**** = pd.DataFrame({'SEGMENT_ID':[],
                                                        'SEGMENT_LENGTH':[],
                                                        'START_LATITUDED':[],
                                                        'START_LONGITUDE':[],
                                                        'END_LATITUDE':[],
                                                        'END_LONGITUDE':[],
                                                        'START_ADAS_ELEVATION_MILLI_METER':[],
                                                        'END_ADAS_ELEVATION_MILLI_METER':[],
                                                        'SLOPE_ANGLE_THETA':[],
                                                        'COS_THETA':[],
                                                        'SIN_THETA':[],
                                                        'IS_ADAS_NULL':[],
                                                        'IS_GET_****_DATA':[]
                                                        })
        Data_for_inserting_SEGMENTS_****.SEGMENT_ID = df_segment.segment_Coordinate_id.iloc[0:-1]
        Data_for_inserting_SEGMENTS_****.START_LATITUDED = df_segment.Latitude.iloc[0:-1]
        Data_for_inserting_SEGMENTS_****.START_LONGITUDE = df_segment.Longitude.iloc[0:-1]
        Data_for_inserting_SEGMENTS_****.END_LATITUDE = df_segment.Latitude.iloc[1:].reset_index(drop = True)
        Data_for_inserting_SEGMENTS_****.END_LONGITUDE = df_segment.Longitude.iloc[1:].reset_index(drop = True)
        
 
        Data_for_inserting_SEMANTIC_LINKS_SEGMENTS = pd.DataFrame({'SEMANTIC_LINK_ID':[],
                                                                   'SEMANTIC_LINK_SEGMENT_ID':[],
                                                                   'SEGMENT_ID':[]
                                                                   })
        Data_for_inserting_SEMANTIC_LINKS_SEGMENTS.SEGMENT_ID = Data_for_inserting_SEGMENTS_****.SEGMENT_ID
        Data_for_inserting_SEMANTIC_LINKS_SEGMENTS.SEMANTIC_LINK_ID = SEMANTIC_LINK_ID
        for i in range(len(Data_for_inserting_SEMANTIC_LINKS_SEGMENTS)):
            SEMANTIC_LINK_SEGMENT_ID = int(str(SEMANTIC_LINK_ID)+str(distance_between_segments)+ str(Data_for_inserting_SEMANTIC_LINKS_SEGMENTS.SEGMENT_ID.iloc[i]))
            Data_for_inserting_SEMANTIC_LINKS_SEGMENTS.SEMANTIC_LINK_SEGMENT_ID.iloc[i] = SEMANTIC_LINK_SEGMENT_ID

        #insert Classified df_json to Data_for_inserting_SEGMENTS_****
        for i in range(len(Classifier_result_for_json)):
            index = Data_for_inserting_SEGMENTS_****.SEGMENT_ID.isin([Classifier_result_for_json[i]])
            Data_for_inserting_SEGMENTS_****.START_ADAS_ELEVATION_MILLI_METER.iloc[index] = df_json.ZENRIN_Elevation.loc[i]
            Data_for_inserting_SEGMENTS_****.IS_ADAS_NULL.iloc[index] = df_json.adas_null.loc[i]
        
        #ここからはClassifierできない部分に対して、加重平均などの手法で欠損を埋める
        index_after = 0
        index_before = 0
        index_NOT_nan_list = list(Data_for_inserting_SEGMENTS_****[~Data_for_inserting_SEGMENTS_****.START_ADAS_ELEVATION_MILLI_METER.isna()].index)
        index_nan_list = list(Data_for_inserting_SEGMENTS_****[Data_for_inserting_SEGMENTS_****.START_ADAS_ELEVATION_MILLI_METER.isna()].index)
        IS_GET_****_DATA=[]
        for i in range(len(Data_for_inserting_SEGMENTS_****.START_ADAS_ELEVATION_MILLI_METER.isna())):
            IS_GET_****_DATA.append(int(~Data_for_inserting_SEGMENTS_****.START_ADAS_ELEVATION_MILLI_METER.isna()[i]))
        Data_for_inserting_SEGMENTS_****['IS_GET_****_DATA'] = IS_GET_****_DATA

        before = 0
        after = 0
        if str(Data_for_inserting_SEGMENTS_****.iloc[0]['START_ADAS_ELEVATION_MILLI_METER']) == 'nan':
            if Data_for_inserting_SEGMENTS_****.START_ADAS_ELEVATION_MILLI_METER.isnull().sum() == len(Data_for_inserting_SEGMENTS_****):
                print('ゼンリンデータ：',json_file_name,' には標高データが存在していないため、')
                print('セマンティック',SEMANTIC_LINK_ID,'に対する挿入を停止する')
                continue
            else:
                Data_for_inserting_SEGMENTS_****['START_ADAS_ELEVATION_MILLI_METER'].iloc[0] = Data_for_inserting_SEGMENTS_****['START_ADAS_ELEVATION_MILLI_METER'].iloc[index_NOT_nan_list[0]]
        if str(Data_for_inserting_SEGMENTS_****.iloc[-1]['START_ADAS_ELEVATION_MILLI_METER']) == 'nan':
            Data_for_inserting_SEGMENTS_****['START_ADAS_ELEVATION_MILLI_METER'].iloc[-1] = Data_for_inserting_SEGMENTS_****['START_ADAS_ELEVATION_MILLI_METER'].iloc[index_NOT_nan_list[-1]]
        #ここからは加重平均
        while Data_for_inserting_SEGMENTS_****.START_ADAS_ELEVATION_MILLI_METER.isnull().sum() !=0:
            for i in range(len(Data_for_inserting_SEGMENTS_****)-1):
                if str(Data_for_inserting_SEGMENTS_****.iloc[i]['START_ADAS_ELEVATION_MILLI_METER'])== 'nan':
                    before = Data_for_inserting_SEGMENTS_****.iloc[i-1]['START_ADAS_ELEVATION_MILLI_METER']
                    index_before = i-1
                    #print('index_before:',index_before,' ','before:',before)
                    for j in range(i,len(Data_for_inserting_SEGMENTS_****)):
                        if str(Data_for_inserting_SEGMENTS_****.iloc[j]['START_ADAS_ELEVATION_MILLI_METER'])!= 'nan':
                            after = Data_for_inserting_SEGMENTS_****.iloc[j]['START_ADAS_ELEVATION_MILLI_METER']
                            index_after = j
                            #print('index_after:',index_after,' ','after:',after)
                            break
                    distance_before_to_after = distance(Data_for_inserting_SEGMENTS_****.iloc[index_before]['START_LATITUDED'], 
                                                        Data_for_inserting_SEGMENTS_****.iloc[index_before]['START_LONGITUDE'],
                                                        Data_for_inserting_SEGMENTS_****.iloc[index_after]['START_LATITUDED'],
                                                        Data_for_inserting_SEGMENTS_****.iloc[index_after]['START_LONGITUDE'])
#                     if distance_before_to_after == 0:
#                         print(index_before,Data_for_inserting_SEGMENTS_****.iloc[index_before]['START_LATITUDED'],Data_for_inserting_SEGMENTS_****.iloc[index_before]['START_LONGITUDE'])
#                         print(index_after,Data_for_inserting_SEGMENTS_****.iloc[index_after]['START_LATITUDED'],Data_for_inserting_SEGMENTS_****.iloc[index_after]['START_LONGITUDE'])
                    rate_before = distance(Data_for_inserting_SEGMENTS_****.iloc[index_before]['START_LATITUDED'], 
                                           Data_for_inserting_SEGMENTS_****.iloc[index_before]['START_LONGITUDE'],
                                           Data_for_inserting_SEGMENTS_****.iloc[i]['START_LATITUDED'],
                                           Data_for_inserting_SEGMENTS_****.iloc[i]['START_LONGITUDE'])/distance_before_to_after
                    rate_after = distance(Data_for_inserting_SEGMENTS_****.iloc[i]['START_LATITUDED'], 
                                           Data_for_inserting_SEGMENTS_****.iloc[i]['START_LONGITUDE'],
                                           Data_for_inserting_SEGMENTS_****.iloc[index_after]['START_LATITUDED'],
                                           Data_for_inserting_SEGMENTS_****.iloc[index_after]['START_LONGITUDE'])/distance_before_to_after
                    #print(before * (1-rate_before) + after*(1-rate_after))
                    Data_for_inserting_SEGMENTS_****['START_ADAS_ELEVATION_MILLI_METER'].iloc[i] = before * (1-rate_before) + after*(1-rate_after)
                    #print('for this loop:',Data_for_inserting.iloc[i]['START_ADAS_ELEVATION_MILLI_METER'])
                    break
                    
        #次のstartは前のend
        Data_for_inserting_SEGMENTS_****['END_ADAS_ELEVATION_MILLI_METER'].iloc[1:] = Data_for_inserting_SEGMENTS_****['START_ADAS_ELEVATION_MILLI_METER'].iloc[0:-1]
        Data_for_inserting_SEGMENTS_****['END_ADAS_ELEVATION_MILLI_METER'].iloc[0] = Data_for_inserting_SEGMENTS_****['START_ADAS_ELEVATION_MILLI_METER'].iloc[0]
        #segment間隔を挿入
        Data_for_inserting_SEGMENTS_****['SEGMENT_LENGTH'] = distance_between_segments
        
        #startとendの標高差を利用して勾配を算出
        for i in range(len(Data_for_inserting_SEGMENTS_****)):
            delta_ELEVATION = Data_for_inserting_SEGMENTS_****['END_ADAS_ELEVATION_MILLI_METER'].iloc[i] - Data_for_inserting_SEGMENTS_****['START_ADAS_ELEVATION_MILLI_METER'].iloc[i]
            Data_for_inserting_SEGMENTS_****['SLOPE_ANGLE_THETA'].iloc[i] = math.atan(delta_ELEVATION/distance_between_segments)
            Data_for_inserting_SEGMENTS_****['COS_THETA'].iloc[i] = math.cos(Data_for_inserting_SEGMENTS_****['SLOPE_ANGLE_THETA'].iloc[i])
            Data_for_inserting_SEGMENTS_****['SIN_THETA'].iloc[i] = math.sin(Data_for_inserting_SEGMENTS_****['SLOPE_ANGLE_THETA'].iloc[i])
            if str(Data_for_inserting_SEGMENTS_****.iloc[i]['IS_ADAS_NULL'])== 'nan':
                Data_for_inserting_SEGMENTS_****['IS_ADAS_NULL'].iloc[i] = 0
        Data_for_inserting_SEGMENTS_****['IS_ADAS_NULL'] = Data_for_inserting_SEGMENTS_****['IS_ADAS_NULL'].astype('int')
            
        
        
        

        #insert aggregated datas to SQL server
        #dont forget to add new inserting method for new distance of segment
        if distance_between_segments == 10:
            KML_inserted_PATH = KML_inserted_PATH_10m
            print('-------------------------------------')
            print('セマンティック',SEMANTIC_LINK_ID,'に対して、SQLサーバーにデータを挿入しますฅ^•ω•^ฅ')
            print('')
            print('10m間隔のsegmentデータが挿入中です....')
            print('')
            print('今挿入中のテーブルはSEGMENTS_****_10M....')
            print('')
            insert_data_to_SEGMENTS_****_10M(Data_for_inserting_SEGMENTS_****)
            print('SEGMENTS_****_10M挿入完了')
            print('')
            print('今挿入中のテーブルはSEMANTIC_LINKS_SEGMENTS_10M')
            print('')
            insert_data_to_SEMANTIC_LINKS_SEGMENTS_10M(Data_for_inserting_SEMANTIC_LINKS_SEGMENTS)
            print('セマンティック',SEMANTIC_LINK_ID,'に対して、','全て挿入完了')
            print('-------------------------------------')
        elif distance_between_segments == 50:
            KML_inserted_PATH = KML_inserted_PATH_50m
            print('-------------------------------------')
            print('セマンティック',SEMANTIC_LINK_ID,'に対して、SQLサーバーにデータを挿入しますฅ^•ω•^ฅ')
            print('')
            print('50m間隔のsegmentデータが挿入中です')
            print('')
            print('今挿入中のテーブルはSEGMENTS_****_50M ')
            print('')
            insert_data_to_SEGMENTS_****_50M(Data_for_inserting_SEGMENTS_****)
            print('SEGMENTS_****_50M挿入完了')
            print('')
            print('今挿入中のテーブルはSEMANTIC_LINKS_SEGMENTS_50M')
            print('')
            insert_data_to_SEMANTIC_LINKS_SEGMENTS_50M(Data_for_inserting_SEMANTIC_LINKS_SEGMENTS)
            print('セマンティック',SEMANTIC_LINK_ID,'に対して、','全て挿入完了')
            print('-------------------------------------')
        elif distance_between_segments == 100:
            KML_inserted_PATH = KML_inserted_PATH_100m
            print('-------------------------------------')
            print('セマンティック',SEMANTIC_LINK_ID,'に対して、SQLサーバーにデータを挿入しますฅ^•ω•^ฅ')
            print('')
            print('100m間隔のsegmentデータが挿入中です')
            print('')
            print('今挿入中のテーブルはSEGMENTS_****_100M')
            print('')
            insert_data_to_SEGMENTS_****_100M(Data_for_inserting_SEGMENTS_****)
            print('SEGMENTS_****_100M挿入完了')
            print('')
            print('今挿入中のテーブルはSEMANTIC_LINKS_SEGMENTS_100M')
            print('')
            insert_data_to_SEMANTIC_LINKS_SEGMENTS_100M(Data_for_inserting_SEMANTIC_LINKS_SEGMENTS)
            print('セマンティック',SEMANTIC_LINK_ID,'に対して、','全て挿入完了')
            print('-------------------------------------')
        else:
            print('-------------------------------------')
            print('挿入に失敗しました')
            print('セグメント間隔の指定に問題があります、必ず範囲内に指定してください')
            print('-------------------------------------')
            break
        #insert済みのものを移動する
        move_file(filename)
        
        #生成したセグメントをレビューする
        review_path = Path(rf"{os.getcwd()}\review_of_segment\{SEMANTIC_LINK_ID}_{distance_between_segments}m.html")
        display_folium_map_from_csv(df_segment,review_path)
    print('')
    print('ฅ^•ω•^ฅすべてのKMLが挿入完了ニャー')

# Credit
- ShichiryにkmlとJsonを処理する関数の部分のcodeを教えていただきました
- 何新に欠損を埋める部分に協力していただきました
- Ishigeにたくさんの助言をいただきました
- Sogaにエネルギーマップについて親切かつ詳しく解説していただきました
- prof.Tommyにsegmentと欠損について大変有用な助言をいただきました