In [27]:
import pandas as pd
import numpy as np
from numpy import dot
from numpy.linalg import norm
import glob
import os
import pickle as pkl
import math
import random
import geopandas
import folium as fo

# Alternative, kept for reference: resolve the data folders relative to the working directory.
#DIR_P_PATH = os.getcwd()+'/Predict_Data/'
#DIR_T_PATH = os.getcwd()+'/Test_Data/'
# Folders that hold the pickled predicted route and the pickled test routes.
DIR_P_PATH = '/STORAGE/SHARE/Intern/DA30/Predict_Data/'
DIR_T_PATH ='/STORAGE/SHARE/Intern/DA30/Test_Data/'
# Placeholders only: both names are rebound by the pickle loads below.
Test_Route = {}
Pred_Route = {}


# Bounding box and cell size of the grid; these are the values passed to GridIndexer.
min_lat = 33.120581  # Minimum latitude value
max_lat = 38.726809 # Maximum latitude value
min_lon = 124.896901  # Minimum longitude value
max_lon = 132.058734 # Maximum longitude value
cell_size = 0.001  # Cell size in degrees

# Data Loading
# NOTE(review): pickle.load can run arbitrary code contained in the file, so
# these paths must only ever point at trusted data.

with open(DIR_T_PATH+'Test_Route.pkl', "rb") as fd:
    Test_Route = pkl.load(fd)

with open(DIR_P_PATH+'Predict_route.pkl',"rb") as fd:
    Pred_Route = pkl.load(fd)

# Grid function
class GridIndexer:
    """Number the cells of a regular latitude/longitude grid.

    The bounding box is cut into squares of ``cell_size`` degrees.  Cells are
    numbered row by row (rows follow latitude, columns follow longitude) and
    the cell numbers handed out by :meth:`get_index` start at 1.
    """

    def __init__(self, min_lat, max_lat, min_lon, max_lon, cell_size):
        """Store the bounding box and derive the grid dimensions.

        Args:
            min_lat, max_lat: latitude limits of the grid.
            min_lon, max_lon: longitude limits of the grid.
            cell_size: edge length of one cell, in the same unit as the limits.
        """
        self.min_lat = min_lat
        self.max_lat = max_lat
        self.min_lon = min_lon
        self.max_lon = max_lon
        self.cell_size = cell_size
        self.num_cols = int(math.ceil((max_lon - min_lon) / cell_size))  # col_size
        self.num_rows = int(math.ceil((max_lat - min_lat) / cell_size))  # row_size
        # Cell numbers are computed arithmetically in get_index(), so the
        # num_rows x num_cols lookup table (tens of millions of entries for a
        # fine grid) is only materialised if somebody actually reads ``grid``.
        self._grid = None

    @property
    def grid(self):
        """Row-major table of 0-based cell numbers, built on first access."""
        if self._grid is None:
            self.populate_grid()
        return self._grid

    @grid.setter
    def grid(self, value):
        self._grid = value

    def populate_grid(self):
        """(Re)build the full lookup table: ``grid[row][col] == row * num_cols + col``."""
        cols = self.num_cols
        self._grid = [
            list(range(row * cols, (row + 1) * cols))
            for row in range(self.num_rows)
        ]

    # get_index(lat, lon) --> return grid_num (start index: 1)
    def get_index(self, lat, lon):
        """Return the 1-based number of the cell that contains (lat, lon).

        Raises:
            IndexError: if the point lies outside the grid.  Negative offsets
                used to wrap around through negative list indexing and
                silently returned an unrelated cell.
        """
        # floor (not int truncation) so that points just below the minimum
        # are detected as out of range instead of being folded into cell 0.
        col = math.floor((lon - self.min_lon) / self.cell_size)
        row = math.floor((lat - self.min_lat) / self.cell_size)
        if not (0 <= row < self.num_rows and 0 <= col < self.num_cols):
            raise IndexError(
                f"point ({lat}, {lon}) is outside the grid "
                f"[{self.min_lat}, {self.max_lat}] x [{self.min_lon}, {self.max_lon}]"
            )
        return row * self.num_cols + col + 1

    # get_M_coord(grid_num) --> return median_lat, median_lon (round: 4)
    def get_M_coord(self, index):
        """Return the centre ``(lat, lon)`` of 1-based cell ``index``, rounded to 4 decimals."""
        row = (index - 1) // self.num_cols
        col = (index - 1) % self.num_cols
        median_lat = self.min_lat + (row + 0.5) * self.cell_size
        median_lon = self.min_lon + (col + 0.5) * self.cell_size
        return round(median_lat, 4), round(median_lon, 4)

    def _cell_centres(self, route):
        """Return the cell centre of every (lat, lon) point of ``route``, in order."""
        # Iterating the values directly avoids depending on each route's
        # index labels (they start at a different number for every route).
        return [
            self.get_M_coord(self.get_index(lat, lon))
            for lat, lon in zip(route['lat'], route['lon'])
        ]

    def save_visualization(self, PR, TR):
        """Draw the test routes (red) and the predicted route (blue) and save the map.

        Args:
            PR: predicted route; must provide ``['lat']`` and ``['lon']`` sequences.
            TR: collection of test routes addressed as ``TR[0] .. TR[len(TR) - 1]``.

        Writes ``Route2.html`` into the current working directory.
        """
        Mymap = fo.Map()
        for j in range(len(TR)):
            # Test routes are drawn point by point, exactly as before.
            fo.PolyLine(self._cell_centres(TR[j]), color='red', tooltip="Test_Route").add_to(Mymap)
        # Order-preserving de-duplication in linear time (was `v not in list`).
        predicted = list(dict.fromkeys(self._cell_centres(PR)))
        fo.PolyLine(predicted, color='blue', tooltip="Predict_Route").add_to(Mymap)
        Mymap.save("Route2.html")
      
    


class CosineSimilarityCalculator:
    """Cosine similarity between routes represented as sets of grid cell numbers.

    The reference route defines the vector space: one dimension per distinct
    cell number it contains.  Any other route is projected onto those
    dimensions as a 0/1 vector.
    """

    def __init__(self, grid_route_num_list):
        """Remember the reference route and build its (all-ones) vector.

        Args:
            grid_route_num_list: cell numbers of the reference route.
        """
        self.grid_route_num_list = grid_route_num_list
        self.all_grid_nums = sorted(set(grid_route_num_list))
        # Every dimension comes from the reference route itself, so its own
        # vector is 1 everywhere.
        self.vector = np.ones(len(self.all_grid_nums), dtype=int)

    def calculate_similarity(self, other_grid_route_num_list):
        """Return the cosine similarity between the reference route and another route.

        Args:
            other_grid_route_num_list: cell numbers of the route to compare.

        Returns:
            A value in [0, 1]; 0.0 when either vector is all zeros (no shared
            cell or an empty route) instead of the NaN that 0/0 would give.
        """
        other_cells = set(other_grid_route_num_list)  # O(1) membership tests
        other_vector = np.array([1 if num in other_cells else 0 for num in self.all_grid_nums])
        dot_product = np.dot(self.vector, other_vector)
        norm_product = np.linalg.norm(self.vector) * np.linalg.norm(other_vector)
        if norm_product == 0:
            return 0.0
        return dot_product / norm_product

class RouteSimilarityCalculator:
    """Compare a predicted route with a collection of test routes on the grid.

    Routes are objects exposing ``lat`` and ``lon`` sequences.  Grid lookups
    go through the indexer passed to the constructor or, when none is given,
    through the module-level ``indexer`` object.
    """

    def __init__(self, PR, TR, grid_indexer=None):
        """Store the routes.

        Args:
            PR: predicted route.
            TR: iterable of test routes.
            grid_indexer: optional object providing ``get_index(lat, lon)`` and
                ``get_M_coord(index)``; defaults to the global ``indexer``.
        """
        self.PR = PR
        self.TR = TR
        self.grid_indexer = grid_indexer

    def _indexer(self):
        """Return the injected indexer, falling back to the global ``indexer``."""
        return indexer if self.grid_indexer is None else self.grid_indexer

    def get_route_indices(self, route):
        """Return the sorted, distinct cell numbers of an iterable of (lat, lon) points."""
        grid = self._indexer()
        return sorted({grid.get_index(point[0], point[1]) for point in route})

    def find_matching_routes(self):
        """Return ``(predict_route, res_route)``.

        ``predict_route`` is the sorted list of distinct cells of the predicted
        route; ``res_route`` is the sorted subset of those cells that is also
        visited by at least one test route.
        """
        predict_route = self.get_route_indices(zip(self.PR.lat, self.PR.lon))

        the_other_route = [
            point
            for route in self.TR
            for point in zip(route.lat, route.lon)
        ]
        data_cells = set(self.get_route_indices(the_other_route))

        # Plain intersection; the previous version popped from the list it was
        # iterating over to reach the same result.
        res_route = [cell for cell in predict_route if cell in data_cells]
        return predict_route, res_route

    def cal_index(self, route):
        """Return the cell number of every point of ``route``, in route order."""
        grid = self._indexer()
        return [grid.get_index(lat, lon) for lat, lon in zip(route.lat, route.lon)]

    def cal_M_coords(self, index):
        """Return the cell-centre coordinates for each cell number in ``index``."""
        grid = self._indexer()
        return [grid.get_M_coord(idx) for idx in index]

    def Save_Visualization2(self):
        """Draw test routes (red) and the predicted route (blue); save as ``hi.html``."""
        Mymap = fo.Map()
        for r in self.TR:
            fo.PolyLine(self.cal_M_coords(self.cal_index(r)), tooltip='Test_route', color='red').add_to(Mymap)
        fo.PolyLine(self.cal_M_coords(self.cal_index(self.PR)), tooltip='Predicted_route', color='blue').add_to(Mymap)
        Mymap.save("hi.html")
        print("파일이 저장되었습니다.")

    def calculate_cosine_similarity(self):
        """Return the cosine similarity between the predicted cells and the matched cells."""
        predict_route, res_route = self.find_matching_routes()
        cosine_calculator = CosineSimilarityCalculator(predict_route)
        return cosine_calculator.calculate_similarity(res_route)
    
# Single shared grid; RouteSimilarityCalculator looks this object up by name.
indexer = GridIndexer(min_lat, max_lat, min_lon, max_lon, cell_size)


def main(PR, TR):
    """Save the route map and return the cosine similarity of PR against TR.

    Args:
        PR: predicted route.
        TR: collection of test routes.
    """
    calculator = RouteSimilarityCalculator(PR, TR)
    calculator.Save_Visualization2()
    return calculator.calculate_cosine_similarity()


if __name__ == '__main__':
    res = main(Pred_Route, Test_Route)
    print(f'cos simiarity: {res}')


파일이 저장되었습니다.
cos simiarity: 0.8569633695778818


In [4]:
# Load the test routes and the predicted route from their pickle files.
# NOTE(review): pickle.load can run arbitrary code contained in the file;
# only load files from a trusted location.
with open(DIR_T_PATH+'Test_Route.pkl', "rb") as fd:
    Test_Route = pkl.load(fd)

with open(DIR_P_PATH+'Predict_route.pkl',"rb") as fd:
    Pred_Route = pkl.load(fd)

In [71]:
# Notes:
# Inspecting the loaded data shows that Test_Route holds 19 routes.
# They have to be handled as those 19 routes plus the recommended route.
# Plan: real route coordinates -> grid index -> cell centre -> plot the centres.

import folium as fo
mymap = fo.Map(location=[37,127],zoom_start=10)

# One indexer object, reused for every lookup below.
test2= GridIndexer(min_lat,max_lat,min_lon,max_lon,cell_size)

# Every route's index labels start at a different number, so walk the
# coordinate values directly instead of reconstructing the label range.
# Only the first test route is plotted here.
first_route = Test_Route[0]
for lat, lon in zip(first_route['lat'], first_route['lon']):
    # Compute each cell centre once (it used to be computed twice, with the
    # first result thrown away).
    fo.Marker(test2.get_M_coord(test2.get_index(lat, lon))).add_to(mymap)

In [184]:
# Draw every test route (red) and the predicted route (blue) as polylines of
# de-duplicated cell centres.
test=GridIndexer(min_lat,max_lat,min_lon,max_lon,cell_size)  # one indexer is enough

test_lines=[]
for j in range(len(Test_Route)):
    route=Test_Route[j]
    lst1=[test.get_M_coord(test.get_index(lat,lon)) for lat,lon in zip(route['lat'],route['lon'])]
    # dict.fromkeys keeps first occurrences in order, in linear time.
    test_lines.append(list(dict.fromkeys(lst1)))

# The map used to be centred on nodulst1[10] *before* nodulst1 was assigned in
# this cell, which only worked with state left over from an earlier run (where
# it held the last test route).  Compute it first and guard short routes.
nodulst1=test_lines[-1] if test_lines else []
if len(nodulst1)>10:
    map_centre=nodulst1[10]
elif nodulst1:
    map_centre=nodulst1[0]
else:
    map_centre=None  # let folium fall back to its default view
mymap7=fo.Map(location=map_centre,zoom_start=8)

for line in test_lines:
    fo.PolyLine(line,color='red',tooltip="Coast").add_to(mymap7)

prelst=[test.get_M_coord(test.get_index(lat,lon)) for lat,lon in zip(Pred_Route['lat'],Pred_Route['lon'])]
noprelst=list(dict.fromkeys(prelst))
fo.PolyLine(noprelst,color='blue').add_to(mymap7)


In [186]:
# Write the mymap7 map object to an HTML file in the working directory.
mymap7.save("test.html")

위에 나온 코드를 함수화 하는 작업을 해야함

In [224]:

import pandas as pd
import numpy as np
from numpy import dot
from numpy.linalg import norm
import glob
import os
import pickle as pkl
import math
import random
import geopandas
import folium as fo

# Alternative, kept for reference: resolve the data folders relative to the working directory.
#DIR_P_PATH = os.getcwd()+'/Predict_Data/'
#DIR_T_PATH = os.getcwd()+'/Test_Data/'
# Folders that hold the pickled predicted route and the pickled test routes.
DIR_P_PATH = '/STORAGE/SHARE/Intern/DA30/Predict_Data/'
DIR_T_PATH ='/STORAGE/SHARE/Intern/DA30/Test_Data/'
# Placeholders only: both names are rebound by the pickle loads below.
Test_Route = {}
Pred_Route = {}


# Bounding box and cell size of the grid, as passed to GridIndexer elsewhere in this notebook.
min_lat = 33.120581  # Minimum latitude value
max_lat = 38.726809 # Maximum latitude value
min_lon = 124.896901  # Minimum longitude value
max_lon = 132.058734 # Maximum longitude value
cell_size = 0.001  # Cell size in degrees

# Data Loading
# NOTE(review): pickle.load can run arbitrary code contained in the file, so
# these paths must only ever point at trusted data.

with open(DIR_T_PATH+'Test_Route.pkl', "rb") as fd:
    Test_Route = pkl.load(fd)

with open(DIR_P_PATH+'Predict_route.pkl',"rb") as fd:
    Pred_Route = pkl.load(fd)







def Save_Visualization(self,PR,TR):
    """Draw the test routes (red) and the predicted route (blue); save ``Route1.html``.

    Args:
        self: unused; kept so the function can be pasted into a class unchanged.
        PR: predicted route providing ``['lat']`` and ``['lon']`` sequences.
        TR: collection of test routes addressed as ``TR[0] .. TR[len(TR) - 1]``.

    Grid lookups go through the module-level ``indexer`` object.
    """
    def cell_centres(route):
        # Walk the coordinate values directly; this does not depend on how
        # each route's index labels are numbered.
        return [
            indexer.get_M_coord(indexer.get_index(lat, lon))
            for lat, lon in zip(route['lat'], route['lon'])
        ]

    Mymap = fo.Map()
    for j in range(len(TR)):
        # Test routes are drawn point by point (the old copy loop removed nothing).
        fo.PolyLine(cell_centres(TR[j]), color='red', tooltip="Test_Route").add_to(Mymap)
    # Order-preserving de-duplication in linear time.
    predicted = list(dict.fromkeys(cell_centres(PR)))
    fo.PolyLine(predicted, color='blue', tooltip="Predict_Route").add_to(Mymap)
    Mymap.save("Route1.html")

    
        

수정이 필요한 부분

1.자료구조를 많이 사용할 필요 없이 파이썬이니까 리스트 컴프리헨션으로 간단하게 만들기

2.자주 사용되는 부분 혹은 반복되는 부분을 따로 함수화하기

3.index값도 index로 접근하는게 아니라 길이로 접근하면 더 좋았을 것. 등등..

4.객체를 새로 만드는게 아니라 한번 생성된 객체를 이용하기 위해 class 안에 집어넣어서 잘 사용하기
(RouteSimilarityCalculator(PR,TR) 을 이용하기 위해 저 클래스에 함수 넣으면 객체를 두번 생성할 필요가 없다.)

# 궁금한점

indexer라는 객체를 생성, indexer를 가지고 있는 class에 함수를 넣어서 사용하면 왜 아까 안되었는지 -> 그래서 굳이 새로 만들 필요없이 있는곳에다가 넣어서 사용하라고 하심. 근데 이럴 경우 indexer에 있는 함수를 사용하기 위해서 indexer 객체 생성을 통해 값을 받아와야함.
-> 요걸 개선할 수 있지 않을까?


추가 사항은 DA30_test.py를 통해 보고 지금은 위의 1,2,3,4를 개선한 함수를 만들어보자


In [15]:
# Grid cell number of every point of the predicted route, in route order.
[indexer.get_index(lat, lon) for lat, lon in zip(Pred_Route.lat, Pred_Route.lon)]

[2938065,
 2945228,
 2945228,
 2945228,
 2945228,
 2945228,
 2952390,
 2952391,
 2959553,
 2959553,
 2966715,
 2966715,
 2966715,
 2973877,
 2973877,
 2973877,
 2981039,
 2981039,
 2988201,
 2988201,
 2988201,
 2988201,
 3002525,
 3002525,
 3002525,
 3002525,
 3002524,
 3002524,
 3009686,
 3009686,
 3009686,
 3009686,
 3016848,
 3016848,
 3024010,
 3024009,
 3031171,
 3038333,
 3038333,
 3038333,
 3038333,
 3045495,
 3045495,
 3052656,
 3052656,
 3059818,
 3074142,
 3074141,
 3074141,
 3088465,
 3088465,
 3102788,
 3102788,
 3109950,
 3109950,
 3117112,
 3117112,
 3117112,
 3124273,
 3131435,
 3131435,
 3138597,
 3145759,
 3145759,
 3152920,
 3152920,
 3160082,
 3160082,
 3167244,
 3167244,
 3174405,
 3181567,
 3181567,
 3188729,
 3195891,
 3203053,
 3203053,
 3203053,
 3210214,
 3210214,
 3210214,
 3217376,
 3224538,
 3224538,
 3231700,
 3231700,
 3238861,
 3238861,
 3253185,
 3253185,
 3260347,
 3267508,
 3267508,
 3274670,
 3288993,
 3303317,
 3303317,
 3310479,
 3310479,
 3317640,


In [19]:
def cal_index(self, route):
    """Return ``self.get_index(lat, lon)`` for every point of ``route``, in order.

    ``route`` must expose ``lat`` and ``lon`` sequences of equal meaning.
    """
    return [self.get_index(lat, lon) for lat, lon in zip(route.lat, route.lon)]

            
def cal_M_coords(self, index):
    """Return ``self.get_M_coord(idx)`` for each cell number in ``index``, as a list."""
    centres = []
    for cell_number in index:
        centres.append(self.get_M_coord(cell_number))
    return centres

def Save_Visualization2(self,PR,TR):
    """Draw the test routes (red) and the predicted route (blue); save ``hi.html``.

    Args:
        self: object providing ``cal_index(route)`` and ``cal_M_coords(index)``.
        PR: predicted route.
        TR: iterable of test routes.
    """
    predicted = self.cal_M_coords(self.cal_index(PR))
    # Centre the map on the first predicted cell; PR[0] is not a coordinate
    # pair.  With an empty route folium falls back to its default view.
    Mymap = fo.Map(location=predicted[0] if predicted else None, zoom_start=8)
    for r in TR:
        # `route` was never defined here; the loop variable is the route to draw.
        fo.PolyLine(self.cal_M_coords(self.cal_index(r)), tooltip='Test_route', color='red').add_to(Mymap)
    fo.PolyLine(predicted, tooltip='Predicted_route', color='blue').add_to(Mymap)
    # save() is a method of the map object, not of the folium module.
    Mymap.save("hi.html")
    print("파일이 저장되었습니다.")
    

30의 코드를 보면 indexer의 값을 리턴해준다.

trans_grid와 trans_m_coord에서는 indexer를 객체로 만들고 거기서 해결을 한다. 나는 여기서 self의 값을 리턴한다.

그래서 코드에 에러가 있는 것 같다. route가 정의되지 않았다는 에러가 나는데, 함 indexer를 통해서 해보자~

그래도 밑에 코드처럼 에러가 발생

에러는 잘 해결함 -> cal 함수 2개를 indexer. 메소드를 통해서 해결

그리고 indexer의 객체를 한번 생성하니까 indexer를 통해서 main 메소드에서 데이터를 저장하는 코드를 만들어봤다.




In [28]:
import pandas as pd
import numpy as np
from numpy import dot
from numpy.linalg import norm
import glob
import os
import pickle as pkl
import math
import random
import geopandas
import folium as fo

# Alternative, kept for reference: resolve the data folders relative to the working directory.
#DIR_P_PATH = os.getcwd()+'/Predict_Data/'
#DIR_T_PATH = os.getcwd()+'/Test_Data/'
# Folders that hold the pickled predicted route and the pickled test routes.
DIR_P_PATH = '/STORAGE/SHARE/Intern/DA30/Predict_Data/'
DIR_T_PATH ='/STORAGE/SHARE/Intern/DA30/Test_Data/'
# Placeholders only: both names are rebound by the pickle loads below.
Test_Route = {}
Pred_Route = {}


# Bounding box and cell size of the grid; these are the values passed to GridIndexer.
min_lat = 33.120581  # Minimum latitude value
max_lat = 38.726809 # Maximum latitude value
min_lon = 124.896901  # Minimum longitude value
max_lon = 132.058734 # Maximum longitude value
cell_size = 0.001  # Cell size in degrees

# Data Loading
# NOTE(review): pickle.load can run arbitrary code contained in the file, so
# these paths must only ever point at trusted data.

with open(DIR_T_PATH+'Test_Route.pkl', "rb") as fd:
    Test_Route = pkl.load(fd)

with open(DIR_P_PATH+'Predict_route.pkl',"rb") as fd:
    Pred_Route = pkl.load(fd)

# Grid function
class GridIndexer:
    """Number the cells of a regular latitude/longitude grid.

    The bounding box is cut into squares of ``cell_size`` degrees.  Cells are
    numbered row by row (rows follow latitude, columns follow longitude) and
    the cell numbers handed out by :meth:`get_index` start at 1.
    """

    def __init__(self, min_lat, max_lat, min_lon, max_lon, cell_size):
        """Store the bounding box and derive the grid dimensions.

        Args:
            min_lat, max_lat: latitude limits of the grid.
            min_lon, max_lon: longitude limits of the grid.
            cell_size: edge length of one cell, in the same unit as the limits.
        """
        self.min_lat = min_lat
        self.max_lat = max_lat
        self.min_lon = min_lon
        self.max_lon = max_lon
        self.cell_size = cell_size
        self.num_cols = int(math.ceil((max_lon - min_lon) / cell_size))  # col_size
        self.num_rows = int(math.ceil((max_lat - min_lat) / cell_size))  # row_size
        # Cell numbers are computed arithmetically in get_index(), so the
        # num_rows x num_cols lookup table (tens of millions of entries for a
        # fine grid) is only materialised if somebody actually reads ``grid``.
        self._grid = None

    @property
    def grid(self):
        """Row-major table of 0-based cell numbers, built on first access."""
        if self._grid is None:
            self.populate_grid()
        return self._grid

    @grid.setter
    def grid(self, value):
        self._grid = value

    def populate_grid(self):
        """(Re)build the full lookup table: ``grid[row][col] == row * num_cols + col``."""
        cols = self.num_cols
        self._grid = [
            list(range(row * cols, (row + 1) * cols))
            for row in range(self.num_rows)
        ]

    # get_index(lat, lon) --> return grid_num (start index: 1)
    def get_index(self, lat, lon):
        """Return the 1-based number of the cell that contains (lat, lon).

        Raises:
            IndexError: if the point lies outside the grid.  Negative offsets
                used to wrap around through negative list indexing and
                silently returned an unrelated cell.
        """
        # floor (not int truncation) so that points just below the minimum
        # are detected as out of range instead of being folded into cell 0.
        col = math.floor((lon - self.min_lon) / self.cell_size)
        row = math.floor((lat - self.min_lat) / self.cell_size)
        if not (0 <= row < self.num_rows and 0 <= col < self.num_cols):
            raise IndexError(
                f"point ({lat}, {lon}) is outside the grid "
                f"[{self.min_lat}, {self.max_lat}] x [{self.min_lon}, {self.max_lon}]"
            )
        return row * self.num_cols + col + 1

    # get_M_coord(grid_num) --> return median_lat, median_lon (round: 4)
    def get_M_coord(self, index):
        """Return the centre ``(lat, lon)`` of 1-based cell ``index``, rounded to 4 decimals."""
        row = (index - 1) // self.num_cols
        col = (index - 1) % self.num_cols
        median_lat = self.min_lat + (row + 0.5) * self.cell_size
        median_lon = self.min_lon + (col + 0.5) * self.cell_size
        return round(median_lat, 4), round(median_lon, 4)

    def _cell_centres(self, route):
        """Return the cell centre of every (lat, lon) point of ``route``, in order."""
        # Iterating the values directly avoids depending on each route's
        # index labels (they start at a different number for every route).
        return [
            self.get_M_coord(self.get_index(lat, lon))
            for lat, lon in zip(route['lat'], route['lon'])
        ]

    def save_visualization(self, PR, TR):
        """Draw the test routes (red) and the predicted route (blue) and save the map.

        Args:
            PR: predicted route; must provide ``['lat']`` and ``['lon']`` sequences.
            TR: collection of test routes addressed as ``TR[0] .. TR[len(TR) - 1]``.

        Writes ``Route2.html`` into the current working directory.
        """
        Mymap = fo.Map()
        for j in range(len(TR)):
            # Test routes are drawn point by point, exactly as before.
            fo.PolyLine(self._cell_centres(TR[j]), color='red', tooltip="Test_Route").add_to(Mymap)
        # Order-preserving de-duplication in linear time (was `v not in list`).
        predicted = list(dict.fromkeys(self._cell_centres(PR)))
        fo.PolyLine(predicted, color='blue', tooltip="Predict_Route").add_to(Mymap)
        Mymap.save("Route2.html")
      
    


class CosineSimilarityCalculator:
    """Cosine similarity between routes represented as sets of grid cell numbers.

    The reference route defines the vector space: one dimension per distinct
    cell number it contains.  Any other route is projected onto those
    dimensions as a 0/1 vector.
    """

    def __init__(self, grid_route_num_list):
        """Remember the reference route and build its (all-ones) vector.

        Args:
            grid_route_num_list: cell numbers of the reference route.
        """
        self.grid_route_num_list = grid_route_num_list
        self.all_grid_nums = sorted(set(grid_route_num_list))
        # Every dimension comes from the reference route itself, so its own
        # vector is 1 everywhere.
        self.vector = np.ones(len(self.all_grid_nums), dtype=int)

    def calculate_similarity(self, other_grid_route_num_list):
        """Return the cosine similarity between the reference route and another route.

        Args:
            other_grid_route_num_list: cell numbers of the route to compare.

        Returns:
            A value in [0, 1]; 0.0 when either vector is all zeros (no shared
            cell or an empty route) instead of the NaN that 0/0 would give.
        """
        other_cells = set(other_grid_route_num_list)  # O(1) membership tests
        other_vector = np.array([1 if num in other_cells else 0 for num in self.all_grid_nums])
        dot_product = np.dot(self.vector, other_vector)
        norm_product = np.linalg.norm(self.vector) * np.linalg.norm(other_vector)
        if norm_product == 0:
            return 0.0
        return dot_product / norm_product

class RouteSimilarityCalculator:
    """Compare a predicted route with a collection of test routes on the grid.

    Routes are objects exposing ``lat`` and ``lon`` sequences.  Grid lookups
    go through the indexer passed to the constructor or, when none is given,
    through the module-level ``indexer`` object.
    """

    def __init__(self, PR, TR, grid_indexer=None):
        """Store the routes.

        Args:
            PR: predicted route.
            TR: iterable of test routes.
            grid_indexer: optional object providing ``get_index(lat, lon)``;
                defaults to the global ``indexer``.
        """
        self.PR = PR
        self.TR = TR
        self.grid_indexer = grid_indexer

    def _indexer(self):
        """Return the injected indexer, falling back to the global ``indexer``."""
        return indexer if self.grid_indexer is None else self.grid_indexer

    def get_route_indices(self, route):
        """Return the sorted, distinct cell numbers of an iterable of (lat, lon) points."""
        grid = self._indexer()
        return sorted({grid.get_index(point[0], point[1]) for point in route})

    def find_matching_routes(self):
        """Return ``(predict_route, res_route)``.

        ``predict_route`` is the sorted list of distinct cells of the predicted
        route; ``res_route`` is the sorted subset of those cells that is also
        visited by at least one test route.
        """
        predict_route = self.get_route_indices(zip(self.PR.lat, self.PR.lon))

        the_other_route = [
            point
            for route in self.TR
            for point in zip(route.lat, route.lon)
        ]
        data_cells = set(self.get_route_indices(the_other_route))

        # Plain intersection; the previous version popped from the list it was
        # iterating over to reach the same result.
        res_route = [cell for cell in predict_route if cell in data_cells]
        return predict_route, res_route

    # The visualisation helpers that used to sit here as a disabled string
    # literal were dropped: in this cell main() draws the map through
    # indexer.save_visualization(PR, TR).

    def calculate_cosine_similarity(self):
        """Return the cosine similarity between the predicted cells and the matched cells."""
        predict_route, res_route = self.find_matching_routes()
        cosine_calculator = CosineSimilarityCalculator(predict_route)
        return cosine_calculator.calculate_similarity(res_route)
    
# Single shared grid; RouteSimilarityCalculator looks this object up by name.
indexer = GridIndexer(min_lat, max_lat, min_lon, max_lon, cell_size)


def main(PR, TR):
    """Save the route map via the shared indexer and return the cosine similarity.

    Args:
        PR: predicted route.
        TR: collection of test routes.
    """
    calculator = RouteSimilarityCalculator(PR, TR)
    indexer.save_visualization(PR, TR)
    return calculator.calculate_cosine_similarity()


if __name__ == '__main__':
    res = main(Pred_Route, Test_Route)
    print(f'cos simiarity: {res}')


cos simiarity: 0.8569633695778818
