In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely
from shapely.geometry import Point, LineString, MultiLineString, Polygon, MultiPolygon
from tqdm.notebook import tqdm
import time
from datetime import datetime

import requests
import re
from bs4 import BeautifulSoup
import json
import urllib

In [2]:
# Read the transport-region layer and reproject it to EPSG:4326 (lon/lat).
# Disabled manual CRS override, kept for reference:
# transp_reg.crs = 'epsg:53004'
transp_reg = (
    gpd.read_file('../data/from/transp_reg_st.shp', encoding='utf-8')
    .to_crs('epsg:4326')
)

In [3]:
def find_js(lnk):
    """Download a page and return the JSON embedded in one of its <script> tags.

    The first ``<script type="application/json">`` element is preferred. When
    the page has none, the very first ``<script>`` element is parsed instead
    (the fallback this function has always used).

    Parameters
    ----------
    lnk : str
        URL of the page to fetch.

    Returns
    -------
    object
        Whatever ``json.loads`` produces from the chosen script's text.

    Raises
    ------
    ValueError
        If the page contains no ``<script>`` element at all, or (as
        ``json.JSONDecodeError``, a ``ValueError`` subclass) if the chosen
        script's text is not valid JSON.
    requests.RequestException
        If the HTTP request fails or exceeds the 25 s timeout.
    """
    with requests.get(lnk, stream=True, timeout=25) as req:
        bs = BeautifulSoup(req.text, 'html.parser')

    all_scrpt = bs.find_all("script")
    if not all_scrpt:
        # Without this guard a script-less page left the index variable
        # unbound and the function died with an opaque UnboundLocalError.
        raise ValueError("no <script> tags found at {}".format(lnk))

    # First JSON-typed script, otherwise fall back to the first script.
    json_scr = next(
        (scr for scr in all_scrpt if scr.get('type') == "application/json"),
        all_scrpt[0],
    )
    return json.loads(json_scr.text)

In [4]:
def get_poi(word, srch_pt, poly):
    """Search Yandex Maps for `word` near `srch_pt` and keep the first hit inside `poly`.

    Parameters
    ----------
    word : str
        Search phrase; it is URL-quoted before being placed in the request.
    srch_pt : point-like
        Object exposing ``.coords``; ``coords[0][0]`` and ``coords[0][1]`` are
        written, in that order, into the ``ll`` and ``sll`` URL parameters.
    poly : geometry
        Object exposing ``.intersects``; a search result is accepted only if
        its point intersects this geometry.

    Returns
    -------
    list
        ``[word, title, Point]`` for the first result located inside `poly`,
        or ``[word + '_not_found', None, srch_pt]`` when the search returns
        nothing or no result falls inside `poly`.
    """
    str_qt = urllib.parse.quote(word)

    # The same "x%2Cy" pair is used for both the map centre (ll) and the
    # search centre (sll).
    coords = str(srch_pt.coords[0][0]) + "%2C" + str(srch_pt.coords[0][1])

    url = ("https://yandex.ru/maps/51/samara/search/{str_qt}/"
           "?ll={coords1}&sll={coords2}&sspn=0.015134%2C0.006711&z=15.96"
           ).format(str_qt=str_qt, coords1=coords, coords2=coords)

    js = find_js(url)
    fnd_items = js['searchPreloadedResults']['items']

    for item in fnd_items:
        fnd_pt = Point(item['coordinates'])
        if poly.intersects(fnd_pt):
            # Take the title from the SAME item whose point matched; reading
            # it from the first search result paired the geometry of one POI
            # with the name of another.
            return [word, item['title'], fnd_pt]

    return [word + '_not_found', None, srch_pt]

In [5]:
def get_reg_centre(reg, lst_words):
    """Look up one POI per search phrase for a single region.

    Parameters
    ----------
    reg : single-row frame
        Must expose ``geometry[0]`` (the region geometry) and ``NO[0]`` (the
        region id), i.e. its index has to start at 0.
    lst_words : iterable of str
        Search phrases; each one is passed to ``get_poi`` together with the
        region's centroid and geometry.

    Returns
    -------
    list of list
        One ``[reg_id, *get_poi(...)]`` row per phrase, in input order.

    Notes
    -----
    A failed lookup is retried once after a 30 s pause; a second failure
    propagates to the caller.
    """
    poly = reg.geometry[0]
    srch_pt = poly.centroid
    reg_id = reg.NO[0]

    bg_lst_found = []
    for word in lst_words:
        try:
            found = get_poi(word, srch_pt, poly)
        except Exception:
            # `except Exception` (not a bare except) so that Ctrl-C /
            # kernel interrupt is not swallowed and turned into a 30 s sleep.
            time.sleep(30)
            found = get_poi(word, srch_pt, poly)
        bg_lst_found.append([reg_id] + found)

    return bg_lst_found

In [6]:
def get_dist_n_time(js, tp_ts):
    """Pull distance and durations of the first route out of a router response.

    Parameters
    ----------
    js : dict
        Parsed page JSON; only ``js['routerResponse']['routes'][0]`` is read.
    tp_ts : str
        ``'auto'`` returns all three figures; any other value returns only the
        average duration.

    Returns
    -------
    list
        ``[distance, duration, duration_in_traffic]`` where distance is the
        raw value divided by 1000 (2 decimals) and durations are the raw
        values divided by 60 (1 decimal). For non-``'auto'`` modes distance
        and traffic duration are ``None``.
    """
    first_route = js['routerResponse']['routes'][0]

    if tp_ts == 'auto':
        return [
            round(first_route['distance']['value'] / 1000, 2),
            round(first_route['duration'] / 60, 1),
            round(first_route['durationInTraffic'] / 60, 1),
        ]

    return [None, round(first_route['duration'] / 60, 1), None]

In [7]:
def make_str_coords(one_reg_pt, two_reg_pt):
    """Build the ``"y1%2Cx1~y2%2Cx2"`` string used for the ``rtext`` URL parameter.

    For each point the second coordinate (``coords[0][1]``) is written first,
    followed by the URL-encoded comma and the first coordinate
    (``coords[0][0]``); the two points are joined with ``~``.
    """
    pieces = []
    for pt in (one_reg_pt, two_reg_pt):
        pieces.append(str(pt.coords[0][1]) + "%2C" + str(pt.coords[0][0]))
    return "~".join(pieces)

In [8]:
# Search phrases (Russian) looked up for every region by get_reg_centre.
lst_words = [
    'Торговый центр',           # shopping centre
    'Железнодорожная станция',  # railway station
    'Автостанция',              # bus station
    'Площадь',                  # square
    'Больница',                 # hospital
    'ВУЗ',                      # university
    'Завод',                    # factory
]

In [216]:
# DISABLED CELL: scrapes POIs for every row of transp_reg (one
# get_reg_centre call per region) and accumulates the rows in bg_lst_pts.
# NOTE(review): presumably left commented out because the results were
# already saved to disk -- confirm before re-enabling.
# bg_lst_pts = []

# i=0
# # for i in tqdm(range(2)):
# for i in tqdm(range(len(transp_reg))):
#     reg = transp_reg.iloc[[i]].reset_index(drop=True)
#     bg_lst_found = get_reg_centre(reg, lst_words)
#     bg_lst_pts = bg_lst_pts + bg_lst_found
# # 

HBox(children=(FloatProgress(value=0.0, max=115.0), HTML(value='')))




In [217]:
# DISABLED CELL: turns the scraped rows (bg_lst_pts) into a GeoDataFrame
# with CRS EPSG:4326. Depends on the disabled scraping cell that fills
# bg_lst_pts.
# clmns = ['reg_id', 'grp_pt', 'name_pt', 'geometry']
# df = pd.DataFrame(bg_lst_pts, columns = clmns)
# gdf_pts = gpd.GeoDataFrame(df, geometry='geometry')
# gdf_pts.crs='epsg:4326'

In [218]:
# DISABLED CELL: writes all scraped points to GeoJSON.
# NOTE(review): this path starts with './data/...' while the notebook reads
# its inputs from '../data/...', and the file loaded later is
# 'gdf_pts_all_select.geojson', not this one -- presumably a manually
# filtered copy; confirm.
# gdf_pts.to_file('./data/res/gdf_pts_all.json', driver='GeoJSON', encoding='utf-8')

In [3]:
# Load the selected points per region (columns used later: reg_id, geometry).
gdf_pts_select = gpd.read_file(
    '../data/res/gdf_pts_all_select.geojson',
    encoding='utf-8',
)

In [11]:
def get_rt_data(one_reg_pt, two_reg_pt, reg_id_from, reg_id_to):
    """Fetch car and public-transport route figures between two points.

    Two Yandex Maps route pages are requested (``rtt=mt`` and ``rtt=auto``)
    and their embedded JSON is parsed with ``find_js``.

    Parameters
    ----------
    one_reg_pt, two_reg_pt : point-like
        Origin and destination; must expose ``.coords``, ``.x`` and ``.y``.
    reg_id_from, reg_id_to :
        Region ids, copied unchanged into the result row.

    Returns
    -------
    list of list
        A single row:
        ``[reg_id_from, reg_id_to, distance, duration, duration_in_traffic,
        mt_duration, x_from, y_from, x_to, y_to, LineString]``.
        The first three figures come from the car route; ``mt_duration`` is
        ``None`` when the public-transport response cannot be read.

    Raises
    ------
    Exception
        Anything raised while fetching either page or while reading the car
        route propagates to the caller; only the public-transport figures
        are optional.
    """
    coord_str = make_str_coords(one_reg_pt, two_reg_pt)

    # Map centre for the `ll` parameter: coords[0][0] first, then coords[0][1],
    # the same order get_poi and get_mt_rt_data use. The previous version
    # wrote the pair the other way round here only.
    str_coord = str(one_reg_pt.coords[0][0]) + "%2C" + str(one_reg_pt.coords[0][1])

    url_tmpl = ("https://yandex.ru/maps/51/samara/"
                "?ll={}&mode=routes&rtext={}&rtt={}&ruri=~&z=14.41")
    js_mt = find_js(url_tmpl.format(str_coord, coord_str, "mt"))
    js_auto = find_js(url_tmpl.format(str_coord, coord_str, "auto"))

    lst_dist_time_auto = get_dist_n_time(js_auto, 'auto')
    try:
        lst_dist_time_mt = get_dist_n_time(js_mt, 'mt')
    except Exception:
        # Best effort: a missing/odd transit route must not discard the car
        # data. `Exception` (not bare) keeps kernel interrupts working.
        lst_dist_time_mt = [None, None, None]

    # Geometry of the first car route.
    line_geo = LineString(js_auto['routerResponse']['routes'][0]['coordinates'])

    return [[reg_id_from, reg_id_to]
            + lst_dist_time_auto
            + [lst_dist_time_mt[1]]
            + [one_reg_pt.x, one_reg_pt.y, two_reg_pt.x, two_reg_pt.y]
            + [line_geo]]

In [143]:
def get_mt_rt_data(one_reg_pt, two_reg_pt, reg_id_from, reg_id_to):
    """List the transport legs of the first public-transport route between two points.

    Parameters
    ----------
    one_reg_pt, two_reg_pt : point-like
        Origin and destination; must expose ``.coords``.
    reg_id_from, reg_id_to :
        Region ids, copied unchanged into every output row.

    Returns
    -------
    list of list
        One row per segment whose ``type`` is ``'transport'``:
        ``[reg_id_from, reg_id_to, seq, line_id, type, name, distance]``
        where ``seq`` counts transport segments from 1 and ``distance`` is the
        segment's raw distance value divided by 1000, rounded to 2 decimals.
        Only the first entry of each segment's ``transports`` list is used.
    """
    route_pair = make_str_coords(one_reg_pt, two_reg_pt)

    origin_xy = one_reg_pt.coords[0]
    map_centre = str(origin_xy[0]) + "%2C" + str(origin_xy[1])

    # URL-encoded "routes[avoidTypes]=railway,water"; presumably asks the
    # router to skip rail and water transport -- confirm against the service.
    avoid_tp = '&routes%5BavoidTypes%5D=railway%2Cwater'

    url_mt = ("https://yandex.ru/maps/51/samara/"
              "?ll={}&mode=routes{}&rtext={}&rtt=mt&ruri=~&z=14.41"
              ).format(map_centre, avoid_tp, route_pair)

    segments = find_js(url_mt)['routerResponse']['routes'][0]['paths'][0]['segments']

    # Non-transport segments are skipped and do not advance the sequence number.
    transport_legs = (seg for seg in segments if seg['type'] == 'transport')

    lst_tp_seq = []
    for seq, seg in enumerate(transport_legs, start=1):
        vehicle = seg['transports'][0]
        lst_tp_seq.append([
            reg_id_from,
            reg_id_to,
            seq,
            vehicle['id'],
            vehicle['type'],
            vehicle['name'],
            round(seg['distance']['value'] / 1000, 2),
        ])
    return lst_tp_seq

In [12]:
# Origins and destinations are the same set of region ids, kept as two
# separate list objects.
reg_ids_unique = gdf_pts_select.reg_id.unique()
lst_from = list(reg_ids_unique)
lst_to = list(reg_ids_unique)

In [144]:

def _mt_rt_with_retry(pt_from, pt_to, id_from, id_to, pause=30):
    """Fetch transit legs for one direction, retrying once after `pause` seconds.

    Returns the rows produced by get_mt_rt_data, or an empty list when both
    attempts fail (the failed id pair is printed so it can be re-run later).
    """
    try:
        return get_mt_rt_data(pt_from, pt_to, id_from, id_to)
    except Exception:
        # `Exception`, not a bare except: a kernel interrupt must stop the
        # run instead of triggering a sleep-and-retry.
        time.sleep(pause)
    try:
        return get_mt_rt_data(pt_from, pt_to, id_from, id_to)
    except Exception:
        print(id_from, id_to)
        return []


def _reg_point(reg_id):
    """Return the first geometry stored in gdf_pts_select for `reg_id`."""
    matches = gdf_pts_select[gdf_pts_select.reg_id == reg_id]
    return matches.reset_index(drop=True).geometry[0]


lst_total_data = []
lst_done = []

# Sample run: 1 destination x 4 origins.
# Drop the slices ([:1] / [:4]) to process every pair of regions.
for reg_id_to in tqdm(lst_to[:1]):
    for reg_id_from in tqdm(lst_from[:4]):

        dnpr = str(reg_id_from) + "_" + str(reg_id_to)
        if dnpr in lst_done:
            continue
        lst_done.append(dnpr)

        # Both directions are fetched below, so the mirrored pair must be
        # marked as done too; otherwise a full run requests every pair twice
        # and duplicates its rows.
        dnpr_rev = str(reg_id_to) + "_" + str(reg_id_from)
        if dnpr_rev != dnpr:
            lst_done.append(dnpr_rev)

        # Skip self-pairs and anything involving region 532 (excluded by the
        # original analysis; the reason is not recorded in this notebook).
        if reg_id_from == reg_id_to or reg_id_from == 532 or reg_id_to == 532:
            continue

        one_reg_pt = _reg_point(reg_id_from)
        two_reg_pt = _reg_point(reg_id_to)

        # Forward direction, then the reverse one.
        lst_total_data.extend(
            _mt_rt_with_retry(one_reg_pt, two_reg_pt, reg_id_from, reg_id_to))
        lst_total_data.extend(
            _mt_rt_with_retry(two_reg_pt, one_reg_pt, reg_id_to, reg_id_from))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))





In [145]:
# Column layout of the rows accumulated in lst_total_data.
clmns = [
    'id_reg_from',
    'id_reg_to',
    'seq_in_rt',
    'line_id',
    'tp_ts',
    'name',
    'distance',
]
df_rt = pd.DataFrame(data=lst_total_data, columns=clmns)

In [84]:
# Save the route legs as a semicolon-separated CSV (UTF-8 with BOM, no index).
# NOTE(review): inputs are read from '../data/...' but this writes under
# './data/...' -- confirm the intended output directory.
df_rt.to_csv(
    './data/res/df_rt_mt1.csv',
    sep=';',
    encoding='utf-8-sig',
    index=False,
)