In [None]:
# Mount Google Drive only when running on Colab. Outside Colab the `google.colab`
# package is not installed, and the notebook falls back to a local data directory,
# so a missing package must not abort the run.
try:
    from google.colab import drive
except ImportError:
    print('google.colab is not available; skipping Google Drive mount.')
else:
    drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import os
import json
import requests

import pandas as pd
import geopandas as gpd

from shapely.geometry import Point, LineString

In [None]:
# Use the Capstone data folder on the mounted Google Drive when it exists;
# otherwise resolve a sibling `Data` directory relative to the current working directory.
base_dir = (
    '/content/drive/My Drive/Capstone/Data'
    if os.path.exists('/content/drive/My Drive/Capstone/Data')
    else os.path.abspath(os.path.join(os.getcwd(), '..', 'Data'))
)

/content/drive/My Drive/Capstone/Codes


### Getting the Transport Line Info

In [None]:
# Running totals across all scraped lines: plain metadata records plus two empty,
# WGS84-tagged GeoDataFrames that later cells append stops and route segments to.
all_items = []
all_stops, all_routes = (
    gpd.GeoDataFrame(columns=['geometry'], crs='EPSG:4326') for _ in range(2)
)

In [None]:
import requests

# NOTE(review): the cookies, `csrfToken` and `sessionId` below were captured from a single
# browser session on Yandex Maps. The token and the session id embed the same Unix timestamp,
# so they are presumably short-lived and need to be refreshed by hand -- confirm, and avoid
# committing live session values to version control.
cookies = {
    'maps_los': '1',
    'is_gdpr': '0',
    'is_gdpr_b': 'COiFURCktQI=',
    '_yasc': 'dchhoxKe1hjKtLYCnWfEBd3TtFsb8cEPLKwaApMhYObbauldOr0XHfLhkzAXeNnfGVNrcGU=',
    'i': 'qWgzidZiRzavYtmh4Mb2ib4HPUSTZKFhq842XGhIKPVH/ZXpiRRHrPVNn4ZQ/lL2OopUZ0jwhECnYElwD8uvqfCUOSo=',
    'yandexuid': '2231125781742116189',
    'yashr': '899727881742116189',
    'receive-cookie-deprecation': '1',
    'bh': 'YMuoi8EGahbcyuH/CJLYobEDn8/14QyD0vKOA5t7',
    'yuidss': '2231125781742116189',
    '_ym_uid': '174211619293781203',
    '_ym_d': '1742116193',
    'cycada': 'RWOXZ/+UpcQGOgQmGBXXvJ/6LGIxb4ZbU6QcSl/dbp0=',
    'maps_routes_travel_mode': 'masstransit',
    'yabs-vdrf': 'A0',
    'gdpr': '0',
    '_ym_isad': '2',
}

# Page of the line being scraped; the captured request sends it as both Referer and X-Retpath-Y.
line_page_url = 'https://yandex.com/maps/10262/yerevan/routes/bus_29/796d617073626d313a2f2f7472616e7369742f6c696e653f69643d34373134373237313333266c6c3d34342e34393831353725324334302e313534363933266e616d653d323926723d313230303026747970653d627573/?ll=44.538816%2C40.189420&tab=stops&z=14.81'

# The captured request's Accept-Encoding, Cookie and TE headers are intentionally not set here:
# cookies are passed through `cookies=` instead, and requests does not support trailers.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Referer': line_page_url,
    'X-Retpath-Y': line_page_url,
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'Priority': 'u=0',
}

# Query for one transport line; `lineId` / `threadId` select which line and thread are fetched.
params = {
    'ajax': '1',
    'csrfToken': 'da7278ca674e615d1f05578e3b50ceff9c195d30:1747113035',
    'lang': 'en',
    'lineId': '4714727133',
    'locale': 'en_AM',
    'mobileViewState': 'mini',
    'openedBy[stopId]': '1788012836',
    'openedBy[vehicleId]': 'yerevan2|29R5506N',
    's': '2002483088',
    'sessionId': '1747113035105633-899359693392209765-balancer-l7leveler-kubr-yp-vla-56-BAL',
    'threadId': '4714727743',
}

response = requests.get(
    'https://yandex.com/maps/api/masstransit/getLine',
    params=params,
    cookies=cookies,
    headers=headers,
    timeout=30,  # requests has no default timeout; without one a stalled connection hangs the cell
)
# Fail here with a clear HTTP error instead of later while indexing into the JSON payload.
response.raise_for_status()

In [None]:
# Decode the JSON body once, then pull out the per-thread feature list stored under data -> features.
line_payload = response.json()
features = line_payload['data']['features']

In [None]:
def extract_yandex_data(features):
    """
    Parse Yandex transport line features into line metadata, stops and route segments.

    Args:
        features (list): Feature dicts. Each one is read for
            ``properties.ThreadMetaData`` (thread metadata) and ``features``, a mixed list
            holding stop entries (recognised by having both ``id`` and ``coordinates``)
            and segment entries (recognised by having ``points``).

    Returns:
        tuple: ``(items, stops, routes)`` -- in exactly this order:
            - items (list of dict): One metadata record per thread: transport/line IDs, name,
              type, the first two essential stops (``None`` where fewer than two are given)
              and, when breadcrumbs are present, ``line_link``.
            - stops (list of dict): One record per stop entry with IDs, name and a Point geometry.
            - routes (list of dict): One record per segment that has a stop on both ends, with
              start/end stop IDs and names and a LineString geometry.
    """
    items = []
    stops = []
    routes = []

    for obj in features:
        thread_meta = obj['properties']['ThreadMetaData']
        transport_id = thread_meta['id']
        line_id = thread_meta['lineId']

        item = {
            'transport_id': transport_id,
            'line_id': line_id,
            'name': thread_meta['name'],
            'type': thread_meta['type'],
        }

        # Record the first two essential stops; a thread that lists fewer (or none)
        # gets None placeholders instead of raising.
        essential_stops = thread_meta.get('EssentialStops') or []
        for position in (1, 2):
            if position <= len(essential_stops):
                essential = essential_stops[position - 1]
                item[f'essential_stop_id_{position}'] = essential['id']
                item[f'essential_stop_name_{position}'] = essential['name']
            else:
                item[f'essential_stop_id_{position}'] = None
                item[f'essential_stop_name_{position}'] = None

        if 'breadcrumbs' in thread_meta:
            item['line_link'] = thread_meta['breadcrumbs'][-1]['url']

        route_features = obj['features']
        stop_features = [f for f in route_features if 'id' in f and 'coordinates' in f]
        segment_features = [f for f in route_features if 'points' in f]

        for stop in stop_features:
            coordinates = stop['coordinates']
            stops.append({
                'transport_id': transport_id,
                'line_id': line_id,
                'stop_id': stop['id'],
                'stop_name': stop['name'],
                'geometry': Point(float(coordinates[0]), float(coordinates[1])),
            })

        # The j-th segment is paired with the j-th and (j + 1)-th stop. zip() ends at the
        # shortest input, so segments without a stop on both ends are skipped before any
        # geometry is built for them.
        # NOTE(review): this assumes segments and stops arrive in matching order -- confirm.
        for segment, start, end in zip(segment_features, stop_features, stop_features[1:]):
            line = LineString([(float(p[0]), float(p[1])) for p in segment['points']])
            routes.append({
                'transport_id': transport_id,
                'transport_name': thread_meta['name'],
                'line_id': line_id,
                'start_id': start['id'],
                'start_name': start['name'],
                'end_id': end['id'],
                'end_name': end['name'],
                'geometry': line,
            })

        items.append(item)
    return items, stops, routes

In [None]:
# extract_yandex_data returns (items, stops, routes) -- unpack in that same order so that
# stop points land in `stops` and route lines in `routes`.
items, stop_records, route_records = extract_yandex_data(features)
stops = gpd.GeoDataFrame(stop_records, geometry='geometry', crs='EPSG:4326')
routes = gpd.GeoDataFrame(route_records, geometry='geometry', crs='EPSG:4326')

In [None]:
# Fold this line's results into the running totals (a new list and new frames are built,
# the previous objects are not modified in place).
all_items = [*all_items, *items]
all_stops, all_routes = (
    gpd.GeoDataFrame(pd.concat([accumulated, batch], ignore_index=True))
    for accumulated, batch in ((all_stops, stops), (all_routes, routes))
)

In [None]:
# Everything is written under <base_dir>/yandex_scraped; create the folder first so the
# writes do not fail on a fresh data directory.
output_dir = os.path.join(base_dir, "yandex_scraped")
os.makedirs(output_dir, exist_ok=True)

all_stops.to_file(os.path.join(output_dir, "full_stops.geojson"), driver='GeoJSON')
all_routes.to_file(os.path.join(output_dir, "full_routes.geojson"), driver='GeoJSON')
# ensure_ascii=False keeps non-ASCII names readable in the JSON file.
with open(os.path.join(output_dir, "full_items.json"), 'w', encoding='utf-8') as f:
    json.dump(all_items, f, ensure_ascii=False, indent=2)