## Yandex organization API

This notebook works with the [Yandex Organization Search API](https://tech.yandex.ru/maps/geosearch/doc/concepts/request-docpage/). You need your own API key to reproduce the process.

The free limit for an API key is 500 requests per day, with up to 500 organizations returned per request.

The notebook retrieves organizations (as points with coordinates) that match a given list of types within a bounding box; the bounding box is derived from the city borders in OSM.

In [2]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
import pandas as pd
import requests
from pyproj import CRS
from shapely.geometry import Point, LineString, Polygon
from shapely.geometry import box

### importing data

In [3]:
# Load the API key table and index it by key name so keys can be looked up by label
api_keys = pd.read_excel('../api_keys.xlsx').set_index('key_name')

# Key for the Yandex organization search API
search_api_key = api_keys.loc['yandex_search', 'key']

In [4]:
# Full place name used to geocode the city boundary in OSM
# (passed to ox.geocode_to_gdf in the next cell)
place_name = 'городское поселение Альметьевск'

In [5]:
# Geocode the place name in OSM and get its boundary as a GeoDataFrame
area = ox.geocode_to_gdf(place_name)

# Set the boundary's CRS to EPSG:4326, supplied as a WKT string
area.crs = CRS.from_epsg(4326).to_wkt()

# Display the GeoDataFrame to check the geometry and bbox_* columns
area

Unnamed: 0,geometry,place_name,bbox_north,bbox_south,bbox_east,bbox_west
0,"POLYGON ((52.20360 54.92292, 52.20461 54.91989...","Almetyevsk, Almetyevsky District, Tatarstan, V...",54.950882,54.850074,52.462423,52.203597


In [6]:
# South-west corner of the OSM bounding box (first row of `area`)
sw_lat = area['bbox_south'][0]
sw_lon = area['bbox_west'][0]

# North-east corner of the OSM bounding box (first row of `area`)
ne_lat = area['bbox_north'][0]
ne_lon = area['bbox_east'][0]

In [7]:
# Search queries to send to the API, grouped by category;
# each string is used as the `text` of one search

education_list = ['детский сад', 'школа']
commercial_list = ['кафе', 'бар', 'супермаркет']

### request

In [8]:
# Quadrant splitting, used in case paging with skip counts runs out of its limit.
# Print the full bounding box first (west, south, east, north) for reference.

print(sw_lon, sw_lat, ne_lon, ne_lat)

def split_into_quadrants(sw_lon, sw_lat, ne_lon, ne_lat):
    """Lazily yield the four quadrants of a bounding box.

    Parameters
    ----------
    sw_lon, sw_lat : float
        Longitude and latitude of the south-west corner.
    ne_lon, ne_lat : float
        Longitude and latitude of the north-east corner.

    Yields
    ------
    tuple
        ``(sw_lon, sw_lat, ne_lon, ne_lat)`` of one quadrant. Quadrants are
        produced west column first, and south before north within a column.
    """
    # Each quadrant spans half of the parent box along both axes
    half_width = (ne_lon - sw_lon) / 2
    half_height = (ne_lat - sw_lat) / 2

    # South-west corners of the quadrants: the parent's own edge and the midpoint
    west_edges = (sw_lon, (sw_lon + ne_lon) / 2)
    south_edges = (sw_lat, (sw_lat + ne_lat) / 2)

    yield from (
        (west, south, west + half_width, south + half_height)
        for west in west_edges
        for south in south_edges
    )

# Show the four quadrants of the full bounding box, one tuple per line
for quadrant in split_into_quadrants(sw_lon, sw_lat, ne_lon, ne_lat):
    print(quadrant)

52.2035972 54.8500741 52.4624232 54.9508817
(52.2035972, 54.8500741, 52.333010200000004, 54.9004779)
(52.2035972, 54.9004779, 52.333010200000004, 54.9508817)
(52.333010200000004, 54.8500741, 52.4624232, 54.9004779)
(52.333010200000004, 54.9004779, 52.4624232, 54.9508817)


In [9]:
def split_single_query_request(search_query, sw_lon, sw_lat, ne_lon, ne_lat):
    """Run one search query over each quadrant of a bounding box.

    The box is divided with ``split_into_quadrants`` and every quadrant is
    queried through ``single_query_request``; the per-quadrant feature lists
    are concatenated in quadrant order.

    Parameters
    ----------
    search_query : str
        Text of the organization search.
    sw_lon, sw_lat, ne_lon, ne_lat : float
        South-west and north-east corners of the box to split.

    Returns
    -------
    list
        Features returned for all four quadrants (not de-duplicated).
    """
    print(f"Splitting {search_query} by 4 at", sw_lon, sw_lat, ne_lon, ne_lat)
    collected = []
    for quadrant in split_into_quadrants(sw_lon, sw_lat, ne_lon, ne_lat):
        # quadrant is (sw_lon, sw_lat, ne_lon, ne_lat), matching the callee's argument order
        collected.extend(single_query_request(search_query, *quadrant))
    return collected


def single_query_request(search_query, sw_lon, sw_lat, ne_lon, ne_lat):
    """Fetch the organizations matching one query inside a bounding box.

    Pages through the search endpoint 500 results at a time (skip = 0, 500,
    1000). If all three pages come back full, the pages are discarded and the
    box is re-queried quadrant by quadrant via ``split_single_query_request``.

    Parameters
    ----------
    search_query : str
        Text of the organization search.
    sw_lon, sw_lat, ne_lon, ne_lat : float
        South-west and north-east corners of the search box.

    Returns
    -------
    list
        Feature dicts taken from the ``features`` field of the responses.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status (e.g. bad key or
        exhausted quota).
    requests.Timeout
        If the API does not answer within the timeout.
    """
    URL = 'https://search-maps.yandex.ru/v1/'
    PAGE_SIZE = 500
    TIMEOUT_SECONDS = 30
    # The bbox string carries 6 decimals, so boxes much smaller than this can
    # no longer be split into distinct quadrants.
    MIN_SPAN_DEGREES = 1e-5

    bbox = f"{sw_lon:.6f},{sw_lat:.6f}~{ne_lon:.6f},{ne_lat:.6f}"
    results = []
    for skip_cnt in [0, 500, 1000]:
        params = {
            'text': search_query,
            'apikey': search_api_key,
            'lang': 'ru_RU',
            'type': 'biz',
            'bbox': bbox,
            'rspn': 1,
            'results': PAGE_SIZE,
            'skip': skip_cnt
        }
        # A timeout keeps a stalled connection from hanging the notebook
        response = requests.get(URL, params=params, timeout=TIMEOUT_SECONDS)
        # Fail loudly on HTTP errors instead of a KeyError on 'features' below
        response.raise_for_status()
        features = response.json()['features']
        results += features
        if len(features) < PAGE_SIZE:
            # a short page means there is nothing left to page through
            break
    else:
        # Every page was full, so the box may hold more than paging can reach
        if min(ne_lon - sw_lon, ne_lat - sw_lat) < MIN_SPAN_DEGREES:
            # Splitting further would repeat the same request indefinitely;
            # keep what was fetched instead of recursing.
            print(f"Cannot split {search_query} any further at", bbox)
            return results
        return split_single_query_request(search_query, sw_lon, sw_lat, ne_lon, ne_lat)
    return results

In [12]:
def organization_request(item_type):
    """Request organizations for a group of search queries and tabulate them.

    Every query is run over the notebook-level bounding box
    (``sw_lon, sw_lat, ne_lon, ne_lat``) with ``single_query_request``; the
    features are de-duplicated by organization id and flattened into one row
    per organization.

    Parameters
    ----------
    item_type : list of str or str
        Search queries to run. A single string is treated as one query
        rather than being iterated character by character.

    Returns
    -------
    pandas.DataFrame
        One row per unique organization, with ``lon``/``lat``, the flattened
        ``properties`` and ``CompanyMetaData`` fields, ``category`` and
        ``category_name`` taken from the first category, and one boolean
        ``cat_<class>`` column per category class. Empty if nothing was found.
    """
    # A bare string would otherwise yield one request per character
    if isinstance(item_type, str):
        item_type = [item_type]

    results = []

    # request
    for search_query in item_type:
        results += single_query_request(search_query, sw_lon, sw_lat, ne_lon, ne_lat)

    # take only unique values in results: one feature per organization id
    # (a later duplicate replaces an earlier one)
    result_dict = {}
    for result in results:
        result_dict[result['properties']['CompanyMetaData']['id']] = result
    results_unique = list(result_dict.values())

    # transform json, delete excess columns, create category columns
    for res in results_unique:
        res.update(res['properties'])
        res['lon'], res['lat'] = res['geometry']['coordinates']
        res.update(res['CompanyMetaData'])

        # An organization may come without categories; fall back to UNKNOWN
        # instead of failing on the missing key or the empty list
        categories = res.get('Categories') or []
        primary = categories[0] if categories else {}
        if 'class' not in primary:
            print("No 'class' in category: ", primary)
        res['category'] = primary.get('class', 'UNKNOWN')
        res['category_name'] = primary.get('name', 'UNKNOWN')
        for cat in categories:
            res['cat_' + cat.get('class', 'UNKNOWN').replace(' ', '_')] = True

        # drop the nested / unused fields now that they are flattened
        for key in ['properties', 'CompanyMetaData', 'boundedBy', 'type', 'geometry', 'url', 'Phones', 'Hours', 'Categories']:
            res.pop(key, None)

    # create dataframe out of transformed results
    df = pd.DataFrame(results_unique)

    return df

### saving result

In [13]:
# create csv files for every category in request

# Map each output file stem to its list of search queries. Passing only the
# variable name (a string) to organization_request would search for the
# individual characters of that name instead of the intended queries.
cat_list = {
    'education_list': education_list,
    'commercial_list': commercial_list,
}

# DataFrame.to_csv does not create missing directories
os.makedirs("./output", exist_ok=True)

for item, search_queries in cat_list.items():
    df = organization_request(search_queries)
    df.to_csv("./output/{}.csv".format(str(item)))