This notebook works with the Yandex organization search API: https://tech.yandex.ru/maps/geosearch/doc/concepts/request-docpage/

In [1]:
import pandas as pd
import folium
import requests

### importing data

In [2]:
# Load the API keys table and index it by key name.
api_keys = pd.read_excel('../api_keys.xlsx').set_index('key_name')

# Link to the Mapbox map used as an underlay for folium.
map_url = api_keys.loc['mapbox_map', 'key']

# Key for the Yandex organization search API.
search_api_key = api_keys.loc['yandex_search', 'key']

In [3]:
# Table of named points, indexed by point name.
coords = (
    pd.read_excel('./input_params.xlsx', sheet_name='coords')
    .set_index('point_name')
)

def extract_lon_lat(location):
    """Return the ``(lon, lat)`` pair stored for a named point.

    Parameters
    ----------
    location
        Index label of the point in the module-level ``coords`` table.

    Returns
    -------
    tuple
        The values of the ``lon`` and ``lat`` columns for that row, in that
        order.

    Raises
    ------
    KeyError
        If ``location`` is not a label in ``coords``.
    """
    point = coords.loc[location]
    longitude = point['lon']
    latitude = point['lat']
    return longitude, latitude

# Center point, then the south-west and north-east corners of the bounding box.
(center_lon, center_lat), (sw_lon, sw_lat), (ne_lon, ne_lat) = (
    extract_lon_lat(point_name)
    for point_name in ('center', 'southwest', 'northeast')
)

### <font color='red'>TODO: change the list of search queries and add a loop over several lists</font>

In [4]:
# Search phrases to request, one entry per phrase.
list_for_query = [
    'кафе',
    'ресторан',
    'школа',
    'парикмахерская',
]

### request

In [5]:
# Query the search API for every phrase, paging through the results.
URL = 'https://search-maps.yandex.ru/v1/'
PAGE_SIZE = 500        # number of objects requested per call (`results` parameter)
MAX_SKIP = 25000       # stop paging once the `skip` offset reaches this value
REQUEST_TIMEOUT = 30   # seconds; requests applies no timeout unless one is passed

# Bounding box as "sw_lon,sw_lat~ne_lon,ne_lat", six decimals each.
bbox = f"{sw_lon:.6f},{sw_lat:.6f}~{ne_lon:.6f},{ne_lat:.6f}"

results = []
for search_query in list_for_query:
    for skip_cnt in range(0, MAX_SKIP, PAGE_SIZE):
        params = {
            'text': search_query,
            'apikey': search_api_key,
            'lang': 'ru_RU',
            'type': 'biz',
            'bbox': bbox,
            'rspn': 1,
            'results': PAGE_SIZE,
            'skip': skip_cnt,
        }
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.get(URL, params=params, timeout=REQUEST_TIMEOUT)
        # Fail with an explicit HTTPError (bad key, quota, bad request) instead
        # of a confusing KeyError on 'features' further down.
        response.raise_for_status()
        response_json = response.json()
        features = response_json['features']
        results.extend(features)
        feature_count = len(features)
        # A short page means there is nothing left for this phrase.
        if feature_count < PAGE_SIZE:
            break

In [7]:
# Keep one feature per organization id. When an id repeats, the last
# occurrence is kept, at the position where the id was first seen.
result_dict = {
    feature['properties']['CompanyMetaData']['id']: feature
    for feature in results
}
results_unique = list(result_dict.values())

In [8]:
# Flatten each feature, drop unneeded fields, and create category columns.

# Nested containers and raw fields removed once their contents are flattened.
_DROPPED_KEYS = ('properties', 'CompanyMetaData', 'boundedBy', 'type', 'geometry',
                 'url', 'Phones', 'Hours', 'Categories')


def _flatten_feature(feature):
    """Return a flat copy of one search-result feature.

    The contents of ``properties`` and ``CompanyMetaData`` are lifted to the
    top level, ``geometry.coordinates`` is split into ``lon`` / ``lat``, the
    first category becomes ``category`` / ``category_name``, and every category
    class gets a boolean ``cat_<class>`` flag (spaces replaced by underscores).

    The input dict is not modified, so the raw features stay usable and the
    surrounding cells can be re-run. A record that has no ``properties`` key is
    treated as already flattened and returned unchanged. A feature with no or
    empty ``Categories`` gets ``None`` for ``category`` and ``category_name``
    instead of raising.
    """
    if 'properties' not in feature:
        # Already flattened (this cell was re-run): nothing left to do.
        return feature

    flat = dict(feature)  # shallow copy; the original feature dict stays intact
    flat.update(flat['properties'])
    flat['lon'], flat['lat'] = flat['geometry']['coordinates']
    flat.update(flat['CompanyMetaData'])

    categories = flat.get('Categories') or []
    primary = categories[0] if categories else {}
    flat['category'] = primary.get('class')
    flat['category_name'] = primary.get('name')
    for cat in categories:
        cat_class = cat.get('class')
        if cat_class:
            flat['cat_' + cat_class.replace(' ', '_')] = True

    for key in _DROPPED_KEYS:
        flat.pop(key, None)
    return flat


results_unique = [_flatten_feature(res) for res in results_unique]

In [9]:
# Build a DataFrame from the flattened records and preview the first rows.
df = pd.DataFrame(data=results_unique)
df.head(n=5)

Unnamed: 0,name,description,lon,lat,id,address,category,category_name,cat_cafe,cat_fallback_services,...,cat_industrial_enterprise,cat_hotels,cat_malls,cat_college,cat_fitness,cat_kindergarten,cat_office_service,cat_medicine,cat_hairdressers,cat_spa
0,Матрешка,"8, 96-й квартал, Ангарск, Россия",103.877732,52.523456,44339200146,"Россия, Иркутская область, Ангарск, 96-й кварт...",cafe,Столовая,True,,...,,,,,,,,,,
1,Хэштег,"26, 13-й микрорайон, Ангарск, Россия",103.866628,52.518456,201664324825,"Россия, Иркутская область, Ангарск, 13-й микро...",fallback services,Доставка еды и обедов,,True,...,,,,,,,,,,
2,Гагарин,"44, 22-й микрорайон, Ангарск, Россия",103.851133,52.51275,22944307632,"Россия, Иркутская область, Ангарск, 22-й микро...",cafe,Кафе,True,,...,,,,,,,,,,
3,Тбилиси,"ул. Крупской, 19, Ангарск, Россия",103.880632,52.521496,1772636194,"Россия, Иркутская область, Ангарск, улица Круп...",cafe,Кафе,True,,...,,,,,,,,,,
4,Оливка,"6А, 29-й микрорайон, Ангарск, Россия",103.860735,52.531374,1683546995,"Россия, Иркутская область, Ангарск, 29-й микро...",cafe,Кафе,True,,...,,,,,,,,,,


In [None]:
# Save the table to CSV; the row index is written as the first column.
df.to_csv(path_or_buf='./example.csv')