# Задание
* Собрать информацию о всех строящихся объектах на сайте "наш.дом.рф"
* Сохранить её в pandas DataFrame
* Сохранить pandas DataFrame в Excel
* Сохранить pandas DataFrame в pickle
* Сохранить pandas DataFrame в БД

In [63]:
import requests

## Список объектов

In [2]:
# Paging cursor and page size for the object-list endpoint.
offset_, limit_ = 0, 2000

# Accumulator shaped like one API response. 'total' starts as a placeholder
# so the paging loop runs at least once; the loop overwrites it with the
# count reported by the server.
objects_data = {
    'data': {'list': [], 'total': 10000},
    'errcode': '0',
}

In [3]:
# Page through the object-list endpoint, appending each page to objects_data
# until the offset reaches the total reported by the server.
while offset_ < objects_data['data']['total']:
    url = f'https://xn--80az8a.xn--d1aqf.xn--p1ai/%D1%81%D0%B5%D1%80%D0%B2%D0%B8%D1%81%D1%8B/api/kn/object?offset={offset_}&limit={limit_}&sortField=devId.devShortCleanNm&sortType=asc&objStatus=0'
    # requests applies no timeout by default; without one a stalled
    # connection would block this loop indefinitely.
    res = requests.get(url, timeout=60)
    # Surface HTTP-level failures instead of trying to parse an error page.
    res.raise_for_status()
    json = res.json()
    if int(json['errcode']) != 0:
        # Keep the pages collected so far, but record the failure code so a
        # truncated download is not mistaken for a complete one.
        objects_data['errcode'] = json['errcode']
        break
    objects_data['data']['list'] += json['data']['list']
    # Replace the placeholder/previous total with the server's current count.
    objects_data['data']['total'] = json['data']['total']
    offset_ += limit_

In [None]:
# Pull the accumulated records out of the response-shaped accumulator.
objects_list = objects_data['data']['list']
# Bare name as the last expression so the notebook displays the list.
objects_list

In [None]:
# Number of object records gathered by the paging loop.
len(objects_list)

In [6]:
import pandas as pd
# Flatten the list of JSON records into a table; json_normalize expands
# nested dicts into separate columns.
df = pd.json_normalize(objects_list)

### Сохранение

In [7]:
# Write the table to an Excel workbook; index=False leaves the row index
# out of the file.
df.to_excel('dmrf_all_objects.xlsx', index=False)

In [8]:
# Serialize the DataFrame to a pickle file.
df.to_pickle('dmrf_all_objects.pkl')

In [9]:
import sqlite3
# Open the SQLite database file named 'objects' (sqlite3 creates the file
# if it does not exist yet).
conn = sqlite3.connect('objects')

In [10]:
# Store the table as 'objects', dropping any earlier copy of that table and
# leaving the DataFrame index out; then make the write durable.
df.to_sql(
    name='objects',
    con=conn,
    if_exists='replace',
    index=False,
)
conn.commit()

### Проверка сохраненных данных

In [11]:
# Read the workbook back for the round-trip check.
df_excel = pd.read_excel('dmrf_all_objects.xlsx')

In [None]:
# Cell-by-cell check of the Excel round-trip. Plain `==` reports NaN cells as
# different (NaN != NaN), so cells that are missing in both frames are
# counted as matching as well.
(df_excel == df) | (df_excel.isna() & df.isna())

In [13]:
# Read the pickle back for the round-trip check.
# NOTE: unpickling can execute arbitrary code; only load files produced by
# this notebook or another trusted source.
df_pickle = pd.read_pickle("dmrf_all_objects.pkl")

In [None]:
# Cell-by-cell check of the pickle round-trip. Plain `==` reports NaN cells
# as different (NaN != NaN), so cells that are missing in both frames are
# counted as matching as well.
(df_pickle == df) | (df_pickle.isna() & df.isna())

In [15]:
# Read the whole 'objects' table back through the still-open connection.
df_sql = pd.read_sql('select * from objects', conn)

In [None]:
# Cell-by-cell check of the SQLite round-trip. Plain `==` reports NaN cells
# as different (NaN != NaN), so cells that are missing in both frames are
# counted as matching as well.
(df_sql == df) | (df_sql.isna() & df.isna())

In [17]:
# Release the SQLite connection now that the round-trip checks are done.
conn.close()

# Задание *
* Написать скрипт, который скачивает фотографии всех строящихся объектов с сайта "наш.дом.рф"

In [65]:
import asyncio
import os
import aiofiles
from  aiohttp import  ClientSession

In [66]:
# URL template for the single-object endpoint; the '{}' placeholder is filled
# with an object id via str.format. The host is punycode-encoded (presumably
# the site named in the task text above).
single_object_url_template = 'https://xn--80az8a.xn--d1aqf.xn--p1ai/%D1%81%D0%B5%D1%80%D0%B2%D0%B8%D1%81%D1%8B/api/object/{}'

In [67]:
async def fetch_photo_render_dto(obj_url: str, session: ClientSession):
    """Request ``obj_url`` and return the ``photoRenderDTO`` entry of its JSON body.

    Args:
        obj_url: URL of a single-object endpoint.
        session: aiohttp client session used to issue the GET request.

    Returns:
        The value stored under ``['data']['photoRenderDTO']`` in the decoded
        response (presumably a list of photo records; it is not validated
        here).

    Raises:
        KeyError: if the decoded body lacks ``data`` or ``photoRenderDTO``.
    """
    async with session.get(obj_url) as response:
        payload = await response.json()
    # The body is fully read above, so the lookup can happen after release.
    return payload['data']['photoRenderDTO']

In [68]:
async def fetch_image(image_url: str, session: ClientSession, image_name: str):
    """Download the raw bytes at ``image_url`` and pair them with ``image_name``.

    Args:
        image_url: URL of the image to download.
        session: aiohttp client session used to issue the GET request.
        image_name: label passed through unchanged so the caller can tell
            which download the bytes belong to.

    Returns:
        A ``(image_name, content)`` tuple, where ``content`` is the response
        body as bytes.
    """
    async with session.get(image_url) as response:
        content = await response.read()
    return (image_name, content)

In [69]:
async def fetch_photo_render_dto_list(template_url: str, obj_ids: [str], session: ClientSession):
    """Fetch the photo DTOs of several objects concurrently and flatten them.

    Args:
        template_url: URL template with one ``{}`` placeholder for the object id.
        obj_ids: iterable of object ids to substitute into the template.
        session: aiohttp client session shared by all requests.

    Returns:
        A single flat list containing every item of every per-object result,
        in the order of ``obj_ids``.
    """
    # One task per object so all requests are in flight at the same time.
    pending = [
        asyncio.create_task(
            fetch_photo_render_dto(template_url.format(obj_id), session)
        )
        for obj_id in obj_ids
    ]
    per_object = await asyncio.gather(*pending)

    # Merge the per-object collections into one list.
    flattened = []
    for dto_group in per_object:
        flattened.extend(dto_group)
    return flattened

In [70]:
async def fetch_image_list(photo_render_dto_list: [dict], session: ClientSession):
    """Download every image described by ``photo_render_dto_list`` concurrently.

    Args:
        photo_render_dto_list: iterable of mappings, each providing the keys
            ``objRenderPhotoUrl``, ``objId`` and ``objRenderPhotoNm``.
        session: aiohttp client session shared by all downloads.

    Returns:
        The list of ``fetch_image`` results, in input order. Each image is
        named ``"<objId>_<objRenderPhotoNm>"``.
    """
    downloads = [
        asyncio.create_task(
            fetch_image(
                dto['objRenderPhotoUrl'],
                session,
                f'{dto["objId"]}_{dto["objRenderPhotoNm"]}',
            )
        )
        for dto in photo_render_dto_list
    ]
    return await asyncio.gather(*downloads)

In [71]:
async def save_images(image_list: [tuple], directory: str):
    """Write each downloaded image to ``directory``, one file per image.

    Args:
        image_list: iterable of ``(image_name, data)`` pairs, where ``data``
            is the bytes content to store.
        directory: existing directory the files are written into; the target
            path is ``"<directory>/<image_name>"``.

    Files are written sequentially in binary mode.
    """
    for image_name, data in image_list:
        # `async with` closes the handle even if the write raises, which the
        # previous manual open/write/close sequence did not guarantee.
        async with aiofiles.open(f"{directory}/{image_name}", mode='wb') as f:
            await f.write(data)

#### Т.к. 10 тысяч запросов будут выполняться как минимум минут 10, ограничимся 10 id (это ~40 картинок)

In [72]:
obj_ids = df.objId[:10]
directory = "images"
os.mkdir(directory)
async with ClientSession() as session:
        photo_render_dtos = await fetch_photo_render_dto_list(single_object_url_template, obj_ids, session)
        images = await fetch_image_list(photo_render_dtos, session)
        await save_images(images, directory)