From abc60bc7f5cff8ba4429e70343209181017b8b23 Mon Sep 17 00:00:00 2001 From: Adrian <20292956+qwertyadrian@users.noreply.github.com> Date: Thu, 13 Feb 2020 22:45:36 +0300 Subject: [PATCH 01/16] =?UTF-8?q?=D0=98=D0=B7=D0=BC=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20VkAudio=20=D0=B2=20=D1=81=D0=BE=D0=BE?= =?UTF-8?q?=D1=82=D0=B2=D0=B5=D1=81=D1=82=D0=B2=D0=B8=D0=B8=20=D1=81=20?= =?UTF-8?q?=D0=B8=D0=B7=D0=BC=D0=B5=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=D0=BC?= =?UTF-8?q?=D0=B8=20=D0=BD=D0=B0=20=D1=81=D1=82=D0=BE=D1=80=D0=BE=D0=BD?= =?UTF-8?q?=D0=B5=20=D0=92=D0=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vk_api/audio.py | 77 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/vk_api/audio.py b/vk_api/audio.py index 0a69d26e..c8739aaf 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -7,6 +7,8 @@ """ import re +import json +import time from itertools import islice from bs4 import BeautifulSoup @@ -18,6 +20,7 @@ RE_ALBUM_ID = re.compile(r'act=audio_playlist(-?\d+)_(\d+)') RE_ACCESS_HASH = re.compile(r'access_hash=(\w+)') RE_M3U8_TO_MP3 = re.compile(r'/[0-9a-f]+(/audios)?/([0-9a-f]+)/index.m3u8') +RPS_DELAY = 1.5 TRACKS_PER_USER_PAGE = 50 TRACKS_PER_ALBUM_PAGE = 100 @@ -119,7 +122,8 @@ def get_iter(self, owner_id=None, album_id=None, access_hash=None): tracks = scrap_data( response.text, self.user_id, - filter_root_el={'class_': 'audioPlaylist__list'} if album_id else None + filter_root_el={'class_': 'audioPlaylist__list'} if album_id else None, + http=self._vk.http ) if not tracks: @@ -216,7 +220,8 @@ def search_user(self, owner_id=None, q=''): tracks = scrap_data( response.text, self.user_id, - filter_root_el={'class_': 'AudioSerp__foundOwned'} + filter_root_el={'class_': 'AudioSerp__found'}, + http=self._vk.http ) return [track for track in tracks if track['owner_id'] == owner_id] @@ -247,7 +252,7 @@ def search_iter(self, q, offset=0): } ) - tracks = scrap_data(response.text, self.user_id) + tracks = scrap_data(response.text, self.user_id, http=self._vk.http) if not tracks: break @@ -288,13 +293,14 @@ def get_post_audio(self, owner_id, post_id): tracks = scrap_data( response.text, self.user_id, - filter_root_el={'class': 'audios_list'} + filter_root_el={'class': 'audios_list'}, + http=self._vk.http ) return tracks -def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True): +def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True, http=None): """ Парсинг списка аудиозаписей из html страницы """ if filter_root_el is None: @@ -302,6 +308,9 @@ def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True): soup = BeautifulSoup(html, 'html.parser') tracks = [] + ids = [] + + last_request = 0.0 root_el = soup.find(**filter_root_el) @@ -312,29 +321,49 @@ def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True): if 'audio_item_disabled' in audio['class']: continue - artist = audio.select_one('.ai_artist').text - title = audio.select_one('.ai_title').text - duration = int(audio.select_one('.ai_dur')['data-dur']) - full_id = tuple( - int(i) for i in audio['data-id'].split('_') + data_audio = json.loads(audio['data-audio']) + data_audio[13] = re.sub('(/+)', '/', data_audio[13].strip('/')).split('/')[-2:] + + full_id = ( + str(data_audio[1]), str(data_audio[0]), data_audio[13][0], data_audio[13][1] ) - link = audio.select_one('.ai_body').input['value'] + ids.append(full_id) - if 'audio_api_unavailable' in link: - link = decode_audio_url(link, user_id) + for ids_group in [ids[i:i + 10] for i in range(0, len(ids), 10)]: + delay = RPS_DELAY - (time.time() - last_request) - if convert_m3u8_links and 'm3u8' in link: - link = RE_M3U8_TO_MP3.sub(r'\1/\2.mp3', link) + if delay > 0: + time.sleep(delay) - tracks.append({ - 'id': full_id[1], - 'owner_id': full_id[0], - 'url': link, - - 'artist': artist, - 'title': title, - 'duration': duration, - }) + result = http.post( + 'https://m.vk.com/audio', + data={'act': 'reload_audio', 'ids': ','.join(['_'.join(i) for i in ids_group])} + ).json() + + last_request = time.time() + if result['data']: + data_audio = result['data'][0] + for audio in data_audio: + artist = BeautifulSoup(audio[4], 'html.parser').text + title = BeautifulSoup(audio[3].strip(), 'html.parser').text + duration = audio[5] + link = audio[2] + + if 'audio_api_unavailable' in link: + link = decode_audio_url(link, user_id) + + if convert_m3u8_links and 'm3u8' in link: + link = RE_M3U8_TO_MP3.sub(r'\1/\2.mp3', link) + + tracks.append({ + 'id': audio[0], + 'owner_id': audio[1], + 'url': link, + + 'artist': artist, + 'title': title, + 'duration': duration, + }) return tracks From 0e30675c1674d138729ff13c97dbfef895954f90 Mon Sep 17 00:00:00 2001 From: qwertyadrian <20292956+qwertyadrian@users.noreply.github.com> Date: Mon, 17 Feb 2020 16:55:57 +0300 Subject: [PATCH 02/16] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B0=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82?= =?UTF-8?q?=D0=BA=D0=B0=20=D1=81=D0=B8=D1=82=D1=83=D0=B0=D1=86=D0=B8=D0=B8?= =?UTF-8?q?,=20=D0=BA=D0=BE=D0=B3=D0=B4=D0=B0=20data=5Faudio[13]=20=D0=B8?= =?UTF-8?q?=D0=BC=D0=B5=D0=B5=D1=82=206=20=D1=8D=D0=BB=D0=B5=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D1=82=D0=BE=D0=B2,=20=D0=B2=D0=BC=D0=B5=D1=81=D1=82?= =?UTF-8?q?=D0=BE=203?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vk_api/audio.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vk_api/audio.py b/vk_api/audio.py index c8739aaf..eb4b6c6e 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -322,7 +322,11 @@ def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True, http continue data_audio = json.loads(audio['data-audio']) - data_audio[13] = re.sub('(/+)', '/', data_audio[13].strip('/')).split('/')[-2:] + data_audio[13] = re.sub('(/+)', '/', data_audio[13].strip('/')).split('/') + if len(data_audio[13]) == 6: + data_audio[13] = [data_audio[13][2], data_audio[13][4]] + else: + data_audio[13] = data_audio[13][-2:] full_id = ( str(data_audio[1]), str(data_audio[0]), data_audio[13][0], data_audio[13][1] From 7b9bbe45bf08213ce218bb8c8170f0181ecc26e2 Mon Sep 17 00:00:00 2001 From: qwertyadrian <20292956+qwertyadrian@users.noreply.github.com> Date: Mon, 17 Feb 2020 23:10:16 +0300 Subject: [PATCH 03/16] =?UTF-8?q?=D0=92=20=D0=B2=D1=8B=D0=B2=D0=BE=D0=B7?= =?UTF-8?q?=D1=8B=20=D1=84=D1=83=D0=BD=D0=BA=D1=86=D0=B8=D0=B9=20scrap=5Fd?= =?UTF-8?q?ata=20=D0=B8=D0=B7=20=D0=BA=D0=BB=D0=B0=D1=81=D1=81=D0=B0=20VkA?= =?UTF-8?q?udio=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B4=D0=B0=D0=B5=D1=82=D1=81?= =?UTF-8?q?=D1=8F=20=D0=B0=D1=80=D0=B3=D1=83=D0=BC=D0=B5=D0=BD=D1=82=20VkA?= =?UTF-8?q?udio.convert=5Fm3u8=5Flinks=20=D0=92=D0=BE=D1=81=D1=81=D1=82?= =?UTF-8?q?=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B0=20=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=B0=20=D1=84=D1=83=D0=BD=D0=BA=D1=86=D0=B8?= =?UTF-8?q?=D0=B8=20VkAudio.get=5Faudio=5Fby=5Fid?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vk_api/audio.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/vk_api/audio.py b/vk_api/audio.py index eb4b6c6e..aa24d29c 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -123,6 +123,7 @@ def get_iter(self, owner_id=None, album_id=None, access_hash=None): response.text, self.user_id, filter_root_el={'class_': 'audioPlaylist__list'} if album_id else None, + convert_m3u8_links=self.convert_m3u8_links, http=self._vk.http ) @@ -221,6 +222,7 @@ def search_user(self, owner_id=None, q=''): response.text, self.user_id, filter_root_el={'class_': 'AudioSerp__found'}, + convert_m3u8_links=self.convert_m3u8_links, http=self._vk.http ) @@ -252,7 +254,12 @@ def search_iter(self, q, offset=0): } ) - tracks = scrap_data(response.text, self.user_id, http=self._vk.http) + tracks = scrap_data( + response.text, + self.user_id, + convert_m3u8_links=self.convert_m3u8_links, + http=self._vk.http + ) if not tracks: break @@ -272,13 +279,17 @@ def get_audio_by_id(self, owner_id, audio_id): 'https://m.vk.com/audio{}_{}'.format(owner_id, audio_id), allow_redirects=False ) - bs = BeautifulSoup(response.text, 'html.parser') - link = bs.select_one('.ai_body input[type=hidden]').attrs['value'] - decode_link = decode_audio_url(link, self.user_id) - if self.convert_m3u8_links and 'm3u8' in decode_link: - return RE_M3U8_TO_MP3.sub(r'\1/\2.mp3', decode_link) + track = scrap_data( + response.text, + self.user_id, + filter_root_el={'class': 'basisDefault'}, + convert_m3u8_links=self.convert_m3u8_links, + http=self._vk.http + ) + if track: + return track['url'] else: - return decode_link + return '' def get_post_audio(self, owner_id, post_id): """ Получить список аудиозаписей из поста пользователя или группы @@ -294,6 +305,7 @@ def get_post_audio(self, owner_id, post_id): response.text, self.user_id, filter_root_el={'class': 'audios_list'}, + convert_m3u8_links=self.convert_m3u8_links, http=self._vk.http ) From 5ca54c24820017cb27c569f7546a4bd8816e16cd Mon Sep 17 00:00:00 2001 From: qwertyadrian <20292956+qwertyadrian@users.noreply.github.com> Date: Mon, 17 Feb 2020 23:20:56 +0300 Subject: [PATCH 04/16] =?UTF-8?q?=D0=92=20=D1=81=D0=BB=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D1=80=D1=8C=20=D1=81=20=D0=B8=D0=BD=D1=84=D0=BE=D1=80=D0=BC?= =?UTF-8?q?=D0=B0=D1=86=D0=B8=D0=B5=D0=B9=20=D0=BE=20=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D0=B5=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD?= =?UTF-8?q?=20=D0=BA=D0=BB=D1=8E=D1=87,=20=D1=81=D0=BE=D0=B4=D0=B5=D1=80?= =?UTF-8?q?=D0=B6=D0=B0=D1=89=D0=B8=D0=B9=20=D1=81=D0=BF=D0=B8=D1=81=D0=BE?= =?UTF-8?q?=D0=BA=20=D1=81=D1=81=D1=8B=D0=BB=D0=BE=D0=BA=20=D0=BD=D0=B0=20?= =?UTF-8?q?=D0=BE=D0=B1=D0=BB=D0=BE=D0=B6=D0=BD=D1=83=20=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D0=B0=20(=D0=BF=D1=83=D1=81=D1=82=D1=83=D1=8E=20=D1=81?= =?UTF-8?q?=D1=82=D1=80=D0=BE=D0=BA=D1=83,=20=D0=B5=D1=81=D0=BB=D0=B8=20?= =?UTF-8?q?=D0=BE=D0=B1=D0=BB=D0=BE=D0=B6=D0=B5=D0=BA=20=D0=BD=D0=B5=D1=82?= =?UTF-8?q?).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vk_api/audio.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vk_api/audio.py b/vk_api/audio.py index aa24d29c..71f8337f 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -374,6 +374,7 @@ def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True, http tracks.append({ 'id': audio[0], 'owner_id': audio[1], + 'track_covers': audio[14].split(',') if audio[14] else '', 'url': link, 'artist': artist, From 3f3a52a475b1f1a636a6bff87aa2ad699be08637 Mon Sep 17 00:00:00 2001 From: qwertyadrian <20292956+qwertyadrian@users.noreply.github.com> Date: Mon, 17 Feb 2020 23:26:58 +0300 Subject: [PATCH 05/16] Fixed TypeError --- vk_api/audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vk_api/audio.py b/vk_api/audio.py index 71f8337f..cea19ab6 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -287,7 +287,7 @@ def get_audio_by_id(self, owner_id, audio_id): http=self._vk.http ) if track: - return track['url'] + return track[0]['url'] else: return '' From 62d25146092bb9abe1ef53d42047aba5e52df8bc Mon Sep 17 00:00:00 2001 From: qwertyadrian <20292956+qwertyadrian@users.noreply.github.com> Date: Tue, 18 Feb 2020 00:04:13 +0300 Subject: [PATCH 06/16] =?UTF-8?q?=D0=9E=D1=82=D0=BA=D0=BB=D1=8E=D1=87?= =?UTF-8?q?=D0=B5=D0=BD=D0=B0=20=D0=BE=D1=82=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B0=20=D0=BF=D0=BB=D0=B5=D0=B9=D0=BB=D0=B8=D1=81=D1=82=D0=BE?= =?UTF-8?q?=D0=B2=20=D0=B8=D0=B7=20=D0=BF=D0=BE=D1=81=D1=82=D0=B0=20=D0=B8?= =?UTF-8?q?=D0=B7-=D0=B7=D0=B0=20=D0=BD=D0=B5=D0=BD=D0=B0=D0=B4=D0=BE?= =?UTF-8?q?=D0=B1=D0=BD=D0=BE=D1=81=D1=82=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vk_api/audio.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vk_api/audio.py b/vk_api/audio.py index cea19ab6..72b16dd6 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -329,6 +329,10 @@ def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True, http if root_el is None: raise ValueError('Could not find root el for audio') + playlist_snippets = soup.find_all('div', {'class': "audioPlaylistSnippet__list"}) + for playlist in playlist_snippets: + playlist.decompose() + for audio in root_el.find_all('div', {'class': 'audio_item'}): if 'audio_item_disabled' in audio['class']: continue From be2f78fd0515c8d5ccc0ab456de41a074cffbca0 Mon Sep 17 00:00:00 2001 From: qwertyadrian <20292956+qwertyadrian@users.noreply.github.com> Date: Mon, 9 Mar 2020 07:48:07 +0300 Subject: [PATCH 07/16] =?UTF-8?q?=D0=A3=D0=B2=D0=B5=D0=BB=D0=B8=D1=87?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B7=D0=BD=D0=B0=D1=87=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D1=8F=20TRACKS=5FPER=5FUSER=5FPAGE=20=D0=B4=D0=BE=20100,?= =?UTF-8?q?=20=D1=82=D0=B0=D0=BA=20=D0=BA=D0=B0=D0=BA=20=D0=BF=D1=80=D0=B8?= =?UTF-8?q?=20=D0=B7=D0=BD=D0=B0=D1=87=D0=B5=D0=BD=D0=B8=D0=B8=2050=20?= =?UTF-8?q?=D0=B4=D1=83=D0=B1=D0=BB=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BB?= =?UTF-8?q?=D0=B8=D1=81=D1=8C=20=D0=B0=D1=83=D0=B4=D0=B8=D0=BE=D0=B7=D0=B0?= =?UTF-8?q?=D0=BF=D0=B8=D1=81=D0=B8,=20=D0=B5=D1=81=D0=BB=D0=B8=20=D1=83?= =?UTF-8?q?=20=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D1=82=D0=B5?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=B8=D1=85=20=D0=B1=D0=BE=D0=BB=D1=8C=D1=88?= =?UTF-8?q?=D0=B5=2050?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vk_api/audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vk_api/audio.py b/vk_api/audio.py index 72b16dd6..2e562900 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -22,7 +22,7 @@ RE_M3U8_TO_MP3 = re.compile(r'/[0-9a-f]+(/audios)?/([0-9a-f]+)/index.m3u8') RPS_DELAY = 1.5 -TRACKS_PER_USER_PAGE = 50 +TRACKS_PER_USER_PAGE = 100 TRACKS_PER_ALBUM_PAGE = 100 ALBUMS_PER_USER_PAGE = 100 From 775e9ec47b5532af4d535a12b7b6167887ebfe8c Mon Sep 17 00:00:00 2001 From: qwertyadrian <20292956+qwertyadrian@users.noreply.github.com> Date: Sun, 5 Jul 2020 17:27:47 +0400 Subject: [PATCH 08/16] =?UTF-8?q?=D0=9C=D0=B5=D1=82=D0=BE=D0=B4=D1=8B=20?= =?UTF-8?q?=D0=BA=D0=BB=D0=B0=D1=81=D1=81=D0=B0=20(=D0=BA=D1=80=D0=BE?= =?UTF-8?q?=D0=BC=D0=B5=20get=5Falbums=5Fiter,=20get=5Faudio=5Fby=5Fid=20?= =?UTF-8?q?=D0=B8=20get=5Fpost=5Faudio)=20VkAudio=20=D0=BF=D0=B5=D1=80?= =?UTF-8?q?=D0=B5=D0=BF=D0=B8=D1=81=D0=B0=D0=BD=D1=8B=20=D1=81=20=D0=B8?= =?UTF-8?q?=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=D0=BC=20PHP=20=D0=B1=D1=8D=D0=BA=D1=8D=D0=BD=D0=B4?= =?UTF-8?q?=D0=B0=20-=20al=5Faudio.php=20=D0=9C=D0=B5=D1=82=D0=BE=D0=B4=20?= =?UTF-8?q?search=5Fiter=20=D0=B1=D0=BE=D0=BB=D1=8C=D1=88=D0=B5=20=D0=BD?= =?UTF-8?q?=D0=B5=20=D0=BF=D1=80=D0=B8=D0=BD=D0=B8=D0=BC=D0=B0=D0=B5=D1=82?= =?UTF-8?q?=20=D0=BF=D0=B0=D1=80=D0=B0=D0=BC=D0=B5=D1=82=D1=80=20offset,?= =?UTF-8?q?=20=D0=BF=D0=BE=D1=81=D0=BA=D0=BE=D0=BB=D1=8C=D0=BA=D1=83=20"?= =?UTF-8?q?=D0=B0=D0=BA=D1=82"=20(=D0=BC=D0=B5=D1=82=D0=BE=D0=B4)=20sectio?= =?UTF-8?q?n=20=D0=B8=D0=B7=20al=5Faudio.php=20=D0=BD=D0=B5=20=D0=BF=D0=BE?= =?UTF-8?q?=D0=B4=D0=B4=D0=B5=D1=80=D0=B6=D0=B8=D0=B2=D0=B0=D0=B5=D1=82=20?= =?UTF-8?q?=D1=81=D0=BC=D0=B5=D1=89=D0=B5=D0=BD=D0=B8=D0=B5.=20=D0=94?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=BF=D0=BE=D0=BB=D1=83=D1=87=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20=D0=BF=D0=BE=D0=B4=D0=B3=D1=80=D0=B7=D1=83=D0=BA=D0=B8?= =?UTF-8?q?=20=D1=80=D0=B5=D0=B7=D1=83=D0=BB=D1=8C=D1=82=D0=B0=D1=82=D0=BE?= =?UTF-8?q?=D0=B2=20=D0=BF=D0=BE=D0=B8=D1=81=D0=BA=D0=B0=20=D0=B8=D1=81?= =?UTF-8?q?=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D1=83=D0=B5=D1=82=D1=81=D1=8F=20?= =?UTF-8?q?=D0=BC=D0=B5=D1=82=D0=BE=D0=B4=20load=5Fcatalog=5Fsection.=20?= =?UTF-8?q?=D0=9F=D0=B0=D1=80=D0=B0=D0=BC=D0=B5=D1=82=D1=80=20count=20?= =?UTF-8?q?=D0=BC=D0=B5=D1=82=D0=BE=D0=B4=D0=B0=20search=20=D0=B1=D1=8B?= =?UTF-8?q?=D0=BB=20=D1=83=D0=B2=D0=B5=D0=BB=D0=B8=D1=87=D0=B5=D0=BD=20?= =?UTF-8?q?=D1=81=2050=20=D0=B4=D0=BE=20100.=20=D0=A4=D1=83=D0=BD=D0=BA?= =?UTF-8?q?=D1=86=D0=B8=D1=8F=20scrap=5Fdata=20=D0=B1=D1=8B=D0=BB=D0=B0=20?= =?UTF-8?q?=D1=80=D0=B0=D0=B7=D0=B4=D0=B5=D0=BB=D0=B5=D0=BD=D0=B0=20=D0=BD?= =?UTF-8?q?=D0=B0=20=D1=82=D1=80=D0=B8=20=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=B8:=20scrap=5Fids,=20scrap=5Fids=5Ffrom=5Fhtml=20?= =?UTF-8?q?=D0=B8=20scrap=5Ftracks=20=D0=91=D1=8B=D0=BB=D0=B0=20=D0=B4?= =?UTF-8?q?=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B0=20=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B0=20=D0=BD=D0=B0=D0=BB=D0=B8?= =?UTF-8?q?=D1=87=D0=B8=D1=8F=20=D0=B2=D1=81=D0=B5=D1=85=20=D0=BD=D0=B5?= =?UTF-8?q?=D0=BE=D0=B1=D1=85=D0=BE=D0=B4=D0=B8=D0=BC=D1=8B=D1=85=20=D1=85?= =?UTF-8?q?=D1=8D=D1=88=D0=B5=D0=B9=20=D0=B0=D1=83=D0=B4=D0=B8=D0=BE=D0=B7?= =?UTF-8?q?=D0=B0=D0=BF=D0=B8=D1=81=D0=B5=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vk_api/audio.py | 206 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 138 insertions(+), 68 deletions(-) diff --git a/vk_api/audio.py b/vk_api/audio.py index 2e562900..7f4af5c7 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -20,10 +20,12 @@ RE_ALBUM_ID = re.compile(r'act=audio_playlist(-?\d+)_(\d+)') RE_ACCESS_HASH = re.compile(r'access_hash=(\w+)') RE_M3U8_TO_MP3 = re.compile(r'/[0-9a-f]+(/audios)?/([0-9a-f]+)/index.m3u8') -RPS_DELAY = 1.5 -TRACKS_PER_USER_PAGE = 100 -TRACKS_PER_ALBUM_PAGE = 100 +RPS_DELAY_RELOAD_AUDIO = 1.5 +RPS_DELAY_LOAD_SECTION = 2.0 + +TRACKS_PER_USER_PAGE = 2000 +TRACKS_PER_ALBUM_PAGE = 2000 ALBUMS_PER_USER_PAGE = 100 @@ -96,35 +98,42 @@ def get_iter(self, owner_id=None, album_id=None, access_hash=None): owner_id = self.user_id if album_id is not None: - url = 'https://m.vk.com/audio?act=audio_playlist{}_{}&access_hash={}'.format( - owner_id, album_id, access_hash or '' - ) offset_diff = TRACKS_PER_ALBUM_PAGE else: - url = 'https://m.vk.com/audios{}'.format(owner_id) offset_diff = TRACKS_PER_USER_PAGE offset = 0 while True: - response = self._vk.http.get( - url, - params={ - 'offset': offset + response = self._vk.http.post( + 'https://m.vk.com/audio', + data={ + 'act': 'load_section', + 'owner_id': owner_id, + 'playlist_id': album_id if album_id else -1, + 'offset': offset, + 'type': 'playlist', + 'access_hash': access_hash, + 'is_loading_all': 1 }, allow_redirects=False - ) + ).json() - if not response.text: + if not response['data'][0]: raise AccessDenied( - 'You don\'t have permissions to browse user\'s audio' + 'You don\'t have permissions to browse {}\'s albums'.format( + owner_id + ) ) - tracks = scrap_data( - response.text, + ids = scrap_ids( + response['data'][0]['list'] + ) + + tracks = scrap_tracks( + ids, self.user_id, - filter_root_el={'class_': 'audioPlaylist__list'} if album_id else None, - convert_m3u8_links=self.convert_m3u8_links, - http=self._vk.http + self._vk.http, + convert_m3u8_links=self.convert_m3u8_links ) if not tracks: @@ -133,6 +142,9 @@ def get_iter(self, owner_id=None, album_id=None, access_hash=None): for i in tracks: yield i + if response['data'][0]['totalCount'] <= 2000: + break + offset += offset_diff def get(self, owner_id=None, album_id=None, access_hash=None): @@ -202,33 +214,45 @@ def search_user(self, owner_id=None, q=''): if owner_id is None: owner_id = self.user_id - response = self._vk.http.get( - 'https://m.vk.com/audio', - params={ - 'id': owner_id, + response = self._vk.http.post( + 'https://vk.com/al_audio.php', + data={ + 'al': 1, + 'act': 'section', + 'claim': 0, + 'is_layer': 0, + 'owner_id': owner_id, + 'section': 'search', 'q': q - }, - allow_redirects=False + } ) + json_response = json.loads(response.text.replace('