From c9bc5f2228c94caa4f6185b29080711b2c02a7b2 Mon Sep 17 00:00:00 2001 From: bakatrouble Date: Sun, 24 Nov 2019 17:34:14 +0300 Subject: [PATCH 1/3] Allow converting m3u8 audio links to mp3 --- vk_api/audio.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vk_api/audio.py b/vk_api/audio.py index 899a2e07..f96a98fe 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -17,6 +17,7 @@ RE_AUDIO_ID = re.compile(r'audio(-?\d+)_(\d+)') RE_ALBUM_ID = re.compile(r'act=audio_playlist(-?\d+)_(\d+)') RE_ACCESS_HASH = re.compile(r'access_hash=(\w+)') +RE_M3U8_TO_MP3 = re.compile(r'/[0-9a-f]+(/audios)?/([0-9a-f]+)/index.m3u8') TRACKS_PER_USER_PAGE = 50 TRACKS_PER_ALBUM_PAGE = 100 @@ -222,7 +223,7 @@ def get_audio_by_id(self, owner_id, audio_id): return decode_audio_url(link, self.user_id) -def scrap_data(html, user_id, filter_root_el=None): +def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True): """ Парсинг списка аудиозаписей из html страницы """ if filter_root_el is None: @@ -248,6 +249,9 @@ def scrap_data(html, user_id, filter_root_el=None): if 'audio_api_unavailable' in link: link = decode_audio_url(link, user_id) + if convert_m3u8_links and 'm3u8' in link: + link = RE_M3U8_TO_MP3.sub(r'\1/\2.mp3', link) + tracks.append({ 'id': full_id[1], 'owner_id': full_id[0], From 1b5be0456bdbc164e426e3a50f78c7c6e8fcec44 Mon Sep 17 00:00:00 2001 From: bakatrouble Date: Sun, 24 Nov 2019 18:26:43 +0300 Subject: [PATCH 2/3] Fix extraction of owner and audio ids --- vk_api/audio.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vk_api/audio.py b/vk_api/audio.py index f96a98fe..fbd21dcc 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -14,7 +14,6 @@ from .audio_url_decoder import decode_audio_url from .exceptions import AccessDenied -RE_AUDIO_ID = re.compile(r'audio(-?\d+)_(\d+)') RE_ALBUM_ID = re.compile(r'act=audio_playlist(-?\d+)_(\d+)') RE_ACCESS_HASH = re.compile(r'access_hash=(\w+)') RE_M3U8_TO_MP3 = re.compile(r'/[0-9a-f]+(/audios)?/([0-9a-f]+)/index.m3u8') @@ -242,7 +241,7 @@ def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True): title = audio.select_one('.ai_title').text duration = int(audio.select_one('.ai_dur')['data-dur']) full_id = tuple( - int(i) for i in RE_AUDIO_ID.search(audio['id']).groups() + int(i) for i in audio['data-id'].split() ) link = audio.select_one('.ai_body').input['value'] From 5a6434bae5a71a3b4da58cee2e0c831502c40755 Mon Sep 17 00:00:00 2001 From: bakatrouble Date: Sun, 24 Nov 2019 18:34:44 +0300 Subject: [PATCH 3/3] Fix the fix extraction of owner and audio ids --- vk_api/audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vk_api/audio.py b/vk_api/audio.py index fbd21dcc..6b287905 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -241,7 +241,7 @@ def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True): title = audio.select_one('.ai_title').text duration = int(audio.select_one('.ai_dur')['data-dur']) full_id = tuple( - int(i) for i in audio['data-id'].split() + int(i) for i in audio['data-id'].split('_') ) link = audio.select_one('.ai_body').input['value']