diff --git a/vk_api/audio.py b/vk_api/audio.py index 899a2e07..6b287905 100644 --- a/vk_api/audio.py +++ b/vk_api/audio.py @@ -14,9 +14,9 @@ from .audio_url_decoder import decode_audio_url from .exceptions import AccessDenied -RE_AUDIO_ID = re.compile(r'audio(-?\d+)_(\d+)') RE_ALBUM_ID = re.compile(r'act=audio_playlist(-?\d+)_(\d+)') RE_ACCESS_HASH = re.compile(r'access_hash=(\w+)') +RE_M3U8_TO_MP3 = re.compile(r'/[0-9a-f]+(/audios)?/([0-9a-f]+)/index.m3u8') TRACKS_PER_USER_PAGE = 50 TRACKS_PER_ALBUM_PAGE = 100 @@ -222,7 +222,7 @@ def get_audio_by_id(self, owner_id, audio_id): return decode_audio_url(link, self.user_id) -def scrap_data(html, user_id, filter_root_el=None): +def scrap_data(html, user_id, filter_root_el=None, convert_m3u8_links=True): """ Парсинг списка аудиозаписей из html страницы """ if filter_root_el is None: @@ -241,13 +241,16 @@ def scrap_data(html, user_id, filter_root_el=None): title = audio.select_one('.ai_title').text duration = int(audio.select_one('.ai_dur')['data-dur']) full_id = tuple( - int(i) for i in RE_AUDIO_ID.search(audio['id']).groups() + int(i) for i in audio['data-id'].split('_') ) link = audio.select_one('.ai_body').input['value'] if 'audio_api_unavailable' in link: link = decode_audio_url(link, user_id) + if convert_m3u8_links and 'm3u8' in link: + link = RE_M3U8_TO_MP3.sub(r'\1/\2.mp3', link) + tracks.append({ 'id': full_id[1], 'owner_id': full_id[0],