In [1]:
# ipykernel already runs an asyncio event loop; nest_asyncio.apply() patches
# asyncio so the loop.run_until_complete(...) calls in later cells can run
# inside that loop instead of failing because it is already running.

import nest_asyncio
nest_asyncio.apply()

In [2]:
import asyncio
from urllib.parse import quote

import aiohttp
import pandas as pd

async def fetch_genres(session, url):
    """Download one page and return the genre names listed in its first table.

    The first HTML table on the page is parsed; its first two columns are
    discarded and the single remaining column is read as genre names. A
    single trailing "+" or "-" marker is removed from each name.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with ``raise_for_status()`` and an awaitable
            ``read()`` (an ``aiohttp.ClientSession`` in this notebook).
        url: Address of the page to fetch.

    Returns:
        list[str]: Genre names in table order. Any failure (network error,
        HTTP error status, missing or unexpectedly shaped table) is reported
        on stdout and yields an empty list, so one bad page does not abort a
        batch of requests.
    """
    try:
        async with session.get(url) as res:
            # Fail with a clear HTTP error instead of trying to parse an
            # error page as if it were the genre table.
            res.raise_for_status()
            html = await res.read()

        table = pd.read_html(html)[0]
        # Drop the two leading columns; exactly one column must remain,
        # otherwise the rename below raises and the page is reported as bad.
        names = table.drop(columns=table.columns[[0, 1]])
        names.columns = ["genre"]

        # Blank cells are parsed as NaN; skip them rather than letting one
        # bad row raise and discard every genre on the page.
        cleaned = names["genre"].dropna().astype(str).tolist()
        # endswith() is safe on empty strings, unlike indexing with [-1].
        return [
            name[:-1] if name.endswith(("-", "+")) else name
            for name in cleaned
        ]
    except Exception as e:
        print(f"an error occurred: {e}")
        return []

async def get_genres(genres=None):
    """Fetch everynoise.com genre lists for one or more root genres concurrently.

    Args:
        genres: ``None`` (request the popularity-ordered list of all genres),
            a single genre name, or any iterable of those, such as a list,
            tuple or NumPy array slice.

    Returns:
        list: One entry per requested genre, in request order. Each entry is
        whatever ``fetch_genres`` returned for that request (an empty list
        when the request failed).
    """
    def get_url(genre):
        if genre is None:
            return "https://everynoise.com/everynoise1d.cgi?vector=popularity&scope=all"
        # Percent-encode the name: a genre such as "r&b" would otherwise
        # split the query string at the ampersand.
        root = quote(str(genre), safe="")
        return f"https://everynoise.com/everynoise1d.cgi?root={root}&scope=all"

    # Only None, strings and other non-iterables count as a single request.
    # Checking for `list` alone wrapped NumPy slices and tuples as ONE
    # "genre", producing URLs like root=['pop'].
    if genres is None or isinstance(genres, (str, bytes)) or not hasattr(genres, "__iter__"):
        genres = [genres]

    async with aiohttp.ClientSession() as session:
        tasks = [
            asyncio.ensure_future(fetch_genres(session, get_url(genre)))
            for genre in genres
        ]
        return await asyncio.gather(*tasks)

In [3]:
# Download the master genre list. Called without arguments, get_genres makes
# a single request, so the gathered result holds exactly one list.
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(get_genres())
genres = loop.run_until_complete(future)[0]

# Quick sanity check: overall count plus the first few names.
first_five = ", ".join(repr(name) for name in genres[:5])
print(f"total genres: {len(genres)}")
print(f"{first_five} etc.")

total genres: 5521
'pop', 'dance pop', 'rap', 'post-teen pop', 'rock' etc.


In [4]:
import numpy as np

# Persist the genre list twice: as plain text (one name per line) for easy
# inspection, and as a compressed archive ("./genres.npz") for reloading.
np.savetxt("./genres", genres, fmt="%s")
# savez_compressed stores every keyword argument as an extra named array, so
# passing fmt="%s" here would add a stray "fmt" entry to the archive. The
# list is passed positionally and is stored under the default key "arr_0".
np.savez_compressed("./genres", genres)

In [11]:
# np.load on an .npz returns an archive object that keeps the file open, so
# read the array inside a with-block to make sure the file is closed again.
# "arr_0" is the default key given to the first positionally saved array.
with np.load("./genres.npz") as archive:
    genres = archive["arr_0"]
print(genres)

['pop' 'dance pop' 'rap' ... 'yunnan traditional' 'classical string trio'
 'himene tarava']


In [12]:
# loop = asyncio.get_event_loop()
# future = asyncio.ensure_future(get_genres(genres))
# genre_ranks = loop.run_until_complete(future)

import time
from tqdm import tqdm

# Number of genres requested concurrently per batch, and the pause between
# batches that throttles the crawl.
BATCH_SIZE = 1
PAUSE_SECONDS = 10

genres_ranks = []

# Batch boundaries: every BATCH_SIZE-th index plus the end of the sequence.
# Closing with len(genres) rather than -1 matters: a final mark of -1 makes
# the last slice empty, so the last genre would never be requested.
marks = list(range(0, len(genres), BATCH_SIZE))
marks.append(len(genres))

loop = asyncio.get_event_loop()
for i in tqdm(range(len(marks) - 1)):
    # Hand get_genres a real list: an ndarray slice would not be recognised
    # as a collection of genres and would be sent as one malformed request.
    batch = list(genres[marks[i]:marks[i + 1]])
    future = asyncio.ensure_future(get_genres(batch))
    temp_genre_ranks = loop.run_until_complete(future)
    genres_ranks.extend(temp_genre_ranks)
    time.sleep(PAUSE_SECONDS)

  0%|          | 0/5521 [00:00<?, ?it/s]

In [None]:
import numpy as np

# Each entry of genres_ranks is a list of genre names, and the lists may
# differ in length, so np.array() cannot build a regular 2-D array from them.
# Store them in a 1-D object array instead, one list per element.
ranks_per_genre = [list(ranks) for ranks in genres_ranks]
genres_ranks = np.empty(len(ranks_per_genre), dtype=object)
for i, ranks in enumerate(ranks_per_genre):
    genres_ranks[i] = ranks

# savetxt's default format is numeric and rejects strings. Write one line
# per genre with its related names separated by tabs, using fmt="%s".
np.savetxt(
    "./genres_ranks",
    ["\t".join(map(str, ranks)) for ranks in genres_ranks],
    fmt="%s",
)