In [2]:
import tqdm
import time
import multiprocessing
import requests
import pandas as pd

In [2]:
API_KEY = "660a4395f992ff67786584e238f501aa"
ARTIST_URL = "https://www.vagalume.com.br/{artist}/index.js"
HEADERS = {'User-Agent': 'Mozilla/5.0'}

---

Example
---

In [3]:
url = ARTIST_URL.format(artist="roberto-carlos")
resp = requests.get(url=url, headers=HEADERS)

In [4]:
len(resp.json()['artist']['toplyrics']['item'])

25

In [5]:
resp = requests.get(url="https://api.vagalume.com.br/search.php?art=roberto-carlos&mus=nossa-senhora-2")

In [6]:
resp.json()

{'type': 'exact',
 'art': {'id': '3ade68b5g3758eda3',
  'name': 'Roberto Carlos',
  'url': 'https://www.vagalume.com.br/roberto-carlos/'},
 'mus': [{'id': '3ade68b7g2f8d8ea3',
   'name': 'Nossa Senhora',
   'url': 'https://www.vagalume.com.br/roberto-carlos/nossa-senhora-2.html',
   'lang': 1,
   'text': 'Cubra-me com seu manto de amor\nGuarda-me na paz desse olhar\nCura-me as feridas e a dor me faz suportar\nQue as pedras do meu caminho\nMeus pés suportem pisar\nMesmo ferido de espinhos me ajude a passar\n\nSe ficaram mágoas em mim\nMãe tira do meu coração\nE aqueles que eu fiz sofrer peço perdão\nSe eu curvar meu corpo na dor\nMe alivia o peso da cruz\nInterceda por mim minha mãe junto a Jesus\n\nNossa Senhora me dê a mão\nCuida do meu coração\nDa minha vida do meu destino\n\nNossa Senhora me dê a mão\nCuida do meu coração\nDa minha vida do meu destino\nDo meu caminho\nCuida de mim\n\nSempre que o meu pranto rolar\nPonha sobre mim suas mãos\nAumenta minha fé e acalma o meu coração\nG

---

# Download Top 100 Artists

In [14]:
period = 202003
limit = 100
RANK_URL = f"https://api.vagalume.com.br/rank.php?apikey=660a4395f992ff67786584e238f501aa&type=art&period=month&periodVal={period}&limit={limit}"

In [15]:
top_100_artists = requests.get(url=RANK_URL, headers=HEADERS)

In [19]:
artists_df = pd.DataFrame.from_dict(top_100_artists.json()['art']['month']['all'])

In [20]:
artists_df["selecionado"] = 1

In [21]:
artists_df.to_csv("../data/artistas.csv", index=False, sep=";")

---

# List of top artists

In [22]:
# read updated artistas with selected ones
artists_df = pd.read_csv("../data/artistas.csv", sep=";")

In [23]:
artists_df.head()

Unnamed: 0,id,name,url,pic_small,pic_medium,uniques,views,rank,selecionado
0,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1
1,3ade68b7g30dd1ea3,Ed Sheeran,https://www.vagalume.com.br/ed-sheeran/,https://s2.vagalume.com/ed-sheeran/images/prof...,https://s2.vagalume.com/ed-sheeran/images/ed-s...,54795,118926,115.3,1
2,3ade68b6g61f9eda3,Maroon 5,https://www.vagalume.com.br/maroon-5/,https://s2.vagalume.com/maroon-5/images/profil...,https://s2.vagalume.com/maroon-5/images/maroon...,43029,116561,90.6,1
3,3ade68b5gbfe6eda3,Eminem,https://www.vagalume.com.br/eminem/,https://s2.vagalume.com/eminem/images/profile.jpg,https://s2.vagalume.com/eminem/images/eminem.jpg,33962,57524,71.5,1
4,3ade68b7gd85f1ea3,Imagine Dragons,https://www.vagalume.com.br/imagine-dragons/,https://s2.vagalume.com/imagine-dragons/images...,https://s2.vagalume.com/imagine-dragons/images...,33379,72174,70.3,1


In [26]:
artists_df = artists_df.query("selecionado == 1")

In [27]:
artist_url_list = artists_df["url"].values

In [28]:
artist_url_list[:3]

array(['https://www.vagalume.com.br/dua-lipa/',
       'https://www.vagalume.com.br/ed-sheeran/',
       'https://www.vagalume.com.br/maroon-5/'], dtype=object)

In [29]:
def get_artist_songs(artist_url: str) -> pd.DataFrame:
    url = artist_url + "index.js"
    resp = requests.get(url=url, headers=HEADERS)
    
    songs_df = pd.DataFrame.from_dict(resp.json()['artist']['toplyrics']['item'])
    songs_df["url_artista"] = artist_url
    
    return songs_df

In [30]:
with multiprocessing.Pool(processes=8) as pool:
    results = pool.map(get_artist_songs, artist_url_list)

In [31]:
songs_df = pd.concat(results)

In [32]:
songs_df = songs_df.merge(artists_df, left_on="url_artista", right_on="url", suffixes=["_musica", "_artista"])

In [33]:
songs_df.to_csv("../data/musicas.csv", sep=";", index=False)

---

# Get Lyrics

In [34]:
MUSIC_URL = "https://api.vagalume.com.br/search.php?musid={musid}"

In [35]:
songs_df = pd.read_csv("../data/musicas.csv", sep=";")

In [36]:
songs_df.head()

Unnamed: 0,id_musica,desc,url_musica,url_artista,id_artista,name,url_artista.1,pic_small,pic_medium,uniques,views,rank,selecionado
0,3ade68b8gaccee0b3,Levitating (feat. DaBaby),/dua-lipa/levitating-feat-dababy.html,https://www.vagalume.com.br/dua-lipa/,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1
1,3ade68b8g03c1e0b3,Don't Start Now,/dua-lipa/dont-start-now.html,https://www.vagalume.com.br/dua-lipa/,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1
2,3ade68b8gd627e0b3,Break My Heart,/dua-lipa/break-my-heart.html,https://www.vagalume.com.br/dua-lipa/,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1
3,3ade68b8gaa27e0b3,Levitating,/dua-lipa/levitating.html,https://www.vagalume.com.br/dua-lipa/,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1
4,3ade68b8g534fb0b3,New Rules,/dua-lipa/new-rules.html,https://www.vagalume.com.br/dua-lipa/,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1


In [37]:
songs_df.shape

(2396, 13)

In [38]:
musid_list = songs_df["id_musica"].values

In [39]:
musid_list[:4]

array(['3ade68b8gaccee0b3', '3ade68b8g03c1e0b3', '3ade68b8gd627e0b3',
       '3ade68b8gaa27e0b3'], dtype=object)

In [40]:
def get_lyrics(musid: str) -> pd.DataFrame:
    try:
        url = MUSIC_URL.format(musid=musid)
        resp = requests.get(url=url, headers=HEADERS)

        lyrics_df = pd.DataFrame.from_dict(resp.json()['mus'])    
        
        # sleep so we don't get our IP blocked
        time.sleep(5)
        
        return lyrics_df
    except Exception as e:
        print(e, musid)
        return pd.DataFrame()

In [41]:
lyrics_results = []

# Wont be able to use parallelism because the site is blocking our requests
for mid in tqdm.tqdm(musid_list):
    lyrics_results.append(get_lyrics(mid))

100%|██████████| 2396/2396 [3:47:31<00:00,  5.70s/it]  


In [42]:
lyrics_df = pd.concat(lyrics_results)

In [43]:
lyrics_df = lyrics_df.merge(songs_df, left_on="id", right_on="id_musica", suffixes=["_letra", "_musica"])

In [44]:
lyrics_df.to_csv("../data/letras.csv", sep=";", index=False)

In [45]:
lyrics_df.head()

Unnamed: 0,id,name_letra,url,lang,text,translate,id_musica,desc,url_musica,url_artista,id_artista,name_musica,url_artista.1,pic_small,pic_medium,uniques,views,rank,selecionado
0,3ade68b8gaccee0b3,Levitating (feat. DaBaby),https://www.vagalume.com.br/dua-lipa/levitatin...,2,"[DaBaby:]\nBillboard Baby, Dua Lipa make 'em d...","[{'id': '3ade68b8g95eee0b3', 'lang': 1, 'url':...",3ade68b8gaccee0b3,Levitating (feat. DaBaby),/dua-lipa/levitating-feat-dababy.html,https://www.vagalume.com.br/dua-lipa/,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1
1,3ade68b8g03c1e0b3,Don't Start Now,https://www.vagalume.com.br/dua-lipa/dont-star...,2,"If you don't wanna see me\n\nDid a full 180, c...","[{'id': '3ade68b8g13c1e0b3', 'lang': 1, 'url':...",3ade68b8g03c1e0b3,Don't Start Now,/dua-lipa/dont-start-now.html,https://www.vagalume.com.br/dua-lipa/,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1
2,3ade68b8gd627e0b3,Break My Heart,https://www.vagalume.com.br/dua-lipa/break-my-...,2,I've always been the one to say the first good...,"[{'id': '3ade68b8ge627e0b3', 'lang': 1, 'url':...",3ade68b8gd627e0b3,Break My Heart,/dua-lipa/break-my-heart.html,https://www.vagalume.com.br/dua-lipa/,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1
3,3ade68b8gaa27e0b3,Levitating,https://www.vagalume.com.br/dua-lipa/levitatin...,2,If you wanna run away with me\nI know a galaxy...,"[{'id': '3ade68b8gfd27e0b3', 'lang': 1, 'url':...",3ade68b8gaa27e0b3,Levitating,/dua-lipa/levitating.html,https://www.vagalume.com.br/dua-lipa/,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1
4,3ade68b8g534fb0b3,New Rules,https://www.vagalume.com.br/dua-lipa/new-rules...,2,Talkin' in my sleep at night\nMakin' myself cr...,"[{'id': '3ade68b8g634fb0b3', 'lang': 1, 'url':...",3ade68b8g534fb0b3,New Rules,/dua-lipa/new-rules.html,https://www.vagalume.com.br/dua-lipa/,3ade68b7ga71d2ea3,Dua Lipa,https://www.vagalume.com.br/dua-lipa/,https://s2.vagalume.com/dua-lipa/images/profil...,https://s2.vagalume.com/dua-lipa/images/dua-li...,110987,195605,233.6,1


---

In [4]:
lyrics_df = pd.read_csv("../data/letras.csv", sep=";")

In [8]:
final_df = lyrics_df[["name_musica", "name_letra", "text", "lang"]]
final_df.columns = ["artista", "musica", "letra", "lang"]
final_df.head()

Unnamed: 0,artista,musica,letra,lang
0,Dua Lipa,Levitating (feat. DaBaby),"[DaBaby:]\nBillboard Baby, Dua Lipa make 'em d...",2
1,Dua Lipa,Don't Start Now,"If you don't wanna see me\n\nDid a full 180, c...",2
2,Dua Lipa,Break My Heart,I've always been the one to say the first good...,2
3,Dua Lipa,Levitating,If you wanna run away with me\nI know a galaxy...,2
4,Dua Lipa,New Rules,Talkin' in my sleep at night\nMakin' myself cr...,2


In [9]:
final_df.to_csv("../data/data_raw.csv", sep=";", index=False)