In [109]:
import tqdm
import time
import multiprocessing
import requests
import pandas as pd

In [5]:
API_KEY = "660a4395f992ff67786584e238f501aa"
ARTIST_URL = "https://www.vagalume.com.br/{artist}/index.js"
HEADERS = {'User-Agent': 'Mozilla/5.0'}

---

Example
---

In [41]:
url = ARTIST_URL.format(artist="roberto-carlos")
resp = requests.get(url=url, headers=HEADERS)

In [42]:
len(resp.json()['artist']['toplyrics']['item'])

25

In [13]:
resp = requests.get(url="https://api.vagalume.com.br/search.php?art=roberto-carlos&mus=nossa-senhora-2")

In [14]:
resp.json()

{'type': 'exact',
 'art': {'id': '3ade68b5g3758eda3',
  'name': 'Roberto Carlos',
  'url': 'https://www.vagalume.com.br/roberto-carlos/'},
 'mus': [{'id': '3ade68b7g2f8d8ea3',
   'name': 'Nossa Senhora',
   'url': 'https://www.vagalume.com.br/roberto-carlos/nossa-senhora-2.html',
   'lang': 1,
   'text': 'Cubra-me com seu manto de amor\nGuarda-me na paz desse olhar\nCura-me as feridas e a dor me faz suportar\nQue as pedras do meu caminho\nMeus pés suportem pisar\nMesmo ferido de espinhos me ajude a passar\n\nSe ficaram mágoas em mim\nMãe tira do meu coração\nE aqueles que eu fiz sofrer peço perdão\nSe eu curvar meu corpo na dor\nMe alivia o peso da cruz\nInterceda por mim minha mãe junto a Jesus\n\nNossa Senhora me dê a mão\nCuida do meu coração\nDa minha vida do meu destino\n\nNossa Senhora me dê a mão\nCuida do meu coração\nDa minha vida do meu destino\nDo meu caminho\nCuida de mim\n\nSempre que o meu pranto rolar\nPonha sobre mim suas mãos\nAumenta minha fé e acalma o meu coração\nG

---

# Download Top 100 Artists

In [15]:
period = 201912
limit = 100
RANK_URL = f"https://api.vagalume.com.br/rank.php?apikey=660a4395f992ff67786584e238f501aa&type=art&period=month&periodVal={period}&scope=nacional&limit={limit}"

In [18]:
top_100_artists = requests.get(url=RANK_URL, headers=HEADERS)

In [27]:
artists_df = pd.DataFrame.from_dict(top_100_artists.json()['art']['month']['nacional'])

In [28]:
artists_df["selecionado"] = 1

In [30]:
artists_df.to_csv("data/artistas.csv", index=False, sep=";")

---

# List of top artists

In [33]:
# read updated artistas with selected ones
artists_df = pd.read_csv("data/artistas.csv", sep=";")

In [34]:
artists_df.head()

Unnamed: 0,id,name,pic_medium,pic_small,rank,uniques,url,views,selecionado
0,3ade68b7gf6932ea3,Ludmilla,https://s2.vagalume.com/ludmilla/images/ludmil...,https://s2.vagalume.com/ludmilla/images/profil...,89.0,11537,https://www.vagalume.com.br/ludmilla/,15727,0
1,3ade68b6g9609eda3,Racionais Mc's,https://s2.vagalume.com/racionais-mcs/images/r...,https://s2.vagalume.com/racionais-mcs/images/p...,71.7,9302,https://www.vagalume.com.br/racionais-mcs/,14160,0
2,3ade68b7g56470ea3,Músicas Católicas,https://s2.vagalume.com/musicas-catolicas/imag...,https://s2.vagalume.com/musicas-catolicas/imag...,63.9,8279,https://www.vagalume.com.br/musicas-catolicas/,24442,0
3,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1
4,3ade68b7g3f7e1ea3,Marília Mendonça,https://s2.vagalume.com/marilia-mendonca/image...,https://s2.vagalume.com/marilia-mendonca/image...,40.8,5284,https://www.vagalume.com.br/marilia-mendonca/,24141,1


In [35]:
artists_df = artists_df.query("selecionado == 1")

In [39]:
artist_url_list = artists_df["url"].values

In [49]:
artist_url_list[:3]

array(['https://www.vagalume.com.br/roberto-carlos/',
       'https://www.vagalume.com.br/marilia-mendonca/',
       'https://www.vagalume.com.br/zeze-di-camargo-e-luciano/'],
      dtype=object)

In [51]:
def get_artist_songs(artist_url: str) -> pd.DataFrame:
    url = artist_url + "index.js"
    resp = requests.get(url=url, headers=HEADERS)
    
    songs_df = pd.DataFrame.from_dict(resp.json()['artist']['toplyrics']['item'])
    songs_df["url_artista"] = artist_url
    
    return songs_df

In [53]:
with multiprocessing.Pool(processes=8) as pool:
    results = pool.map(get_artist_songs, artist_url_list)

In [56]:
songs_df = pd.concat(results)

In [61]:
songs_df = songs_df.merge(artists_df, left_on="url_artista", right_on="url", suffixes=["_musica", "_artista"])

In [62]:
songs_df.to_csv("data/musicas.csv", sep=";", index=False)

---

# Get Lyrics

In [73]:
MUSIC_URL = "https://api.vagalume.com.br/search.php?musid={musid}"

In [63]:
songs_df = pd.read_csv("data/musicas.csv", sep=";")

In [65]:
songs_df.head()

Unnamed: 0,desc,id_musica,url_musica,url_artista,id_artista,name,pic_medium,pic_small,rank,uniques,url_artista.1,views,selecionado
0,Nossa Senhora,3ade68b7g2f8d8ea3,/roberto-carlos/nossa-senhora-2.html,https://www.vagalume.com.br/roberto-carlos/,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1
1,Amor Sem Limite,3ade68b7g709a4ea3,/roberto-carlos/amor-sem-limite.html,https://www.vagalume.com.br/roberto-carlos/,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1
2,Como É Grande O Meu Amor Por Você,3ade68b7gef8a4ea3,/roberto-carlos/como-e-grande-o-meu-amor-por-v...,https://www.vagalume.com.br/roberto-carlos/,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1
3,Amigo,3ade68b7ga19a4ea3,/roberto-carlos/amigo-letras.html,https://www.vagalume.com.br/roberto-carlos/,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1
4,Emoções,3ade68b7gdf8a4ea3,/roberto-carlos/emocoes-letras.html,https://www.vagalume.com.br/roberto-carlos/,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1


In [83]:
musid_list = songs_df["id_musica"].values

In [84]:
musid_list[:4]

array(['3ade68b7g2f8d8ea3', '3ade68b7g709a4ea3', '3ade68b7gef8a4ea3',
       '3ade68b7ga19a4ea3'], dtype=object)

In [128]:
def get_lyrics(musid: str) -> pd.DataFrame:
    try:
        url = MUSIC_URL.format(musid=musid)
        resp = requests.get(url=url, headers=HEADERS)

        lyrics_df = pd.DataFrame.from_dict(resp.json()['mus'])    
        
        # sleep so we don't get our IP blocked
        time.sleep(5)
        
        return lyrics_df
    except Exception as e:
        print(e, musid)
        return pd.DataFrame()

In [129]:
lyrics_results = []

# Wont be able to use parallelism because the site is blocking our requests
for mid in tqdm.tqdm(musid_list):
    lyrics_results.append(get_lyrics(mid))


  0%|          | 0/1419 [00:00<?, ?it/s][A
  0%|          | 1/1419 [00:05<2:13:14,  5.64s/it][A
  0%|          | 2/1419 [00:11<2:13:04,  5.63s/it][A
  0%|          | 3/1419 [00:16<2:13:12,  5.64s/it][A
  0%|          | 4/1419 [00:22<2:13:42,  5.67s/it][A
  0%|          | 5/1419 [00:28<2:13:28,  5.66s/it][A
  0%|          | 6/1419 [00:33<2:13:09,  5.65s/it][A
  0%|          | 7/1419 [00:39<2:13:01,  5.65s/it][A
  1%|          | 8/1419 [00:45<2:12:53,  5.65s/it][A
  1%|          | 9/1419 [00:50<2:12:33,  5.64s/it][A
  1%|          | 10/1419 [00:56<2:12:41,  5.65s/it][A
  1%|          | 11/1419 [01:02<2:12:54,  5.66s/it][A
  1%|          | 12/1419 [01:07<2:12:48,  5.66s/it][A
  1%|          | 13/1419 [01:13<2:12:41,  5.66s/it][A
  1%|          | 14/1419 [01:19<2:12:37,  5.66s/it][A
  1%|          | 15/1419 [01:24<2:12:13,  5.65s/it][A
  1%|          | 16/1419 [01:30<2:12:14,  5.66s/it][A
  1%|          | 17/1419 [01:36<2:12:08,  5.66s/it][A
  1%|▏         | 18/1419 [01:

In [130]:
lyrics_df = pd.concat(lyrics_results)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [131]:
lyrics_df = lyrics_df.merge(songs_df, left_on="id", right_on="id_musica", suffixes=["_letra", "_musica"])

In [132]:
lyrics_df.to_csv("data/letras.csv", sep=";", index=False)

In [133]:
lyrics_df.head()

Unnamed: 0,id,lang,name_letra,text,translate,url,desc,id_musica,url_musica,url_artista,id_artista,name_musica,pic_medium,pic_small,rank,uniques,url_artista.1,views,selecionado
0,3ade68b7g2f8d8ea3,1,Nossa Senhora,Cubra-me com seu manto de amor\nGuarda-me na p...,,https://www.vagalume.com.br/roberto-carlos/nos...,Nossa Senhora,3ade68b7g2f8d8ea3,/roberto-carlos/nossa-senhora-2.html,https://www.vagalume.com.br/roberto-carlos/,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1
1,3ade68b7g709a4ea3,1,Amor Sem Limite,Quando a gente ama alguém de verdade\nEsse amo...,,https://www.vagalume.com.br/roberto-carlos/amo...,Amor Sem Limite,3ade68b7g709a4ea3,/roberto-carlos/amor-sem-limite.html,https://www.vagalume.com.br/roberto-carlos/,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1
2,3ade68b7gef8a4ea3,1,Como É Grande O Meu Amor Por Você,Eu tenho tanto pra lhe falar\nMas com palavras...,,https://www.vagalume.com.br/roberto-carlos/com...,Como É Grande O Meu Amor Por Você,3ade68b7gef8a4ea3,/roberto-carlos/como-e-grande-o-meu-amor-por-v...,https://www.vagalume.com.br/roberto-carlos/,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1
3,3ade68b7ga19a4ea3,1,Amigo,"Você meu amigo de fé, meu irmão camarada\nAmig...",,https://www.vagalume.com.br/roberto-carlos/ami...,Amigo,3ade68b7ga19a4ea3,/roberto-carlos/amigo-letras.html,https://www.vagalume.com.br/roberto-carlos/,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1
4,3ade68b7gdf8a4ea3,1,Emoções,Quando eu estou aqui\nEu vivo esse momento lin...,,https://www.vagalume.com.br/roberto-carlos/emo...,Emoções,3ade68b7gdf8a4ea3,/roberto-carlos/emocoes-letras.html,https://www.vagalume.com.br/roberto-carlos/,3ade68b5g3758eda3,Roberto Carlos,https://s2.vagalume.com/roberto-carlos/images/...,https://s2.vagalume.com/roberto-carlos/images/...,52.0,6743,https://www.vagalume.com.br/roberto-carlos/,18632,1


In [134]:
final_df = lyrics_df.query("lang == 1")[["name_musica", "name_letra", "text", "selecionado"]]
final_df.columns = ["artista", "musica", "letra", "selecionado"]
final_df.head()

Unnamed: 0,artista,musica,letra,selecionado
0,Roberto Carlos,Nossa Senhora,Cubra-me com seu manto de amor\nGuarda-me na p...,1
1,Roberto Carlos,Amor Sem Limite,Quando a gente ama alguém de verdade\nEsse amo...,1
2,Roberto Carlos,Como É Grande O Meu Amor Por Você,Eu tenho tanto pra lhe falar\nMas com palavras...,1
3,Roberto Carlos,Amigo,"Você meu amigo de fé, meu irmão camarada\nAmig...",1
4,Roberto Carlos,Emoções,Quando eu estou aqui\nEu vivo esse momento lin...,1


In [135]:
final_df.to_csv("data/data_raw.csv", sep=";")