# Récupération des informations des musiques

## Documentation
La documentation de l'API Genius se trouve [ici](https://docs.genius.com/)

## Paquets utilisés

In [8]:
import json
import requests
import time
import re
import pandas as pd
from datetime import datetime
from typing import List, Any
from rauth import OAuth2Service
from bs4 import BeautifulSoup, PageElement
from IPython.display import clear_output

## Classe pour la manipulation de l'API

In [9]:
class API:
    """
    Simplify the requests to the Genius API
    """
    genius: Any
    session: Any

    def __init__(
        self,
        client_id: str,
        client_secret: str,
        authorize_url: str,
        base_url: str,
        token: str,
    ):
        """
        Initialize the Genius API session
        """
        self.genius = OAuth2Service(
            client_id=client_id,
            client_secret=client_secret,
            authorize_url=authorize_url,
            base_url=base_url
        )

        self.session = self.genius.get_session(
            token=token
        )

    def get_lyrics(
        self,
        url: str,
        retry: bool = False,
        wait_retry: int = 30,
        wait: int = 0
    ) -> dict:
        """
        Get the lyrics from the Genius website

        Parameters:
            url (str): The lyrics page URL
            retry (bool): Retry the request if it fails (default False)
            wait_retry (int): Wait x seconds before retrying (default 30)
            wait (int): Wait x seconds before returning the result (default 0)

        Returns:
            lyrics (str or None): The lyrics of the song
        """

        htmlRes = requests.get(url).text

        html = BeautifulSoup(htmlRes, 'html.parser')

        # Find the correct lyrics div in the html file
        lyricsDiv: PageElement = html.find("div", class_="lyrics")
        if lyricsDiv == None:
            lyricsDiv = html.find("div", class_="Lyrics__Root-sc-1ynbvzw-0")

        if lyricsDiv != None:
            for br in lyricsDiv.find_all("br"):
                br.replace_with("\n")

            lyrics: str = lyricsDiv.get_text()

            lyrics = re.sub(r"\n", " ", lyrics)
            lyrics = re.sub(r"\s{2,}", " ", lyrics)
            lyrics = lyrics.strip()

            time.sleep(wait)

            return lyrics

        elif retry:
            # Retry if it failed
            print(f"Cannot scrap lyrics... waiting {wait_retry} secondes")
            time.sleep(wait_retry)
            return self.get_lyrics(url, retry, wait_retry, wait)

        return None

    def get_song(
        self,
        id: int,
        with_lyrics: bool = False,
        retry: bool = False,
        wait_retry: int = 30,
        wait: int = 0
    ) -> dict:
        """
        Get a song from the Genius API and parse it into objects (selecting only interesting fields)

        Parameters:
            id (int): The song's ID
            with_lyrics (bool): Get the song with it's lyrics (default False)
            retry (bool): Retry the request if the getting lyrics method failed (default False)
            wait_retry (int): Wait x seconds if the getting lyrics method failed (default 30)
            wait (int): Wait x seconds after the lyrics method (default 0)

        Returns:
            lyrics (dict): The song
        """

        res = self.session.get(f"songs/{id}?text_format=plain").json()
        url = res['response']['song']['url']
        lyrics = ""

        album: dict = None

        if res['response']['song']['album']:
            album = {
                "name": res['response']['song']['album']['name'],
                "id": res['response']['song']['album']['id']
            }            

        artist = {
            "name": res['response']['song']['primary_artist']['name'],
            "id": res['response']['song']['primary_artist']['id'],
            "url": res['response']['song']['primary_artist']['url'],
            "image": res['response']['song']['primary_artist']['image_url']
        }

        song = {
            "id": res['response']['song']['id'],
            "name": res['response']['song']['title'],
            "album": album,
            "artist": artist,
            "image": res['response']['song']['header_image_url'],
            "url": url,
            "original_lyrics": lyrics,
            "date": datetime.strptime(
                res['response']['song']['release_date'] or "1900-01-01", "%Y-%m-%d").isoformat()
        }

        if with_lyrics:
            lyrics = self.get_lyrics(url, retry, wait_retry, wait)

        if lyrics != None:
            song["original_lyrics"] = lyrics

        return song

    def search(self, query: str) -> List[dict]:
        """
        Search for an album, artist, song in the Genius API

        Parameters:
            query (str): The search query

        Returns:
            Results (List[dict]): The provided results
        """

        search = self.session.get(f"search?q={query}").json()
        hits = search['response']['hits']
        results: List[dict] = []

        for hit in hits:
            hitResult = hit['result']

            artist = {
                "name": hitResult['primary_artist']['name'],
                "id": hitResult['primary_artist']['id'],
                "image": hitResult['primary_artist']['image_url'],
                "url": hitResult['primary_artist']['url']
            }

            search = {
                "title": hitResult['title'],
                "url": hitResult['url'],
                "artist": artist,
                "id": hitResult['id']
            }

            results.append(search)

        return results

    def print_json(content: str):
        print(json.dumps(content, indent=2, sort_keys=True))

# Authentification à l'API

In [10]:
# Pour la démonstration on a volontairement laissé les tokens d'accès en clair
api = API(
    client_id="xAP0jvOkLrC3eAjwE4iCeY5BdSrgH7qKUQyh8907-2fGiAGYEHJMNhtFglSLznAq",
    client_secret="WIVq7t1Jq5uaN0OkYCPzhVMr4mt_d-ufoq5fSC6qmyUaxodx5kZ4bS56J87C-LXGRqeeXp9nFpjgrgPtZ_8niA",
    authorize_url="https://api.genius.com/oauth/authorize",
    base_url="https://api.genius.com/",
    token="qfhBonIalyiGK0DcsHmg3-heXf485c1dSV-gOM3ZU4Wn3eD-6-pKjESnhYg4kJ1y"
)

# Récupération d'une musique avec l'API Genius

In [11]:
song = api.get_song(1)
API.print_json(song)

{
  "album": {
    "id": 360842,
    "name": "Purple Haze (Advance)"
  },
  "artist": {
    "id": 1,
    "image": "https://images.genius.com/923801a14122014b9991a84a820f061c.420x420x1.png",
    "name": "Cam\u2019ron",
    "url": "https://genius.com/artists/Camron"
  },
  "date": "2004-12-07T00:00:00",
  "id": 1,
  "image": "https://images.rapgenius.com/bf520db552b4637bd9a8bbd072d9d290.316x314x1.jpg",
  "name": "Killa Cam",
  "original_lyrics": "",
  "url": "https://genius.com/Camron-killa-cam-lyrics"
}


# Recherche d'une musique avec l'API Genius

In [12]:
results = api.search("damso")
API.print_json(results)

[
  {
    "artist": {
      "id": 45855,
      "image": "https://images.genius.com/57aa49f445bb856c82f4a8e6e5bffe55.1000x1000x1.jpg",
      "name": "Damso",
      "url": "https://genius.com/artists/Damso"
    },
    "id": 3756041,
    "title": "Feu de bois",
    "url": "https://genius.com/Damso-feu-de-bois-lyrics"
  },
  {
    "artist": {
      "id": 45855,
      "image": "https://images.genius.com/57aa49f445bb856c82f4a8e6e5bffe55.1000x1000x1.jpg",
      "name": "Damso",
      "url": "https://genius.com/artists/Damso"
    },
    "id": 2685480,
    "title": "Amn\u00e9sie",
    "url": "https://genius.com/Damso-amnesie-lyrics"
  },
  {
    "artist": {
      "id": 45855,
      "image": "https://images.genius.com/57aa49f445bb856c82f4a8e6e5bffe55.1000x1000x1.jpg",
      "name": "Damso",
      "url": "https://genius.com/artists/Damso"
    },
    "id": 3038927,
    "title": "\u0393. Mosa\u00efque solitaire",
    "url": "https://genius.com/Damso-mosaique-solitaire-lyrics"
  },
  {
    "artist":

## Récupération des paroles d'une musique

In [13]:
lyrics = api.get_lyrics(song["url"])
print(lyrics)

[Chorus: Opera Steve & Cam'ron] Killa Cam, Killa Cam, Cam Killa Cam, Killa Cam Killa Cam, Cam Killa Cam, Killa Cam, Cam Killa Killa Killa Cam Killa Cam, Cam, Killa (Killa!) Killa Cam, Killa Cam, Cam (Bases loaded) Killa Cam, Killa Cam (Uh-huh) Killa Cam, Cam (Santana on second, Jim on third) Killa Cam, Killa Cam, Cam (I'm at bat) Killa Killa Killa Cam Killa Cam, Cam, Killa (I'm 'bout to hit this shit out the world) Killa Cam (Ugh, Heatmakerz), Killa Cam, Cam Killa Cam, Killa Cam Killa Cam, Cam (Hahahaha) Killa Cam, Killa Cam, Cam Killa Killa Killa Cam Killa Cam, Cam, Killa (Clap) Killa Cam, Killa Cam, Cam Killa Cam, Killa Cam Killa Cam, Cam Killa Cam, Killa Cam, Cam Killa Killa Killa Cam (Killa! Killa!) Killa Cam, Cam, Killa [Verse 1] With the goons I spar, stay in tune with ma (What up?) She like, "Damn, this the realest since 'Kumbaya'" Bomaye, Killa Cam, my Lord (My Lord) Still the man with the pan, scrilla, fam, on board Now bitches, they want to neuter me, niggas, they want to tut

## Scraping des données

### Récupération de toutes les musiques d'un fichier

In [14]:
input_path: str = "./datas/songs.txt"
output_path: str = "./datas/api_songs.json"

with open(input_path, "r", encoding="utf8") as file:
    songs_inputs = file.read().splitlines()
    songs: List[str] = []

    for index, song_input in enumerate(songs_inputs, start=1):
        results = api.search(song_input)

        if len(results) > 0:
            song = api.get_song(results[0]["id"], True, True, 0, 0)
            songs.append(song)

            print(f"{index / len(songs_inputs) * 100:.2f}%     {song['artist']['name']} - {song['name']}")

        with open(output_path, "w", encoding="utf8") as output:
            output.write(
                json.dumps(
                    songs,
                    default=lambda obj: obj.__dict__,
                    ensure_ascii=False
                )
            )
            output.close()
    file.close()

    print("Output written at", output_path)

0.25%     Céline Dion - Encore un soir
0.49%     Céline Dion - Je nous veux
0.74%     Céline Dion - Les yeux au ciel
0.98%     Céline Dion - Si c’était à refaire
1.23%     Céline Dion - Ordinaire
1.47%     Céline Dion - Tu sauras
1.72%     Céline Dion - Toutes ces choses
1.97%     Céline Dion - Le bonheur en face
2.21%     Céline Dion - À la plus haute branche
2.46%     Céline Dion - À vous
2.70%     Céline Dion - Ma force
3.19%     Carla Bruni - Quelqu’un m’a dit (English Translation)
3.44%     Carla Bruni - Raphaël
3.69%     Carla Bruni - Tout le monde
3.93%     Carla Bruni - La noyée
4.18%     Carla Bruni - Le Toi Du Moi
4.42%     Carla Bruni - Le ciel dans une chambre
4.67%     Carla Bruni - J’en connais
4.91%     Carla Bruni - Le plus beau du quartier
5.16%     Carla Bruni - Chanson triste
5.41%     Carla Bruni - L’excessive
5.65%     Carla Bruni - L’amoureuse
5.90%     Carla Bruni - La dernière minute
6.14%     Clara Luciani - La grenade
6.39%     Clara Luciani - La baie
6.63%   

SSLError: HTTPSConnectionPool(host='genius.com', port=443): Max retries exceeded with url: /Claude-francois-si-douce-a-mon-souvenir-lyrics (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1123)')))

# RENOMMEZ `./datas/api_songs.json` EN `./datas/songs.json` POUR LES PROCHAINS NOTEBOOKS