In [1]:
import pandas as pd
import numpy as np
import requests
import json
from rapidfuzz import fuzz

In [2]:
lnos = pd.read_csv('lnos.csv')
lnos

Unnamed: 0,country,genre,song name,artist,rating
0,antarctica,indie rock,how many people,nunatuk,6.5
1,argentina,indie rock,bichos,homogenica,7.0
2,argentina,indie rock,turista,homogenica,8.0
3,argentina,indie rock,animales,homogenica,8.0
4,argentina,indie rock,ratas,homogenica,8.5
...,...,...,...,...,...
185,uzbekistan,indie rock,Нам не нужны друзья,wladek sheen,7.0
186,venezuela,sifri rock,retroceder,los mesoneros,6.5
187,vietnam,indie rock,hay la,"tora drums, hoanghieu, drumc",8.5
188,vietnam,indie rock,nho em,do it dad,9.0


In [5]:
lnos.iloc[86]

country              japan
genre           indie rock
song name              心の実
artist       indigo la end
rating                 9.5
Name: 86, dtype: object

## Make API Request

In [8]:
import re
import time

In [9]:
def remove_punctuation(input_string):
    """Strip punctuation from a string and normalise its whitespace.

    Every character that is neither a word character (``\\w``) nor whitespace
    is deleted, each run of whitespace is collapsed to a single space, and
    leading/trailing whitespace is trimmed. Underscores are kept because the
    ``\\w`` class includes them.

    Args:
        input_string: The text to clean.

    Returns:
        The cleaned text.
    """
    without_symbols = re.sub(r'[^\w\s]', '', input_string)
    single_spaced = re.sub(r'\s+', ' ', without_symbols)
    return single_spaced.strip()


In [10]:
# Sanity check on one title: remove_punctuation drops every character that is
# not a word character or whitespace (not only apostrophes and commas).
remove_punctuation(lnos.iloc[86]['song name'])

'心の実'

In [14]:
# Build the two query parts before formatting the URL. Reusing the f-string's
# own quote character inside `{...}` is only accepted from Python 3.12 on, so
# the one-line form fails to parse on older kernels. The resulting URL is the
# same: "<song words>+<artist words>", each word joined by '+'.
song_query = '+'.join(remove_punctuation(lnos.iloc[86]['song name']).split())
artist_query = '+'.join(remove_punctuation(lnos.iloc[86]['artist']).split())
requests.get(f'https://api.deezer.com/search?q={song_query}+{artist_query}&limit=5').json()

{'data': [{'id': 769863962,
   'readable': True,
   'title': 'Konomi',
   'title_short': 'Konomi',
   'title_version': '',
   'link': 'https://www.deezer.com/track/769863962',
   'duration': 242,
   'rank': 159926,
   'explicit_lyrics': False,
   'explicit_content_lyrics': 6,
   'explicit_content_cover': 2,
   'preview': 'https://cdnt-preview.dzcdn.net/api/1/1/f/e/0/0/fe0b475af439cb2b11cbe7cb21050226.mp3?hdnea=exp=1746137025~acl=/api/1/1/f/e/0/0/fe0b475af439cb2b11cbe7cb21050226.mp3*~data=user_id=0,application_id=42~hmac=9c10d14598f7d54448df104ef9e5f1f9df87e0f756173635be5a2f4a24667e34',
   'md5_image': 'b3b2067d06c3af264ff5da197679c03d',
   'artist': {'id': 4512694,
    'name': 'indigo la End',
    'link': 'https://www.deezer.com/artist/4512694',
    'picture': 'https://api.deezer.com/artist/4512694/image',
    'picture_small': 'https://cdn-images.dzcdn.net/images/artist/4acf027022fe0dfe943a16a9d8c3cf93/56x56-000000-80-0-0.jpg',
    'picture_medium': 'https://cdn-images.dzcdn.net/images

In [16]:
# The query is a fixed string, so no f-string is needed. The previous form
# nested single quotes inside a single-quoted f-string, which is a SyntaxError
# before Python 3.12. The requested URL is unchanged.
requests.get('https://api.deezer.com/search?q=histeria llegas&limit=5').json()

{'data': [{'id': 1442205912,
   'readable': True,
   'title': 'Y Es Que Llegaste Tu',
   'title_short': 'Y Es Que Llegaste Tu',
   'title_version': '',
   'link': 'https://www.deezer.com/track/1442205912',
   'duration': 249,
   'rank': 274922,
   'explicit_lyrics': False,
   'explicit_content_lyrics': 0,
   'explicit_content_cover': 2,
   'preview': 'https://cdnt-preview.dzcdn.net/api/1/1/9/7/f/0/97fbef273cc1b708c48f0f7bb421c924.mp3?hdnea=exp=1746137207~acl=/api/1/1/9/7/f/0/97fbef273cc1b708c48f0f7bb421c924.mp3*~data=user_id=0,application_id=42~hmac=a745908f01b85ccd10cabfa8dfdda16719ef70198a59d9d2d754f4a1036db2f9',
   'md5_image': 'f47f16f0d0c86c5ed53133ff93775176',
   'artist': {'id': 483395,
    'name': 'Paquito Guzmán',
    'link': 'https://www.deezer.com/artist/483395',
    'picture': 'https://api.deezer.com/artist/483395/image',
    'picture_small': 'https://cdn-images.dzcdn.net/images/artist/d0aca8eb5780daa7c279a5e3061d46d0/56x56-000000-80-0-0.jpg',
    'picture_medium': 'https:/

In [22]:
# Four-row slice of the table (positions 85-88), taken from a copy of `lnos`.
# The lookup loop further down iterates over this frame; presumably it is a
# small sample for trying the loop out before a full run - TODO confirm.
minidata = lnos.copy().iloc[85:89]

In [23]:
import time
import requests
from rapidfuzz import fuzz
from unidecode import unidecode  # For Chinese or general romanization
import pykakasi  # For Japanese romanization

kks = pykakasi.kakasi()

def romanize_japanese(text):
    """Return ``text`` romanized with the module-level pykakasi converter.

    ``kks.convert`` yields one mapping per segment of the input; the
    ``'hepburn'`` value of each segment is concatenated with no separator.

    Args:
        text: The string to romanize.

    Returns:
        The concatenated ``'hepburn'`` readings of all segments.
    """
    hepburn_parts = []
    for segment in kks.convert(text):
        hepburn_parts.append(segment['hepburn'])
    return ''.join(hepburn_parts)

def clean_and_join(text):
    """Turn free text into a ``+``-separated search term.

    The text is first passed through ``remove_punctuation`` and then split on
    whitespace; the resulting words are joined with ``'+'``.

    Args:
        text: Raw song title or artist name.

    Returns:
        The words of the cleaned text joined by ``'+'`` (an empty string when
        no words remain).
    """
    words = remove_punctuation(text).split()
    return '+'.join(words)

def fuzzy_match(song_name, artist_name, candidates):
    """Pick the candidate whose title and artist best match the targets.

    Each candidate's ``'title'`` and ``['artist']['name']`` are lower-cased
    and stripped, then compared with ``fuzz.partial_ratio`` against
    ``song_name`` and ``artist_name`` respectively. A candidate's score is the
    mean of those two ratios. The targets are used as given, so callers are
    expected to normalise them first.

    Args:
        song_name: Target song title.
        artist_name: Target artist name.
        candidates: Iterable of mappings with ``'title'`` and
            ``'artist' -> 'name'`` entries.

    Returns:
        A ``(score, candidate)`` tuple for the highest-scoring candidate. On a
        tie the earliest candidate wins. ``(0, None)`` is returned when there
        are no candidates or none scores above zero.
    """
    top_score, top_candidate = 0, None

    for candidate in candidates:
        candidate_title = candidate['title'].lower().strip()
        candidate_artist = candidate['artist']['name'].lower().strip()
        title_ratio = fuzz.partial_ratio(song_name, candidate_title)
        artist_ratio = fuzz.partial_ratio(artist_name, candidate_artist)
        mean_ratio = (title_ratio + artist_ratio) / 2

        # Only a strictly better score replaces the current leader.
        if mean_ratio <= top_score:
            continue
        top_score, top_candidate = mean_ratio, candidate

    return top_score, top_candidate

# A candidate is accepted only when its fuzzy score is strictly above this.
MATCH_THRESHOLD = 80
# Seconds to wait on a single request before treating it as failed, so one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 10

# One entry per row of `minidata`, in row order. Every iteration appends
# exactly once to each list so the three stay aligned with the frame.
album_urls = []
preview_urls = []
responses = []

for i in range(len(minidata)):
    # Pause before every 49th request; presumably this keeps the run under the
    # API's rate limit - TODO confirm against the Deezer quota.
    if (i + 1) % 49 == 0:
        time.sleep(5)

    row = minidata.iloc[i]
    song_name_raw = row['song name']
    artist_raw = row['artist']
    term = f"{clean_and_join(song_name_raw)}+{clean_and_join(artist_raw)}"

    url = f'https://api.deezer.com/search?q={term}&limit=5'
    print(term)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Network failure or timeout: record "no result" for this row and
        # carry on rather than aborting the run half-way.
        response = None
        print(f'Request failed: {exc!r}')
    else:
        print(response)

    album_url = None
    preview_url = None
    response_json = None

    if response is not None and response.status_code == 200:
        try:
            response_json = response.json()
        except ValueError:
            # A 200 response whose body is not JSON is treated as no result.
            response_json = None

    if isinstance(response_json, dict):
        responses.append(response_json)
        # A payload without 'data' simply yields no candidates.
        candidates = response_json.get('data', [])

        target_song = song_name_raw.lower().strip()
        target_artist = artist_raw.lower().strip()
        best_score, best_item = fuzzy_match(target_song, target_artist, candidates)

        # True when the title or artist contains kana or CJK ideographs.
        has_cjk = any(
            '\u3040' <= c <= '\u30ff' or '\u4e00' <= c <= '\u9fff'
            for c in song_name_raw + artist_raw
        )

        # Retry with romanized text whenever the first pass would be rejected.
        # `<=` mirrors the strict `>` acceptance test below, so a score of
        # exactly MATCH_THRESHOLD still gets the fallback.
        if best_score <= MATCH_THRESHOLD and has_cjk:
            romanized_song = romanize_japanese(song_name_raw)
            romanized_artist = romanize_japanese(artist_raw)
            romanized_score, romanized_item = fuzzy_match(
                romanized_song.lower(), romanized_artist.lower(), candidates
            )
            # Keep whichever pass scored higher.
            if romanized_score > best_score:
                best_score, best_item = romanized_score, romanized_item

        if best_score > MATCH_THRESHOLD and best_item:
            # .get keeps a candidate with a missing field from raising; the
            # corresponding URL is then stored as None.
            album_url = best_item.get('album', {}).get('cover_medium')
            preview_url = best_item.get('preview')
    else:
        responses.append('No Results')

    album_urls.append(album_url)
    preview_urls.append(preview_url)


シュレディンガーの猫+brian+the+sun
<Response [200]>
心の実+indigo+la+end
<Response [200]>
girl+no+buses
<Response [200]>
with+or+without+it+no+buses
<Response [200]>


In [18]:
# response = requests.get('https://itunes.apple.com/search?term=montreal+40+malajube&limit=1')
# print(response.json())
# print(response)

In [27]:
responses

[{'data': [{'id': 1585669192,
    'readable': True,
    'title': "Schrodinger's Cat",
    'title_short': "Schrodinger's Cat",
    'title_version': '',
    'link': 'https://www.deezer.com/track/1585669192',
    'duration': 215,
    'rank': 26586,
    'explicit_lyrics': False,
    'explicit_content_lyrics': 0,
    'explicit_content_cover': 2,
    'preview': 'https://cdnt-preview.dzcdn.net/api/1/1/5/4/6/0/546387e93c6399fbe5708d1daf612bc1.mp3?hdnea=exp=1746137694~acl=/api/1/1/5/4/6/0/546387e93c6399fbe5708d1daf612bc1.mp3*~data=user_id=0,application_id=42~hmac=f6717f804491457a2be7b04c85268bc584f04bfa07598d2da6b2ac317dca4ca7',
    'md5_image': 'e75f966094fff898704e62f8c1d242bc',
    'artist': {'id': 1432870,
     'name': 'Brian The Sun',
     'link': 'https://www.deezer.com/artist/1432870',
     'picture': 'https://api.deezer.com/artist/1432870/image',
     'picture_small': 'https://cdn-images.dzcdn.net/images/artist/9e8d318ebf335493d1d2934bb6c43fa8/56x56-000000-80-0-0.jpg',
     'picture_med

In [28]:
len(album_urls)

4

In [29]:
album_urls

[None,
 'https://cdn-images.dzcdn.net/images/cover/b3b2067d06c3af264ff5da197679c03d/250x250-000000-80-0-0.jpg',
 'https://cdn-images.dzcdn.net/images/cover/1c1760ca5ca2e9cb71b00697aa65810d/250x250-000000-80-0-0.jpg',
 'https://cdn-images.dzcdn.net/images/cover/1399c43f914bd441205a507086e119b1/250x250-000000-80-0-0.jpg']

In [30]:
preview_urls

[None,
 'https://cdnt-preview.dzcdn.net/api/1/1/f/e/0/0/fe0b475af439cb2b11cbe7cb21050226.mp3?hdnea=exp=1746137694~acl=/api/1/1/f/e/0/0/fe0b475af439cb2b11cbe7cb21050226.mp3*~data=user_id=0,application_id=42~hmac=a76d7a583e44288085ac336af6b443dac2d0d2ec994574aa5e2806834c7e871b',
 'https://cdnt-preview.dzcdn.net/api/1/1/2/2/2/0/222edff8c93e53dcae564bedb083b61d.mp3?hdnea=exp=1746137695~acl=/api/1/1/2/2/2/0/222edff8c93e53dcae564bedb083b61d.mp3*~data=user_id=0,application_id=42~hmac=b1123a007e93b9a95847cff4c45f08ce139c809468cbe8d668439499750f69c8',
 'https://cdnt-preview.dzcdn.net/api/1/1/5/d/9/0/5d97c26d1745a3e211275f00195f41d3.mp3?hdnea=exp=1746137695~acl=/api/1/1/5/d/9/0/5d97c26d1745a3e211275f00195f41d3.mp3*~data=user_id=0,application_id=42~hmac=dfc1884cb5e073ac3f720b79bb5cdd74f1f839c4db0c10a4f76347b07a3c7727']

## Add to lnos.csv

In [33]:
# Work on a copy so the original `lnos` frame is left unmodified.
new_lnos = lnos.copy()
# The URL lists hold one entry per row of `minidata`, in row order. Column
# assignment aligns on index labels, so the Series must carry minidata's
# labels: a bare pd.Series(list) is labelled 0..n-1 and would attach the URLs
# to the first n rows of the table instead of the rows they were fetched for.
# Rows that were not looked up are left as NaN.
new_lnos['album_url'] = pd.Series(album_urls, index=minidata.index)
new_lnos['preview_url'] = pd.Series(preview_urls, index=minidata.index)

In [34]:
# Write the augmented table to a separate file, leaving lnos.csv untouched.
# to_csv writes the row index as the first column unless index=False is given.
new_lnos.to_csv('lnos2.csv')