In [31]:
import pandas as pd
import numpy as np
import requests
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import os
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from typing import Tuple, List, NamedTuple

In [3]:
CLIENT_ID = os.environ["SPOTIFY_CLIENT_ID"]
CLIENT_SECRET = os.environ["SPOTIFY_CLIENT_SECRET"]

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=CLIENT_ID, client_secret=CLIENT_SECRET, redirect_uri='http://example.com', scope="playlist-modify-public"))

track_name = "Save Your Tears"
artist_name = "Weeknd"

a = sp.search(q=f"track: {track_name} artist: {artist_name}", type='track', limit=1)

Enter the URL you were redirected to:  https://example.com/?code=AQAAO8oqzOHBgcWRzrQroTFqQ3Hb8VvLcTb73rJPe4xOZQLvuNgSdb7oqr_22tNQLCwjhVffEkV_l5KAt7hS_r0d5BAbwYHRmvWgVSgvcylDPy_F2nZS_Qu5H93JiTvsczdLJB27AMccwtVx534WeD8hwM5eTQlQCpyzeRAkmJgIY4oy72RL9G85ZfGBZg


In [97]:
ENDPOINT_CHARTS = "https://www.officialcharts.com/charts/singles-chart/%s/7501/"

def generate_dates(week_gap: int = 2, years_back: int = 50):

    today = datetime.now()
    start_date = today - timedelta(days=365*years_back)
    print(len([n for n in range(int((today-start_date).days//(week_gap*7)))]))
    
    for n in range(int((today - start_date).days//(week_gap*7))):
        yield start_date + timedelta(days=n*week_gap*7)

In [114]:
class SongInfo:

    def __init__(self, artist: str, song: str):
        self.artist = artist
        self.song = song

    def __eq__(self, other) -> bool:
        if isinstance(other, SongInfo):
            return all([self.artist == other.artist, self.song == other.song])
        return False


class IDSongInfo(SongInfo):

    def __init__(self, artist: str, song:str, release_date: str, id: str, featured: bool):
        super().__init__(artist, song)
        self.release_date = release_date
        self.id = id
        self.featured = featured

    
    def __eq__(self, other) -> bool:
        if isinstance(other, IDSongInfo):
            return self.id == other.id
        return False


    def __str__(self) -> str:
        return f"{self.artist}: '{self.song}' | {self.id}"


class SongsContainer:

    def __init__(self):
        self.songs = []


    def add_song(self, song: SongInfo) -> None:
        if song not in self.songs and isinstance(song, SongInfo):
            self.songs.append(song)

    
    def get_csv(self) -> str:
        return "\n".join([f"{song.song},{song.artist}" for song in self.songs])

    
    def from_csv(self, csv_path: str) -> None:
        data = pd.read_csv(csv_path, on_bad_lines='skip', header=None, names=["Song", "Artist"])
        for idx, song in data.iterrows():
            try:
                self.songs.append(SongInfo(song["Artist"], song["Song"]))
            except Exception as exception:
                print(idx)
                raise exception from None
        

    def __len__(self) -> int:
        return len(self.songs)

In [106]:
def retrieve_top_songs(date: str, container: SongsContainer) -> SongsContainer:
    try:
        response = requests.get(ENDPOINT_CHARTS % date)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        items = soup.find_all("div", {"class": "chart-item"})

        for item in items:
            try:
                song = item.find("a", {"class": "chart-name"}).find("span", {"class": None})
                artist = item.find("a", {"class": "chart-artist"}).find("span", {"class": None})
    
                container.add_song(SongInfo(artist.text, song.text))
            except:
                continue
        return container
        
    except Exception as exception:
        print(exception)

In [107]:
songs = SongsContainer()
for idx, single_date in enumerate(generate_dates(week_gap=1, years_back=60)):
    date = single_date.strftime("%Y%m%d")
    retrieve_top_songs(date, songs)
    if idx % 101 == 100:
        print(idx, date)

3128
100 19660615
201 19680522
302 19700429
403 19720405
504 19740313
605 19760218
706 19780125
807 19800102
908 19811209
1009 19831116
1110 19851023
1211 19870930
1312 19890906
1413 19910814
1514 19930721
1615 19950628
1716 19970604
1817 19990512
1918 20010418
2019 20030326
2120 20050302
2221 20070207
2322 20090114
2423 20101222
2524 20121128
2625 20141105
2726 20161012
2827 20180919
2928 20200826
3029 20220803


In [108]:
len(songs)

43220

In [109]:
with open("songs2.csv", "w", encoding="utf-8") as file:
    file.write(songs.get_csv())

In [111]:
songs = SongsContainer()
songs.from_csv("songs2.csv")

In [112]:
len(songs)

42927

In [118]:
def get_song_id(s: SongInfo) -> IDSongInfo:
    ext_info = sp.search(q=f"track: {s.song} artist: {s.artist}", type='track', limit=1)
    item = ext_info["tracks"]["items"][0]
    
    id = item["id"]
    date = item["album"]["release_date"]
    artist = item["artists"][0]["name"]
    song = item["name"]
    featured = len(item["artists"]) > 1

    return IDSongInfo(artist, song, date, id, featured)

In [119]:
start = datetime.now()
for s in songs.songs[:5]:
    print(s.artist, s.song)
    print(get_song_id(s))
print("\n\n", datetime.now()-start)

THE ANIMALS HOUSE OF THE RISING SUN
Italian Hitmakers: 'House of the rising sun (style the animals) - [karaoke version]' | 66G2sH4SjwYymh4YsSmiyv
THE ROLLING STONES IT'S ALL OVER NOW
The Rolling Stones: 'It's All Over Now - Mono Version' | 6fLTxkL2wgC40fBM6IIe7r
P J PROBY HOLD ME
ØGM: 'Hold Me' | 7xJ9izrPOR2qyPW7N6jIjP
ROY ORBISON IT'S OVER
Roy Orbison: 'It's Over' | 3HZMEIZY9Z6GdhPaG5bAK2
THE BACHELORS RAMONA
The Bachelors: 'Ramona' | 0v7jFrp11rRCc61st81ZYB


 0:00:00.936188
