In [1]:
import pandas as pd
import numpy as np
import requests
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import os
from datetime import datetime, timedelta
from time import sleep
from bs4 import BeautifulSoup
from typing import Tuple, List, NamedTuple
from db_management.db import SongInfo, IDSongInfo, SongsContainer, SongsDB 

In [2]:
# Spotify credentials are read from the environment; a missing variable
# raises KeyError here rather than failing later during authentication.
CLIENT_ID = os.environ["SPOTIFY_CLIENT_ID"]
CLIENT_SECRET = os.environ["SPOTIFY_CLIENT_SECRET"]

# Authenticated client shared by the rest of the notebook (used as `sp`).
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        redirect_uri='http://example.com',
        scope="playlist-modify-public",
    )
)

# One-off sample search; the response is stored in `a` and is not used by
# any other cell visible in this notebook.
track_name = "Save Your Tears"
artist_name = "Weeknd"

# NOTE(review): Spotify's documented field filters are written without a
# space after the colon (`track:Name artist:Name`) - confirm that the
# spaced form used here filters as intended.
a = sp.search(
    q=f"track: {track_name} artist: {artist_name}",
    type='track',
    limit=1,
)

In [3]:
# URL template for one singles-chart page. The `%s` placeholder is filled with
# the chart date via `ENDPOINT_CHARTS % date`; this notebook formats that date
# as YYYYMMDD.
# NOTE(review): the meaning of the trailing `7501` path segment is not evident
# from this notebook - confirm before changing it.
ENDPOINT_CHARTS = "https://www.officialcharts.com/charts/singles-chart/%s/7501/"

def generate_dates(week_gap: int = 2, years_back: int = 50, today: "datetime | None" = None):
    """Yield evenly spaced datetimes from `years_back` years ago up to `today`.

    The start date is `today - 365 * years_back` days, so leap days are not
    accounted for and the start drifts by roughly one day per four years.

    Args:
        week_gap: Spacing between consecutive dates, in weeks.
        years_back: How many (365-day) years before `today` to start.
        today: Reference "now". Defaults to `datetime.now()`; pass a fixed
            value to make the sequence reproducible.

    Yields:
        datetime: `start_date + n * week_gap` weeks for each whole step that
        fits between the start date and `today` (`today` itself is excluded
        unless it falls exactly on a step boundary short of the count).

    Side effects:
        Prints the number of dates that will be produced. Because this is a
        generator, the print happens when iteration starts, not at call time.
    """
    if today is None:
        today = datetime.now()
    start_date = today - timedelta(days=365*years_back)

    # Compute the step range once and reuse it for both the count and the loop
    # instead of materialising a throwaway list just to measure its length.
    steps = range(int((today - start_date).days//(week_gap*7)))
    print(len(steps))

    for n in steps:
        yield start_date + timedelta(days=n*week_gap*7)

In [5]:
def retrieve_top_songs(date: str, container: SongsContainer, timeout: float = 30) -> SongsContainer:
    """Scrape one chart page and add every parsable entry to `container`.

    Args:
        date: Value substituted into `ENDPOINT_CHARTS` to select the chart page.
        container: Collection that receives a `SongInfo(artist, song)` for each
            chart item found; it is mutated in place.
        timeout: Seconds to wait for the HTTP request before giving up, so one
            stalled connection cannot hang a long scraping loop.

    Returns:
        The same `container`, on success and on failure alike. If the request
        or page parsing fails, the error is printed and the container is
        returned with whatever had been added up to that point.
    """
    try:
        response = requests.get(ENDPOINT_CHARTS % date, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        for item in soup.find_all("div", {"class": "chart-item"}):
            try:
                song = item.find("a", {"class": "chart-name"}).find("span", {"class": None})
                artist = item.find("a", {"class": "chart-artist"}).find("span", {"class": None})

                container.add_song(SongInfo(artist.text, song.text))
            except Exception:
                # Best effort: skip entries whose markup does not match (a
                # missing tag makes `.find`/`.text` fail on None) or that the
                # container rejects. `Exception` rather than a bare `except`
                # so that Ctrl-C / kernel interrupt still stops the run.
                continue

    except Exception as exception:
        # Network/HTTP/parse failure for this page: report it and carry on.
        print(exception)

    return container

In [107]:
# Scrape one chart page per week over the last 60 (365-day) years into a
# fresh container.
songs = SongsContainer()
weekly_dates = generate_dates(week_gap=1, years_back=60)

for idx, single_date in enumerate(weekly_dates):
    date = f"{single_date:%Y%m%d}"
    retrieve_top_songs(date, songs)

    # Progress marker every 101 pages (idx 100, 201, 302, ...).
    if (idx + 1) % 101 == 0:
        print(idx, date)

3128
100 19660615
201 19680522
302 19700429
403 19720405
504 19740313
605 19760218
706 19780125
807 19800102
908 19811209
1009 19831116
1110 19851023
1211 19870930
1312 19890906
1413 19910814
1514 19930721
1615 19950628
1716 19970604
1817 19990512
1918 20010418
2019 20030326
2120 20050302
2221 20070207
2322 20090114
2423 20101222
2524 20121128
2625 20141105
2726 20161012
2827 20180919
2928 20200826
3029 20220803


In [108]:
# Size of the scraped collection, as reported by SongsContainer.__len__.
len(songs)

43220

In [109]:
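# Disabled export of the scraped songs to CSV; uncomment to (re)write songs69.csv.
# NOTE: the following cell loads "songs2.csv", not the file written here.
#with open("songs69.csv", "w", encoding="utf-8") as file:
#    file.write(songs.get_csv())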

In [6]:
# Rebind `songs` to a fresh container and populate it from a previously saved
# CSV; this replaces whatever `songs` held before (e.g. the scraped result).
songs = SongsContainer()
songs.from_csv("songs2.csv")

In [7]:
# Size of the collection currently bound to `songs`, as reported by
# SongsContainer.__len__.
len(songs)

42927

In [8]:
def get_song_id(s: SongInfo) -> IDSongInfo:
    """Look up a song on Spotify and return it enriched with Spotify metadata.

    Uses the module-level `sp` client and takes the first search hit only.

    Args:
        s: Song to look up; its `song` and `artist` attributes build the query.

    Returns:
        IDSongInfo built from the first hit, in the order
        (artist, song, date, id, featured): the first listed artist's name,
        the track name, the album release date, the Spotify track id, and
        `1` if the track lists more than one artist, else `0`.

    Raises:
        IndexError: If the search returns no tracks. The message names the
            song and artist so failures in a long batch can be traced.
    """
    # NOTE(review): Spotify's documented field filters are written without a
    # space after the colon (`track:Name artist:Name`) - confirm that the
    # spaced form used here filters as intended.
    ext_info = sp.search(q=f"track: {s.song} artist: {s.artist}", type='track', limit=1)

    items = ext_info["tracks"]["items"]
    if not items:
        # Same exception type as indexing the empty list would raise, but with
        # a message that identifies the song instead of "list index out of range".
        raise IndexError(f"no Spotify track found for {s.song!r} by {s.artist!r}")
    item = items[0]

    track_id = item["id"]  # avoid shadowing the builtin `id`
    release_date = item["album"]["release_date"]
    artist = item["artists"][0]["name"]
    song = item["name"]
    featured = int(len(item["artists"]) > 1)

    return IDSongInfo(artist, song, release_date, track_id, featured)

In [None]:
# Resolve Spotify metadata for the scraped songs, checkpointing to CSV and
# pausing between batches.
# NOTE(review): the output recorded under this cell shows checkpoints every 50
# songs up to ~2450, which does not match the current code (first 5 songs,
# checkpoint on every even index) - the cell was edited after it was last run.
start = datetime.now()

songs_id = SongsContainer()

for idx, s in enumerate(songs.songs[:5]):
    try:
        songs_id.add_song(get_song_id(s))

        # Checkpoint only on even, non-zero indices.
        if idx == 0 or idx % 2 != 0:
            continue

        print(f"{idx}: cooldown 5s...")
        # NOTE(review): this writes from the whole container in append mode on
        # every checkpoint; presumably earlier rows are written again each
        # time - verify against SongsContainer.save_to_csv.
        songs_id.save_to_csv("songs_spotify_ids.csv", mode="a")
        sleep(5)
    except Exception as exception:
        # Any failure (lookup, add, or save) is reported and followed by a
        # longer pause before moving on to the next song.
        print(exception)
        sleep(10)

elapsed = datetime.now() - start
print("\n\n", elapsed)

50: cooldown 5s...
100: cooldown 5s...
150: cooldown 5s...
200: cooldown 5s...
250: cooldown 5s...
300: cooldown 5s...
350: cooldown 5s...
400: cooldown 5s...
450: cooldown 5s...
500: cooldown 5s...
550: cooldown 5s...
600: cooldown 5s...
650: cooldown 5s...
700: cooldown 5s...
750: cooldown 5s...
800: cooldown 5s...
850: cooldown 5s...
900: cooldown 5s...
950: cooldown 5s...
1000: cooldown 5s...
1050: cooldown 5s...
1100: cooldown 5s...
1150: cooldown 5s...
1200: cooldown 5s...
1250: cooldown 5s...
1300: cooldown 5s...
1350: cooldown 5s...
1400: cooldown 5s...
1450: cooldown 5s...
1500: cooldown 5s...
1550: cooldown 5s...
1600: cooldown 5s...
1650: cooldown 5s...
1700: cooldown 5s...
1750: cooldown 5s...
1800: cooldown 5s...
1850: cooldown 5s...
1900: cooldown 5s...
1950: cooldown 5s...
2000: cooldown 5s...
2050: cooldown 5s...
2100: cooldown 5s...
2150: cooldown 5s...
2200: cooldown 5s...
2250: cooldown 5s...
2300: cooldown 5s...
2350: cooldown 5s...
2400: cooldown 5s...
2450: cooldo

In [33]:
# Write the collected Spotify-enriched songs to a separate CSV, using
# save_to_csv's default mode (the loop above appends to
# "songs_spotify_ids.csv" instead).
songs_id.save_to_csv("songs_test.csv")