In [1]:
import os
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Working folders, all resolved against the directory the notebook is run from.
BASE_DIR = os.getcwd()
DATA_DIR, TEMP_DIR = (os.path.join(BASE_DIR, folder) for folder in ('Data', 'Temp'))

In [26]:
def links(url, timeout=30):
    """Return the absolute URLs found in the bucket items of a Beatport page.

    Parameters
    ----------
    url : str
        Page to download (e.g. an artist's charts page).
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` can block indefinitely on a stalled connection.

    Returns
    -------
    list of str or bool
        One absolute URL per ``li.bucket-item`` element that contains an
        anchor with an ``href``. ``False`` is returned when the response
        status is not 2xx (kept for backward compatibility with callers
        that test for it).
    """
    base_url = 'https://www.beatport.com'
    r = requests.get(url, timeout=timeout)
    # Accept the whole 2xx range; range(200, 299) used to reject 299.
    if not 200 <= r.status_code < 300:
        return False
    soup = BeautifulSoup(r.content, 'html.parser')
    found = []
    for item in soup.select('li.bucket-item'):
        anchor = item.find('a')
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # Bucket item without a usable link: skip it instead of crashing.
            continue
        # urljoin leaves absolute hrefs alone and inserts the '/' that plain
        # string concatenation would miss for relative ones.
        found.append(urljoin(base_url, href))
    return found

def chart(link, timeout=30):
    """Scrape one chart page and return its record labels as a pandas Series.

    Parameters
    ----------
    link : str
        URL of a single chart page.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.Series or None
        The text of the anchor inside every ``p.buk-track-labels`` element
        except the first, in page order, named after the text of the first
        ``span.value`` element (the chart date in the recorded output).
        ``None`` is returned, after printing ``'Invalid.'``, when an expected
        element is missing from the page.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"}
    # headers must be passed by keyword: the second positional argument of
    # requests.get is `params`, which would put the User-Agent in the query
    # string instead of sending it as an HTTP header.
    r = requests.get(link, headers=headers, timeout=timeout)
    soup = BeautifulSoup(r.content, 'html.parser')
    try:
        date = soup.find('span', class_='value').string
        # NOTE(review): the first match is skipped, presumably because it is
        # the table header row - confirm against the page markup.
        label_tags = soup.find_all('p', class_='buk-track-labels')[1:]
        labels = [tag.find('a').string for tag in label_tags]
    except AttributeError:
        # A find() returned None, i.e. the page lacks the expected markup.
        print('Invalid.')
        return None
    return pd.Series(labels, dtype=str, name=date)
        
def run(url):
    """Scrape every chart linked from ``url`` and combine them into a DataFrame.

    Parameters
    ----------
    url : str
        Page whose chart links are collected with ``links``.

    Returns
    -------
    pandas.DataFrame
        One column per successfully scraped chart, in link order. An empty
        DataFrame is returned when there is nothing to combine.

    Raises
    ------
    RuntimeError
        If ``links`` reports a failed download by returning ``False``.
    """
    chart_links = links(url)
    if chart_links is False:
        # Previously this surfaced as an AttributeError on `False.pop`.
        raise RuntimeError('Could not fetch the chart list from ' + str(url))
    # NOTE(review): the last link is deliberately dropped, as in the original
    # code; the reason is not visible here - confirm. Slicing (unlike pop)
    # is also safe on an empty list.
    chart_links = chart_links[:-1]
    scraped = (chart(link) for link in chart_links)
    # chart() returns None for pages it could not parse; leave those out.
    columns = [series for series in scraped if series is not None]
    # pd.concat raises on an empty list, so fall back to an empty frame.
    data = pd.concat(columns, axis=1) if columns else pd.DataFrame()
    print('Done.')
    return data

In [27]:
# Charts page of a single artist; run() downloads the charts linked from it,
# so this cell needs network access.
url = 'https://www.beatport.com/artist/roger-m/94966/charts'
df = run(url)

Done.


In [28]:
# Show the scraped table: one column per chart (named by chart() after the
# page's date field), one row per position in the chart.
df

Unnamed: 0,2021-08-16,2021-07-05,2021-06-08,2021-05-06,2021-03-28,2021-02-28,2021-01-26,2021-01-02,2020-12-17,2020-10-30,...,2020-06-12,2020-05-08,2020-03-26,2019-11-17,2019-10-11,2019-09-14,2019-08-06,2019-07-08,2019-06-22,2019-05-14
0,Perfecto Records,Black Hole Recordings,Incorrect,Armada Music,EDMA,EDMA,Perfecto House,Armada Music,D4 D4NCE,EDMA,...,PinkStar Records,SPINNIN' RECORDS,EDMA,PornoStar Records,Sirup Records,Armada Music Albums,Get Physical Music,EDMA,Wired,Catch & Release
1,Anjunadeep,Perfecto Records,STEREOHYPE,Caballero Recordings,SPINNIN' DEEP,Ministry of Sound Recordings,EDMA,Defected,Perfecto Records,AFTR:HRS,...,Armada Music,EDMA,Armada Music Albums,Repopulate Mars,ABODE Records,PornoStar Records,New State Music,Big Beat Records,Catch & Release,Wired
2,Pryda Presents,Sink or Swim,Farris Wheel Recordings,MASTERMIX RECORDS,Afterlife Records,Tiger Records,Virgin,EDMA,Defected,Musical Freedom,...,Ultra,Nativa Recordings,Kontor Records,Drumcode,Cr2 Records,PornoStar Records,Ammo Recordings,SPINNIN' DEEP,EDMA,Sirup Records
3,CircoLoco Records,Enormous Chills,Rose Avenue,Crosstown Rebels,Savage Disco,Armada Music,Juicy Traxx,Armind (Armada),RCA Records Label,Generation HEX,...,RCA Records Label,Idol Records,Tactical Records,Cr2 Records,EDMA,Infectious Music,Ninja Tune,Lost Records,Stashed,Club Sweat
4,EDMA,The Vault,Oddity Records,Lost & Found,EDMA,Perfecto House,DOORN RECORDS,Flashover Recordings,ISO LAT,Groovy Firehorse 66,...,Armada Subjekt,SPINNIN' RECORDS,HouseU,Ministry of Sound Recordings,Low:Res,Get Physical Music,Defected,Catch & Release,EDMA,Ministry of Sound Recordings
5,DAYS like NIGHTS,Anjunabeats,FSOE,Repopulate Mars,Pryda Presents,Generation HEX,Pryda Presents,EDMA,Glasgow Underground,Tactical Records,...,EDMA,Tactical Records,Tactical Records,Glasgow Underground,DFTD,Axtone Records,Club Sweat,EDMA,Sub Society,Defected
6,Hell Beach,SPINNIN' DEEP,EDMA,Black Book Records,Flashover Recordings,Anjunadeep,Big Beat Records,Anjunadeep,METAPHYSICAL,Enormous Tunes,...,Fishtone Records,A state of Trance,Whore House,Embassy One,Defected,DIRTYBIRD,Anjunadeep,Maya Records,Diper Records,Ninja Tune
7,Ultra,Dog Triumph,Another Rhythm,Pryda Presents,A state of Trance,Juicy Traxx,EDMA,Siamese,Glasgow Underground,SPINNIN' RECORDS,...,EDMA,SPINNIN' RECORDS,Darksound Recordings,Diynamic,PornoStar Records,Next-Gen-Records,SPINNIN' DEEP,Votiva Records,Renaissance Records,Big Beat Records
8,The Myth of NYX,Armada Electronic Elements,ZYX TRANCE,Hell Beach,Sink or Swim,Afterlife Records,Arcane Music,Parlophone (France),Toolroom,SPINNIN' RECORDS,...,Trance All-Stars Records,EDMA,Different,Ministry of Sound Recordings,Defected,RCA Records Label,Afterlife Records,Stashed,Big Top Amsterdam,Selador
9,Space Motion Records,Armada Music,Tactical Records,Crosstown Rebels,Timeless Moment,Anjunabeats,EDMA,FFRR,Fishtone Records,Caballero Recordings,...,,,EDMA,Capitol Records,Juicy Music,EDMA,SOLOTOKO,,EDMA,


In [29]:
# to_csv does not create missing folders, so make sure the target directory
# exists before writing (no-op when it is already there).
os.makedirs(DATA_DIR, exist_ok=True)
df.to_csv(os.path.join(DATA_DIR, 'roger.csv'))