### Crawler

In [163]:
import pymongo

# Connect to the local MongoDB server and select the crawl database.
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient['1001']

# Empty every collection the crawler writes edge/tracklist documents to,
# so a fresh crawl starts from a clean slate.
for _collection_name in ('track_docs',
                         'artist_docs',
                         'played_docs',
                         'tracklist_docs',
                         'sequential_docs'):
    mydb[_collection_name].delete_many({})

# Sanity check: print the number of documents left in three of them.
for _collection_name in ('track_docs', 'artist_docs', 'played_docs'):
    print(len(list(mydb[_collection_name].find({}))))

0
0
0


In [None]:
def _extract_link_url(track_chunk, link_title, start_index):
    '''
    Return the absolute URL of the link carrying ``link_title``, or 'N/A'.

    The title attribute is located with ``find_str`` (searching after
    ``start_index``). From there the text between the first and second
    ``class`` tokens is taken, and the path is whatever sits between the
    first ``href="`` in that text and its closing quote.
    'N/A' is returned when the title or the href cannot be found.
    '''
    link_index = find_str(track_chunk, link_title, start_index)
    if link_index == -1:
        return 'N/A'
    try:
        path = track_chunk[link_index:].split('class')[1].split('href="')[1].split('"')[0]
    except IndexError:
        # No 'class' token or no href after the title attribute.
        return 'N/A'
    return 'https://www.1001tracklists.com' + path


def tracklist_track_data(html):
    '''
    Extract track related data from set

    Scans ``html`` for every ``tracknumber_value">`` marker and parses the
    text from the marker up to the next ``<br>`` into one track document.

    Parameters:
        html: page source of a tracklist page, as a string.

    Returns:
        dict mapping the track number text to a dict with the keys
        'track_num', 'artist', 'artist_url', 'name', 'track_url',
        'remixer' and 'remixer_url'. Links that cannot be found are
        stored as 'N/A'.

    Notes:
        * Entries without parseable metadata, and entries flagged as 'ID',
          are skipped.
        * Only one document is stored per track number: when the parsed
          artist list has several entries, the last artist overwrites the
          earlier ones. All of them share the same artist/remixer/track URLs.
        * Relies on find_str, fix_decoding_errors and parse_track_and_artist,
          which are defined outside this function.
    '''
    track_marker = 'tracknumber_value">'
    name_marker = 'meta itemprop="name" content='

    track_docs = {}
    index = 0
    while True:
        index = find_str(html, track_marker, index)
        if index == -1:
            break

        # One track entry runs from its number marker to the next <br>.
        chunk_end = html.find('<br>', index)
        track_chunk = html[index:] if chunk_end == -1 else html[index:chunk_end]

        # Extract track number: at most three characters after the marker,
        # cut at the opening of the next tag. The marker is removed by
        # slicing; str.strip(marker) would strip a *character set* and could
        # also eat characters belonging to the number itself.
        number_start = len(track_marker)
        track_num = track_chunk[number_start:number_start + 3].split('<')[0]

        # Extract track information ("Artist - Track (Remixer Remix)" text).
        name_index = find_str(track_chunk, name_marker, 0)
        if name_index == -1:
            # No metadata tag in this entry: nothing to parse.
            continue
        value_start = name_index + len(name_marker)
        extracted_value = track_chunk[value_start:].split('>')[0].strip('"')
        clean_string = fix_decoding_errors(extracted_value)

        if len(clean_string) <= 1:
            continue
        try:
            artist_list, track, remixer = parse_track_and_artist(clean_string)
        except Exception:
            # If track info pull failed then skip this entry.
            continue

        if artist_list is None:
            continue
        # Avoid ID's for now. Note the asymmetry: artist_list is matched by
        # element, track by substring.
        if ('ID' in artist_list) or ('ID' in track):
            continue

        # The link lookups do not depend on the individual artist, so they
        # are done once per track entry.
        artist_index = find_str(track_chunk, 'title="open artist page"', 0)
        artist_url = _extract_link_url(track_chunk, 'title="open artist page"', 0)

        # Extract remixer page (if exists); searched after the artist link.
        if remixer != 'N/A':
            remixer_url = _extract_link_url(
                track_chunk, 'title="open remixer artist page"', artist_index)
        else:
            remixer_url = 'N/A'

        # Extract track page; also searched after the artist link.
        track_url = _extract_link_url(
            track_chunk, 'title="open track page"', artist_index)

        # Tends to be multiple artists so artists parsed to list even if only one
        for artist in artist_list:
            track_docs[track_num] = {
                'track_num': track_num,
                'artist': artist.strip(' '),
                'artist_url': artist_url.strip(' '),
                'name': track.strip(' '),
                'track_url': track_url.strip(' '),
                'remixer': remixer.strip(' '),
                'remixer_url': remixer_url.strip(' '),
            }

    return track_docs

def build_artist_edges(url_doc, url):
    '''
    Build artist set-adjacency docs -- order n^2.
    Dont iterate over full set twice since will be considered non-directional

    Parameters:
        url_doc: dict whose 'track_docs' entry maps to track documents, each
                 having an 'artist' key.
        url: tracklist url the edges belong to; stored on every edge.

    Returns:
        dict mapping '<url>_<artist1>_<artist2>' to
        {'artist1': ..., 'artist2': ..., 'url': url}. Every pair of tracks is
        visited once, in track order, and pairs with the same artist on both
        sides are skipped. Because pairs follow track order, an artist that
        appears on several tracks can yield both 'A_B' and 'B_A' entries.
    '''
    from itertools import combinations

    artists = [track['artist'] for track in url_doc['track_docs'].values()]

    edge_docs = {}
    for first_artist, second_artist in combinations(artists, 2):
        if first_artist == second_artist:
            continue
        # The id only serves as the dict key; it is not stored in the doc.
        _id = '_'.join([url, first_artist, second_artist])
        edge_docs[_id] = {
            'artist1': first_artist,
            'artist2': second_artist,
            'url': url,
        }
    return edge_docs

def build_track_edges(track_docs, url):
    '''
    Build track set-adjacency docs for one tracklist -- order n^2.

    The keys of ``track_docs`` are sorted and every unordered pair of
    distinct keys is emitted once (earlier key first), since the edges are
    treated as non-directional.

    Returns a dict mapping an edge id to
    {'track1': key, 'track2': other_key, 'url': url}. The id is the url
    followed by both keys, each passed through '_'.join -- so a plain string
    key ends up with '_' between its characters.
    '''
    ordered_keys = sorted(track_docs.keys())

    edge_docs = {}
    for position, key in enumerate(ordered_keys):
        # Only look at keys after the current one: each pair is built once.
        for other_key in ordered_keys[position + 1:]:
            if key == other_key:
                continue
            edge_id = '_'.join((url, '_'.join(key), '_'.join(other_key)))
            edge_docs[edge_id] = {
                'track1': key,
                'track2': other_key,
                'url': url,
            }
    return edge_docs
                
def build_sequential_track_edges(track_docs, url):
    '''
    Build "played right after" docs for consecutive tracks of one tracklist.
    Allows for later "next track lookup" functionality.

    Tracks are ordered by their 'track_num' value and each neighbouring pair
    becomes one document holding the url, both track keys and both positions
    (positions converted with str()). The returned dict is keyed by the url
    joined with both keys, each passed through '_'.join.

    NOTE(review): the ordering is a plain comparison of the stored
    'track_num' values; if those are strings, '10' sorts before '9' --
    confirm the numbers are zero-padded upstream.
    '''
    ordered = sorted(
        ((doc['track_num'], key) for key, doc in track_docs.items()),
        key=lambda pair: pair[0],
    )

    seq_docs = {}
    # Pair every track with its successor in the ordered list.
    for (first_pos, first_key), (second_pos, second_key) in zip(ordered, ordered[1:]):
        seq_id = '_'.join([url, '_'.join(first_key), '_'.join(second_key)])
        seq_docs[seq_id] = {
            'url': url,
            'first_track': first_key,
            'second_track': second_key,
            'first_position': str(first_pos),
            'second_position': str(second_pos),
        }
    return seq_docs

def build_played_playedby_edge(url_doc, url):
    '''
    Build "DJ played track" docs for one tracklist.
    Allows you to map who plays who (a directional relation).

    Reads 'dj_name', 'dj_url' and 'track_docs' from ``url_doc``. When either
    DJ field is 'N/A' an empty dict is returned. Otherwise one document per
    track is returned, keyed by '<url>_<dj_name>_<track name>'; tracks that
    share a name therefore collapse into one entry (the later one wins).
    '''
    dj_name = url_doc['dj_name']
    dj_url = url_doc['dj_url']
    if dj_name == 'N/A':
        return {}
    if dj_url == 'N/A':
        return {}

    # (output field, field of the track document it is copied from)
    copied_fields = (
        ('played', 'name'),
        ('played_track_url', 'track_url'),
        ('played_artist', 'artist'),
        ('played_artist_url', 'artist_url'),
        ('played_remixer', 'remixer'),
        ('played_remixer_url', 'remixer_url'),
    )

    played_docs = {}
    for track_doc in url_doc['track_docs'].values():
        edge_id = '_'.join([url, dj_name, track_doc['name']])
        edge = {'url': url, 'played_by': dj_name, 'played_by_url': dj_url}
        for target, source in copied_fields:
            edge[target] = track_doc[source]
        played_docs[edge_id] = edge
    return played_docs



class Crawler:
    '''
    Depth-limited, recursive crawler for 1001tracklists.com.

    Every fetched page is cached in memory; pages whose url contains
    '/tracklist/' are additionally parsed into tracklist / edge documents,
    which are written to MongoDB, kept in in-memory lists and pickled to disk.
    '''

    def __init__(self, max_depth=1, max_tracklists=5000, request_delay=10):
        '''
        Parameters:
            max_depth: recursion depth at which crawl() stops following links.
            max_tracklists: number of parsed tracklists after which the
                            crawl stops (previously hard-coded to 5000).
            request_delay: seconds to sleep before each HTTP request
                           (previously hard-coded to 10).
        '''
        # Imported here so that merely defining the class does not need pymongo.
        import pymongo

        myclient = pymongo.MongoClient("mongodb://localhost:27017/")
        db = myclient['1001']

        # MongoDB collections the crawler writes to.
        self.url_html_map = db['url_html_map']
        self.tracklist_collection = db['tracklist_docs']
        self.played_collection = db['played_docs']
        self.track_collection = db['track_docs']
        self.artist_collection = db['artist_docs']
        self.sequential_collection = db['sequential_docs']

        # Set once max_tracklists has been reached; crawl() then returns early.
        self.stop_search = False

        self.urls = []
        self.max_depth = max_depth
        self.max_tracklists = max_tracklists
        self.request_delay = request_delay
        # url -> html of every page fetched so far (also the "visited" set).
        self.page_hash = {}
        self.tracklist_hash = {}

        # In-memory copies of everything inserted into the collections.
        self.track_docs = []
        self.played_docs = []
        self.artist_docs = []
        self.tracklist_docs = []
        self.sequential_docs = []

    def find_str(self, s, char, start_index=0):
        '''
        Return the index of the first occurrence of ``char`` in ``s`` that
        starts strictly after ``start_index``, or -1 when there is none.

        Because the search begins at ``start_index + 1``, passing the previous
        result back in yields the next occurrence; pass -1 to also consider a
        match at position 0. An empty ``char`` is reported as not found.
        ``start_index`` is expected to be >= -1.
        '''
        if not char:
            return -1
        return s.find(char, start_index + 1)

    def request(self, url):
        '''
        Fetch ``url`` with a browser-like User-Agent and return the page text.

        Errors raised by urllib propagate to the caller.
        '''
        from urllib.request import Request, urlopen

        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # Close the connection deterministically instead of leaking it.
        with urlopen(req) as response:
            body = response.read()
        # NOTE: str() on bytes yields the "b'...'" repr with escaped bytes,
        # not decoded text. Left unchanged on purpose: the parsing helpers
        # used by parse() receive exactly this representation.
        return str(body)

    def parse(self, url, html):
        '''
        Store the raw page, then extract and persist tracklist data from it.

        The raw url/html pair is always inserted into url_html_map. The
        extraction is best-effort: any error is reported on stdout and
        the page is skipped (documents already inserted stay inserted).
        '''
        import pickle

        url_doc = {'url': url, 'html': html}
        self.url_html_map.insert_one(url_doc)

        # TODO (original author): "GET RID OF THIS LATER" -- presumably refers
        # to the blanket try/except below.
        try:
            url_doc.update(tracklist_meta_data(html))
            url_doc.update(tracklist_general_information(html))

            track_docs = tracklist_track_data(html)
            url_doc['track_docs'] = track_docs

            track_edges = list(build_track_edges(track_docs, url).values())
            sequential_edges = list(build_sequential_track_edges(track_docs, url).values())
            played_edges = list(build_played_playedby_edge(url_doc, url).values())
            artist_edges = list(build_artist_edges(url_doc, url).values())

            self.tracklist_collection.insert_one(url_doc)
            for doc in track_edges:
                self.track_collection.insert_one(doc)
            for doc in artist_edges:
                self.artist_collection.insert_one(doc)
            for doc in played_edges:
                self.played_collection.insert_one(doc)
            for doc in sequential_edges:
                self.sequential_collection.insert_one(doc)

            self.played_docs.extend(played_edges)
            self.track_docs.extend(track_edges)
            self.tracklist_docs.append(url_doc)
            self.artist_docs.extend(artist_edges)
            self.sequential_docs.extend(sequential_edges)

            print('Len played docs:', len(self.played_docs))
            print('Len sequential docs:', len(self.sequential_docs))
            print('Len track docs:', len(self.track_docs))
            print('Len tracklist docs:', len(self.tracklist_docs))
            print('Len artist docs:', len(self.artist_docs))

            # Checkpoint everything collected so far. Each file is rewritten
            # in full after every parsed page.
            snapshots = (
                ('played_docs_crawl.pkl', self.played_docs),
                ('sequential_docs_crawl.pkl', self.sequential_docs),
                ('track_docs_crawl.pkl', self.track_docs),
                ('tracklist_docs_crawl.pkl', self.tracklist_docs),
                ('artist_docs_crawl.pkl', self.artist_docs),
                ('url_html_map_crawl.pkl', self.page_hash),
            )
            for filename, payload in snapshots:
                with open(filename, 'wb') as f:
                    pickle.dump(payload, f)

        except Exception as exc:
            # Stay best-effort, but no longer hide the reason, and let
            # KeyboardInterrupt/SystemExit through.
            print('Parse failed for', url, '-', repr(exc))

    def crawl(self, url, depth):
        '''
        Fetch ``url`` (unless already fetched), parse it when it is a
        tracklist page, and recurse into the site links found in it.

        Returns None. Stops at max_depth or once max_tracklists tracklists
        have been parsed.
        '''
        import time

        if len(self.tracklist_docs) >= self.max_tracklists:
            print('STOPPING SEARCH')
            self.stop_search = True

        print('Depth:', depth)
        if depth >= self.max_depth or self.stop_search:
            return

        # Check if already reached by search
        if url in self.page_hash:
            return

        # Only sleep if about to request
        time.sleep(self.request_delay)

        # Make http request; a page that cannot be fetched is skipped (and,
        # not being cached, will be retried if it is linked again).
        try:
            html = self.request(url)
        except Exception as exc:
            print('Request failed for', url, '-', repr(exc))
            return

        print(url)

        # Cache url-html map
        self.page_hash[url] = html

        # If tracklist page, parse and extract necessary data
        if '/tracklist/' in url:
            self.parse(url, html)

        base_url = 'https://www.1001tracklists.com'
        link_marker = 'a href="'

        # Iterate over links found in html
        index = self.find_str(html, link_marker, 0)
        while index != -1:

            # Extract url: the text between the marker's quote and the next one
            link_start = index + len(link_marker)
            link_end = html.find('"', link_start)
            url_chunk = html[link_start:] if link_end == -1 else html[link_start:link_end]

            is_relative = 'http' not in url_chunk
            not_anchor = '#tlp' not in url_chunk

            # The three checks are independent on purpose (as before): a
            # link may match more than one and is then followed for each.
            # NOTE(review): self.urls records the *current* page url for
            # every followed link, not the link itself -- confirm intent.
            if '/tracklist/' in url_chunk and is_relative and not_anchor:
                self.urls.append(url)
                self.crawl(base_url + url_chunk, depth + 1)

            if '/dj/' in url_chunk and is_relative and not_anchor:
                self.urls.append(url)
                self.crawl(base_url + url_chunk, depth + 1)

            # Absolute links that stay on the site (xml feeds excluded).
            if 'www.1001tracklists.com' in url_chunk and not_anchor and '.xml' not in url_chunk:
                self.urls.append(url)
                self.crawl(url_chunk, depth + 1)

            index = self.find_str(html, link_marker, index)

    def start_crawl(self, startUrl):
        '''Start crawling at ``startUrl`` with depth 0.'''
        self.crawl(startUrl, 0)
        
# Kick off a crawl (up to 50 links deep) from the Anjunadeep Edition source page.
start_page = 'https://www.1001tracklists.com/source/v7m7k3/the-anjunadeep-edition/index.html'
crawler = Crawler(max_depth=50)
crawler.start_crawl(start_page)

https://www.1001tracklists.com/tracklist/yx1xjj9/makj-revolution-radio-show-289-2018-11-19.html
Len played docs: 99240
Len sequential docs: 101116
Len track docs: 1923578
Len tracklist docs: 4030
Len artist docs: 1384909
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
Depth: 48
Depth: 48
Depth: 47
Depth: 47
Depth: 46
Depth: 46
Depth: 46
Depth: 46
https://www.1001tracklists.com/tracklist/27wbw50k/makj-revolution-radio-show-288-2018-11-12.html
Len played docs: 99259
Len sequential docs: 101134
Len track docs: 1923749
Len tracklist docs: 4031
Len artist docs: 1385080
Depth: 47
Depth: 47
Depth: 47
https://www.1001tracklists.com/tracklist/28u3ubht/makj-revolution-radio-show-287-2018-11-05.html
Len played docs: 99278
Len sequential docs: 101152
Len track docs: 1923920
Len tracklist docs: 4032
Len artist docs: 1385251
Depth: 48
Depth: 48
Depth: 48
https://www.1001tracklists.com/tracklist/1r3v8v19/makj-revolution-radio-show-286-2018-10-29.html
Len played docs: 99294
Len s

Depth: 48
Depth: 48
https://www.1001tracklists.com/tracklist/f6zhgyt/frank-nitty-kryders-kryteria-radio-168-2019-01-09.html
Len played docs: 99849
Len sequential docs: 101713
Len track docs: 1931029
Len tracklist docs: 4059
Len artist docs: 1391766
Depth: 49
https://www.1001tracklists.com/dj/franknitty/index.html
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/jyk8cnk/swedish-egil-frank-nitty-groove-radio-international-1373-2019-03-20.html
Len played docs: 99866
Len sequential docs: 101729
Len track docs: 1931165
Len tracklist docs: 4060
Len artist docs: 1391902
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1nbv957t/richard-vission-frank-nitty-swedish-egil-powertools-mixshow-2019-03-02.html
Len played d

Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/6rr4kd9/danny-howard-mark-knight-bbc-radio-1-dance-anthems-2013-11-16.html
Len played docs: 100291
Len sequential docs: 102154
Len track docs: 1937845
Len tracklist docs: 4078
Len artist docs: 1397617
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/5mzxk49/danny-howard-fatboy-slim-bbc-radio-1-dance-anthems-2013-12-07.html
Len played docs: 100343
Len sequential docs: 102207
Len track docs: 1939276
Len tracklist docs: 4079
Len artist docs: 1398843
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/5btrj3k/danny-howard-dimitri-vegas-and-like-mike-bbc-radio-1-dance-anthems-2013-06-29.html
Len played docs: 100394
Len sequential docs: 102261
Len track docs: 1940761
Len trac

Depth: 49
Depth: 49
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/2nx9qyg9/reuben-keeney-crizify-outhouse-022-2018-08-11.html
Len played docs: 100851
Len sequential docs: 102702
Len track docs: 1946633
Len tracklist docs: 4101
Len artist docs: 1404996
Depth: 49
Depth: 49
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/x14h3s9/reuben-keeney-outhouse-021-2018-08-04.html
Len played docs: 100863
Len sequential docs: 102713
Len track docs: 1946699
Len tracklist docs: 4102
Len artist docs: 1405062
Depth: 49
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/10s9y10k/reuben-keeney-latmun-outhouse-020-2018-07-28.html
Len played docs: 100875
Len sequential docs: 102724
Len track docs: 1946765
Len tracklist docs: 4103
Len artist docs: 1405128
Depth: 49
Depth: 49
https://www.1001tracklists.com/dj/latmun/index.html
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 

Len played docs: 101256
Len sequential docs: 103091
Len track docs: 1950353
Len tracklist docs: 4128
Len artist docs: 1408115
Depth: 47
Depth: 47
https://www.1001tracklists.com/dj/manpower/index.html
Depth: 48
Depth: 48
Depth: 48
https://www.1001tracklists.com/tracklist/xyu0rm9/man-power-juan-ramos-bis-radio-show-945-2018-07-03.html
Len played docs: 101283
Len sequential docs: 103118
Len track docs: 1950731
Len tracklist docs: 4129
Len artist docs: 1408445
Depth: 49
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/1ft9k8pk/man-power-trax-magazine-284-2018-05-09.html
Len played docs: 101294
Len sequential docs: 103128
Len track docs: 1950786
Len tracklist docs: 4130
Len artist docs: 1408500
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/2ffswuyt/b.traits-peggy-gou-julia-govor-bbc-radio-1-b.traits-show-2017-07-01.html
Len played docs: 101331
Len sequential docs: 103164
Len track docs: 1951452
Len tracklist docs: 4131
Len artist docs: 1409027


https://www.1001tracklists.com/tracklist/bc7uz89/kink-boiler-room-moscow-2014-11-27.html
Len played docs: 101932
Len sequential docs: 103777
Len track docs: 1962616
Len tracklist docs: 4151
Len artist docs: 1418626
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/30wuzpk/kink-bbc-radio-1-essential-mix-2014-05-31.html
Len played docs: 101957
Len sequential docs: 103801
Len track docs: 1962916
Len tracklist docs: 4152
Len artist docs: 1418906
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/55gpqz9/wankelmut-bbc-radio-1-essential-mix-2014-06-07.html
Len played docs: 101979
Len sequential docs: 103822
Len track docs: 1963147
Len tracklist docs: 4153
Len artist docs: 1419137
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/3yq1jr9/grum-bbc-radio-1-essential-mix-2014-05-24.html
Len played docs: 102004
Len sequential docs: 103847
Len track docs: 1963472
Len tracklist docs: 4154
Len artist docs: 1419341
Dept

Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1h0c44t/marc-romboy-systematic-session-287-2015-05-28.html
Len played docs: 102378
Len sequential docs: 104211
Len track docs: 1967859
Len tracklist docs: 4175
Len artist docs: 1423124
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/8rgpqrk/marc-romboy-roy-rosenfeld-systematic-session-286-2015-05-23.html
Len played docs: 102390
Len sequential docs: 104222
Len track docs: 1967925
Len tracklist docs: 4176
Len artist docs: 1423183
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1gzhq71/marc-romboy-systematic-session-285-2015-05-15.html
Len played docs: 102400
Len sequential docs: 104231
Len track docs: 1967970
Len tracklist docs: 4177
Len artist docs: 1423228
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/4r8f51t

Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/2kcu9h9/simian-mobile-disco-x-adidas-originals-boiler-room-primavera-sound-2013-barcelona-spain-2013-05-23.html
Len played docs: 102850
Len sequential docs: 104669
Len track docs: 1974175
Len tracklist docs: 4202
Len artist docs: 1428136
Depth: 50
Depth: 50
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/ykujw1/simian-mobile-disco-ministry-of-sound-radio-2013-01-30.html
Len played docs: 102876
Len sequential docs: 104694
Len track docs: 1974500
Len tracklist docs: 4203
Len artist docs: 1428461
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/2tv3xz9/simian-mobile-disco-bbc-radio-1-essential-mix-snar-festival-spain-2012-06-16.html
Len played docs: 102890
Len sequential docs: 104707
Len track docs: 1974591
Len tracklist docs: 4204
Len artist docs: 1428461
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com

Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1gqr54dt/josh-butler-joris-voorns-spectrum-radio-036-2017-12-14.html
Len played docs: 103279
Len sequential docs: 105091
Len track docs: 1979130
Len tracklist docs: 4227
Len artist docs: 1432139
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1zh462n9/sam-divine-josh-butler-defected-radio-show-2017-11-24.html
Len played docs: 103297
Len sequential docs: 105109
Len track docs: 1979301
Len tracklist docs: 4228
Len artist docs: 1432299
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1vrr2p51/josh-butler-data-transmission-podcast-557-2017-09-19.html
Len played docs: 103309
Len sequential docs: 105120
Len track docs: 1979367
Len tracklist docs: 4229
Len artist docs: 1432347
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth

https://www.1001tracklists.com/tracklist/9bv4j9/tim-berg-fun-radio-party-fun-2010-12-29.html
Len played docs: 103721
Len sequential docs: 105528
Len track docs: 1984547
Len tracklist docs: 4252
Len artist docs: 1436301
Depth: 48
Depth: 48
https://www.1001tracklists.com/tracklist/7l0uyt/avicii-energy-we-are-the-network-jaarbeurs-utrecht-netherlands-2011-02-19.html
Len played docs: 103732
Len sequential docs: 105538
Len track docs: 1984602
Len tracklist docs: 4253
Len artist docs: 1436346
Depth: 49
Depth: 49
Depth: 49
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/jtm129/avicii-dance-tunes-radio-energy-radio-episode-13-2011-02-04.html
Len played docs: 103743
Len sequential docs: 105548
Len track docs: 1984657
Len tracklist docs: 4254
Len artist docs: 1436391
Depth: 49
Depth: 49
Depth: 49
Depth: 49
Depth: 49
Depth: 48
Depth: 48
Depth: 47
https://www.1001tracklists.com/tracklist/bu5lfk/steve-smart-tim-berg-blow-ya-speakers-2010-10-26.html
Len played docs: 103755
Len

Depth: 47
Depth: 47
Depth: 47
Depth: 47
Depth: 46
https://www.1001tracklists.com/tracklist/1syc57k9/avicii-avicii-fm-002-2017-10-02.html
Len played docs: 104276
Len sequential docs: 106058
Len track docs: 1990725
Len tracklist docs: 4279
Len artist docs: 1441031
Depth: 47
Depth: 47
Depth: 47
Depth: 47
Depth: 46
https://www.1001tracklists.com/tracklist/2h9jq4t9/avicii-avicii-fm-001-2017-09-02.html
Len played docs: 104294
Len sequential docs: 106075
Len track docs: 1990878
Len tracklist docs: 4280
Len artist docs: 1441147
Depth: 47
Depth: 47
Depth: 47
Depth: 47
Depth: 46
Depth: 46
https://www.1001tracklists.com/tracklist/25zff279/avicii-le7els-062-2017-07-31.html
Len played docs: 104314
Len sequential docs: 106094
Len track docs: 1991068
Len tracklist docs: 4281
Len artist docs: 1441337
Depth: 47
Depth: 47
https://www.1001tracklists.com/tracklist/h1h9k7t/avicii-sunshine-live-mix-mission-2017-12-28.html
Depth: 47
https://www.1001tracklists.com/tracklist/25zbcwv9/avicii-le7els-061-2017-06-

https://www.1001tracklists.com/tracklist/10t1f82k/nicky-romero-charmes-protocol-radio-344-2019-03-14.html
Len played docs: 104787
Len sequential docs: 106546
Len track docs: 1995771
Len tracklist docs: 4307
Len artist docs: 1445274
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/237qm4dt/nicky-romero-protocol-radio-343-2019-03-07.html
Len played docs: 104803
Len sequential docs: 106561
Len track docs: 1995891
Len tracklist docs: 4308
Len artist docs: 1445394
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1wr2bp91/nicky-romero-dont-let-daddy-know-ziggo-dome-amsterdam-netherlands-2019-03-02.html
Len played docs: 104814
Len sequential docs: 106571
Len track docs: 1995946
Len tracklist docs: 4309
Len artist docs: 1445449
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1p8nqf4k/nicky-romero-keanu-silva-protocol-radio-342-20

https://www.1001tracklists.com/tracklist/zsvl3n1/avicii-le7els-053-2016-10-23.html
Len played docs: 105213
Len sequential docs: 106950
Len track docs: 1999736
Len tracklist docs: 4332
Len artist docs: 1448792
Depth: 49
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/2k4b1x9k/avicii-le7els-052-2016-09-26.html
Len played docs: 105228
Len sequential docs: 106964
Len track docs: 1999841
Len tracklist docs: 4333
Len artist docs: 1448897
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
Depth: 49
Depth: 48
Depth: 48
Depth: 48
Depth: 47
Depth: 46
Depth: 46
Depth: 46
Depth: 46
https://www.1001tracklists.com/tracklist/qj65m6k/avicii-1live-dj-session-2016-10-26.html
Len played docs: 105243
Len sequential docs: 106978
Len track docs: 1999946
Len tracklist docs: 4334
Len artist docs: 1449002
Depth: 47
Depth: 47
https://www.1001tracklists.com/tracklist/g2ps8vk/moguai-1live-dj-session-2016-11-20.html
Len played docs: 105259
Len sequentia

Depth: 49
Depth: 49
https://www.1001tracklists.com/dj/kito/index.html
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
Depth: 49
Depth: 49
Depth: 49
Depth: 48
Depth: 48
Depth: 47
Depth: 47
Depth: 46
Depth: 46
Depth: 45
Depth: 45
Depth: 45
Depth: 44
Depth: 44
Depth: 44
Depth: 44
Depth: 43
https://www.1001tracklists.com/tracklist/1bv2v3t/annie-mac-kove-gotsome-annie-mac-radio-show-2013-11-01.html
Len played docs: 105667
Len sequential docs: 107384
Len track docs: 2005048
Len tracklist docs: 4357
Len artist docs: 1453210
Depth: 44
Depth: 44
Depth: 44
Depth: 44
https://www.1001tracklists.com/tracklist/22qcts1/annie-mac-le-youth-annie-mac-radio-show-2013-11-08.html
Len played docs: 105692
Len sequential docs: 107410
Len track docs: 2005399
Len tracklist docs: 4358
Len artist docs: 1453552
Depth: 45
Depth: 45
https://www.1001tracklists.com/dj/leyouth/index.html
Depth: 46
Depth: 46
https://www.1001tracklists.com/tracklist/2h9mb1ck/le-youth-mixmag-sunset-session-

Depth: 49
Depth: 49
https://www.1001tracklists.com/dj/lliamtaylor/index.html
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/ktvuw01/latroit-morten-lradio-007-2016-05-23.html
Len played docs: 106162
Len sequential docs: 107867
Len track docs: 2010249
Len tracklist docs: 4383
Len artist docs: 1457798
Depth: 49
Depth: 49
https://www.1001tracklists.com/dj/morten/index.html
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/235dlfl9/latroit-lradio-005-2016-04-01.html
Len played docs: 106184
Len sequential docs: 107888
Len track docs: 2010480
Len tracklist docs: 4384
Len artist docs: 1458016
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/2gbfqllk/latroit-stooki-sound-l-

Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/2pu1kdt9/heatbeat-monster-072-2017-07-25.html
Len played docs: 106670
Len sequential docs: 108367
Len track docs: 2018473
Len tracklist docs: 4406
Len artist docs: 1463730
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1sy4wugk/heatbeat-monster-071-2017-07-18.html
Len played docs: 106679
Len sequential docs: 108375
Len track docs: 2018509
Len tracklist docs: 4407
Len artist docs: 1463766
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/15d8sgxt/heatbeat-monster-070-2017-06-21.html
Len played docs: 106690
Len sequential docs: 108385
Len track docs: 2018564
Len tracklist docs: 4408
Len artist docs: 1463821
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/2l2r6l8t/heatbeat-monster-069-2017-06-13.html
Len played docs: 106700
Len sequential docs: 108394
Len track docs: 2018609
Len tracklist docs: 4409
Len artist do

Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/12nlt6mt/marlo-altitude-radio-018-2019-02-25.html
Len played docs: 107077
Len sequential docs: 108755
Len track docs: 2021912
Len tracklist docs: 4434
Len artist docs: 1466405
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/w3yy46t/marlo-main-stage-tech-energy-set-a-state-of-trance-festival-900-jaarbeurs-utrecht-netherlands-2019-02-23.html
Len played docs: 107100
Len sequential docs: 108779
Len track docs: 2022212
Len tracklist docs: 4435
Len artist docs: 1466577
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1lh6lhp9/marlo-altitude-radio-017-2019-02-17.html
Len played docs: 107116
Len sequential docs: 108794
Len track docs: 2022332
Len tracklist docs: 4436
Len artist docs: 1466693
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1hptchz1/marlo-main-stage-trancemission-heartbeat-a2-arena-saint-petersburg-russia-2019-02

Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/25y6yddk/justin-oh-worldwide-stage-ultra-music-festival-miami-united-states-2016-03-18.html
Len played docs: 107579
Len sequential docs: 109350
Len track docs: 2038199
Len tracklist docs: 4463
Len artist docs: 1471000
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/781yujt/justin-oh-mainstage-ultra-music-festival-tokyo-japan-2015-09-19.html
Len played docs: 107595
Len sequential docs: 109366
Len track docs: 2038335
Len tracklist docs: 4464
Len artist docs: 1471125
Depth: 50
Depth: 50
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/8uhlbgt/justin-oh-mainstage-ultra-music-festival-korea-south-korea-2015-06-11.html
Len played docs: 107607
Len sequential docs: 109377
Len track docs: 2038401
Len tracklist docs: 4465
Len artist docs: 1471191
Depth: 50
Depth: 50
Depth: 49
https:/

https://www.1001tracklists.com/tracklist/85h02kk/wandw-mainstage-podcast-251-umf-miami-2015-04-03.html
Len played docs: 108100
Len sequential docs: 109863
Len track docs: 2045362
Len tracklist docs: 4486
Len artist docs: 1476182
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
Depth: 48
Depth: 47
Depth: 47
Depth: 47
Depth: 47
Depth: 47
https://www.1001tracklists.com/tracklist/7fnump1/the-magician-worldwide-stage-ultra-music-festival-miami-united-states-2015-03-29.html
Len played docs: 108116
Len sequential docs: 109878
Len track docs: 2045482
Len tracklist docs: 4487
Len artist docs: 1476302
Depth: 48
Depth: 48
Depth: 48
Depth: 47
Depth: 46
https://www.1001tracklists.com/tracklist/8gku9d1/le-youth-thump-splash-house-sessions-2015-06-10.html
Len played docs: 108128
Len sequential docs: 109889
Len track docs: 2045548
Len tracklist docs: 4488
Len artist docs: 1476349
Depth: 47
Depth: 47
Depth: 46
https://www.1001tracklists.com/tracklist/4

https://www.1001tracklists.com/tracklist/1m8dhsk/annie-mac-steve-angello-fake-blood-disclosure-annie-mac-radio-show-2012-11-09.html
Len played docs: 108924
Len sequential docs: 110711
Len track docs: 2075455
Len tracklist docs: 4512
Len artist docs: 1496221
Depth: 47
Depth: 47
Depth: 47
https://www.1001tracklists.com/dj/fakeblood/index.html
Depth: 48
Depth: 48
Depth: 48
https://www.1001tracklists.com/tracklist/2117cq1/fake-blood-fabric-promo-mix-july-2015-2015-07-28.html
Len played docs: 108944
Len sequential docs: 110730
Len track docs: 2075645
Len tracklist docs: 4513
Len artist docs: 1496398
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/96d61g1/fake-blood-rinse-fm-shadowchild-show-2015-06-01.html
Len played docs: 108954
Len sequential docs: 110739
Len track docs: 2075690
Len tracklist docs: 4514
Len artist docs: 1496440
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/9003unk/fake-blood-the-selector-show-2014-11-21.html
Len played docs:

https://www.1001tracklists.com/tracklist/242pxbhk/fake-blood-bbc-radio-1-essential-mix-2009-03-14.html
Len played docs: 109610
Len sequential docs: 111407
Len track docs: 2089592
Len tracklist docs: 4537
Len artist docs: 1506317
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/qzhvgt/marc-romboy-essential-mix-825-2009-03-21.html
Len played docs: 109630
Len sequential docs: 111426
Len track docs: 2089782
Len tracklist docs: 4538
Len artist docs: 1506466
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/235fwywt/herve-bbc-radio-1-essential-mix-2009-02-21.html
Len played docs: 109662
Len sequential docs: 111457
Len track docs: 2090278
Len tracklist docs: 4539
Len artist docs: 1506940
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
Depth: 47
Depth: 47
Depth: 47
https://www.1001tracklists.com/tracklist/3xyjxr1/annie-mac-duke-dumont-steve-angello-annie-mac-radio-show-2013-03-29.html
Len played docs: 109706
L

https://www.1001tracklists.com/tracklist/2gw5wnt/fred-v-and-grafix-hospital-podcast-225-2014-03-26.html
Len played docs: 110455
Len sequential docs: 112250
Len track docs: 2107662
Len tracklist docs: 4560
Len artist docs: 1517878
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/8jd5gdt/fred-v-and-grafix-best-of-2013-megamix-2013-12-04.html
Len played docs: 110471
Len sequential docs: 112265
Len track docs: 2107782
Len tracklist docs: 4561
Len artist docs: 1517963
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/9263tc9/fred-v-and-grafix-bbc-radio-1xtra-mistajam-daily-dose-2013-10-22.html
Len played docs: 110506
Len sequential docs: 112299
Len track docs: 2108377
Len tracklist docs: 4562
Len artist docs: 1518321
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/4vyn681/pete-tong-alesso-rudimental-essential-selection-2013-08-02.html
Len played docs: 110582
Len sequential docs: 112376


Depth: 49
https://www.1001tracklists.com/dj/anden/index.html
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/1twxccj9/diversion-colorcast-077-2018-07-16.html
Len played docs: 111096
Len sequential docs: 112883
Len track docs: 2119672
Len tracklist docs: 4584
Len artist docs: 1527133
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 48
Depth: 48
Depth: 47
https://www.1001tracklists.com/tracklist/16btpt7k/mahalo-enhanced-sessions-447-2018-04-09.html
Len played docs: 111116
Len sequential docs: 112902
Len track docs: 2119862
Len tracklist docs: 4585
Len artist docs: 1527306
Depth: 48
Depth: 48
https://www.1001tracklists.com/tracklist/1uuh3llt/marcus-santoro-enhanced-sessions-448-2018-04-16.html
Len played docs: 111132
Len sequential docs: 112917
Len track docs: 2119982
Len

Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/x0nqk8t/freejak-jaks-house-radio-011-2018-02-19.html
Len played docs: 111504
Len sequential docs: 113309
Len track docs: 2123986
Len tracklist docs: 4608
Len artist docs: 1530850
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/kuy4mxt/freejak-jaks-house-radio-episode-10-2018-01-22.html
Len played docs: 111518
Len sequential docs: 113322
Len track docs: 2124077
Len tracklist docs: 4609
Len artist docs: 1530941
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/178b3kx9/freejak-jaks-house-radio-09-2017-12-18.html
Len played docs: 111537
Len sequential docs: 113340
Len track docs: 2124248
Len tracklist docs: 4610
Len artist docs: 1531112
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/2qs56ykk/freejak-jaks-house-radio-08-2017-11-17.html
Len played docs: 111553
Len sequential docs: 113355
Len track docs: 2124368
Len

Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1tx5fm5t/electrik-cat-catcast-077-2018-09-26.html
Len played docs: 111994
Len sequential docs: 113774
Len track docs: 2128727
Len tracklist docs: 4636
Len artist docs: 1535215
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/np1sd4t/electrik-cat-jerry-ropero-catcast-076-2018-09-15.html
Len played docs: 112005
Len sequential docs: 113784
Len track docs: 2128782
Len tracklist docs: 4637
Len artist docs: 1535270
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/15fh9pu9/electrik-cat-alex-kenji-catcast-075-2018-09-08.html
Len played docs: 112027
Len sequential docs: 113805
Len track docs: 2129013
Len tracklist docs: 4638
Len artist docs: 1535488
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/27w49guk/electrik-cat-peter-brown-catcast-074

Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1grqv9jt/piem-too-many-rules-podcast-006-2018-11-22.html
Len played docs: 112566
Len sequential docs: 114324
Len track docs: 2135050
Len tracklist docs: 4663
Len artist docs: 1540605
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/zv0u0tt/piem-javi-bora-cessle-innit-trustnobody-show-021-2018-08-19.html
Len played docs: 112584
Len sequential docs: 114346
Len track docs: 2135303
Len tracklist docs: 4664
Len artist docs: 1540742
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/2m1p82vt/piem-the-next-evolution-027-2018-08-04.html
Len played docs: 112596
Len sequential docs: 114357
Len track docs: 2135369
Len tracklist docs: 4665
Len artist docs: 1540808
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1xnqdgv9/piem-juany-bravo-trustnobody-show-020-2018-07-

Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/xyqgq61/mde-my-digital-enemy-byor-mde-radio-290-2018-05-25.html
Len played docs: 112978
Len sequential docs: 114718
Len track docs: 2138305
Len tracklist docs: 4690
Len artist docs: 1543383
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/bdufcc1/mde-my-digital-enemy-mde-radio-289-2018-05-15.html
Len played docs: 112990
Len sequential docs: 114729
Len track docs: 2138371
Len tracklist docs: 4691
Len artist docs: 1543449
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/w325vz1/mde-my-digital-enemy-mde-radio-288-2018-05-14.html
Len played docs: 113005
Len sequential docs: 114743
Len track docs: 2138476
Len tracklist docs: 4692
Len artist docs: 1543554
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 

Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/1p8lkf49/adam-foster-adam-foster-radio-026-2019-02-08.html
Len played docs: 113400
Len sequential docs: 115112
Len track docs: 2141425
Len tracklist docs: 4718
Len artist docs: 1546241
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/j0gjj2k/adam-foster-adam-foster-radio-025-2019-01-27.html
Len played docs: 113418
Len sequential docs: 115129
Len track docs: 2141578
Len tracklist docs: 4719
Len artist docs: 1546385
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/2j8tfbz9/adam-foster-adam-foster-radio-024-2019-01-19.html
Len played docs: 113434
Len sequential docs: 115144
Len track docs: 2141698
Len tracklist docs: 4720
Len artist docs: 1546492
Depth: 50
Depth: 50
Depth: 50
Depth: 49
https://www.1001tracklists.com/tracklist/v5zl9pt/adam-foster-adam-foster-radio-best-of-2018-2018-12-30.html
Len played docs: 113476
Len sequential docs: 115185
Len

https://www.1001tracklists.com/tracklist/2hbhfubk/black-caviar-exquisite-house-selections-vol.-2-2018-03-06.html
Len played docs: 113974
Len sequential docs: 115657
Len track docs: 2148292
Len tracklist docs: 4747
Len artist docs: 1552636
Depth: 49
Depth: 49
Depth: 48
https://www.1001tracklists.com/tracklist/1vsn8qtt/black-caviar-exquisite-house-selections-vol.-1-2017-12-13.html
Len played docs: 114000
Len sequential docs: 115682
Len track docs: 2148617
Len tracklist docs: 4748
Len artist docs: 1552923
Depth: 49
Depth: 49
Depth: 47
Depth: 47
https://www.1001tracklists.com/tracklist/1nbwtg59/richard-vission-lavelle-dupree-gettoblaster-diplo-powertools-mixshow-mixshow-2019-03-15.html
Len played docs: 114036
Len sequential docs: 115717
Len track docs: 2149247
Len tracklist docs: 4749
Len artist docs: 1553456
Depth: 48
Depth: 48
https://www.1001tracklists.com/dj/lavelledupree/index.html
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/xzq6b61/freshcobar-lavelle-dupree-discotheq

Depth: 49
https://www.1001tracklists.com/dj/spektre/index.html
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
https://www.1001tracklists.com/tracklist/1q4ygdg9/moullinex-traxsource-live-140-2017-10-05.html
Len played docs: 114390
Len sequential docs: 116055
Len track docs: 2153009
Len tracklist docs: 4769
Len artist docs: 1556711
Depth: 50
Depth: 50
Depth: 50
Depth: 50
Depth: 49
Depth: 49
Depth: 48
Depth: 46
https://www.1001tracklists.com/tracklist/2dk18xm9/doorly-traxsource-live-183-2018-08-02.html
Len played docs: 114403
Len sequential docs: 116067
Len track docs: 2153087
Len tracklist docs: 4770
Len artist docs: 1556752


### Parser

- Should probably remove featured artists (feat. / ft.) from artist values
- and break artists up into multiple entries when they are joined by `&`, `vs.`, or something similar

In [156]:
import ftfy
import urllib
from urllib.request import Request, urlopen

def find_str(s, char, start_index=0):
    '''
    Find substring char in string s, searching strictly after start_index.

    The search starts at position start_index + 1, so a caller can pass the
    index of the previous hit to get the next one. As a consequence, with the
    default start_index of 0 a match at position 0 is never reported.

    Returns the absolute index of the first match in s, or -1 when there is
    no match (an empty char is treated as "no match").
    '''
    # An empty needle matches everywhere; report "not found" so that callers
    # looping on the result cannot spin forever.
    if not char:
        return -1
    # str.find does the scan in C without copying the tail of s
    return s.find(char, start_index + 1)

def extract_value(html, key_value):
    '''
    Return the attribute value that follows key_value at the start of html.

    html is expected to begin with key_value (e.g. 'content='); the value is
    the text up to the next '>' with surrounding double quotes removed.
    When html does not start with key_value nothing is removed from the front.
    '''
    # str.strip(key_value) would treat key_value as a *set of characters* and
    # also eat leading characters of the value itself, so drop the prefix
    # explicitly instead.
    if html.startswith(key_value):
        html = html[len(key_value):]
    content_string = html.split('>')[0].strip('"')
    return content_string

def fix_decoding_errors(string):
    '''
    Clean up HTML entities and escaped byte sequences in a scraped string.

    A fixed list of textual substitutions is applied in order and the result
    is then handed to ftfy.fix_text() for any remaining repairs.

    REPLACE THIS WITH ftfy.fix_text() -- python package which should be one stop shop for fixes
    '''
    # Order matters: '&amp;' is expanded before '&#39;' is looked for.
    # NOTE(review): '\\xc3\\x9' covers only part of an escaped byte pair, so the
    # final hex digit stays in the text -- looks truncated, confirm the intent.
    substitutions = (
        ('&amp;', '&'),
        ('&#39;', "'"),
        ('\\xc3\\xb6', 'o'),
        ('\\xc3\\xab', 'e'),
        ('\\xc3\\x9', 'u'),
        ('\\xc3\\xb8', 'o'),
        ("\\'", "'"),
    )
    cleaned = string
    for broken, replacement in substitutions:
        cleaned = cleaned.replace(broken, replacement)

    return ftfy.fix_text(cleaned)

def parse_track_and_artist(track_string):
    '''
    Extract the artist(s), track name, and remixer (if any) from the standard formatting used by 1001.

    Handled layouts are "Artist - Track" and "Artist - Track (Remixer Remix)".

    Returns a tuple (artists, track, remixer):
      artists -- always a list; the artist part is split on '&' / 'vs.' and
                 'feat.' / 'ft.' credits are dropped from each name
      track   -- the track name
      remixer -- text of the parenthetical for Remix/Bootleg/Edit titles,
                 otherwise 'N/A'

    Raises ValueError when track_string does not contain exactly one ' - '.
    '''
    # Standard layout "Artist Name - Track Name" is expressed explicitly in html
    artist, title = [part.strip(' ') for part in track_string.split(' - ')]
    track = title
    remixer = 'N/A'

    # Check if Remix/Bootleg/Edit and parse accordingly
    if ('Remix' in track_string) or ('Bootleg' in track_string) or ('Edit' in track_string):

        title_parts = [part.strip(' ') for part in title.split('(')]

        if len(title_parts) > 2:
            # Several parentheticals: keep everything after the track name as-is
            track = title_parts[0]
            remixer = '('.join(title_parts[1:])
        elif len(title_parts) == 2:
            track, remixer = title_parts
            # Remove the closing bracket and a trailing "Remix" as *suffixes*.
            # rstrip('Remix)') would strip a character set and mangle values
            # such as "Extended Mix)" into "Extended M".
            remixer = remixer.rstrip(')').strip(' ')
            if remixer.endswith('Remix'):
                remixer = remixer[:-len('Remix')]
            remixer = remixer.strip(' ')
        # else: the keyword only appears inside a name (e.g. "Editors - Papillon")
        # and there is no parenthetical, so this is a plain track without remixer.

    # Check for multiple artists -- Big Room sets tend to have hella mashups
    # Sometimes there is more structured formatting to exploit i.e. (Artist1 vs. Artist2 - Track1 vs. Track2)
    # Not worrying about that now b/c big room sux
    artists = [name.strip(' ') for name in artist.replace('vs.', '&').split('&')]

    # Remove features
    # We could make features a separate field but for now just removing
    artists = [name.split('feat.')[0].split('ft.')[0].strip(' ') for name in artists]

    return (artists, track, remixer)


def tracklist_meta_data(html):
    '''
    Extract meta data about tracklist/set.

    Reads the content attribute of three <meta name="..."> tags and returns
    them as a dict with the keys:
      description -- set description
      created     -- set creation date ("dcterms.created"); this should probably
                     be the point in time we use for building prediction data
      modified    -- last modified date ("dcterms.modified")

    A tag that is not present in html is reported as 'N/A'.
    '''
    def _meta_content(name):
        # Return the text between the quotes of content="..." for one meta tag
        start_term = 'meta name="' + name + '" content="'
        start = html.find(start_term)
        if start == -1:
            return 'N/A'
        # Skip the marker by length; lstrip(start_term) would strip a set of
        # characters and also remove leading letters of the value itself.
        start += len(start_term)
        end = html.find('"', start)
        return html[start:] if end == -1 else html[start:end]

    meta_data = {}
    meta_data['description'] = _meta_content('description')
    meta_data['created'] = _meta_content('dcterms.created')
    meta_data['modified'] = _meta_content('dcterms.modified')

    return meta_data
    
def tracklist_general_information(html):
    '''
    Pull the genre list and the performing dj out of a tracklist page.

    Returns a dict with:
      styles  -- list of style names read from the "tl_music_styles" cell
      dj_url  -- absolute url of the first '/dj' link found in html, or 'N/A'
      dj_name -- link text of that same anchor, or 'N/A'

    An IndexError propagates when the "tl_music_styles" cell cannot be found.
    '''
    site_root = 'https://www.1001tracklists.com'

    # Narrow the page down to the "General Information" panel
    panel_start = find_str(html, 'General Information', 0)
    panel = html[panel_start:].split('Most Liked Tracklists')[0]

    # Genres -- can use these to build genre-specific graphs
    style_start = find_str(panel, 'Tracklist Musicstyle', 0)
    style_cell = panel[style_start:].split('id="tl_music_styles">')[1].split('</td>')[0]
    info_doc = {'styles': [name.strip(' ') for name in style_cell.split(',')]}

    # If 1001 recognizes the dj who played the set they link their dj page
    # Its my understanding dj pages are independent of artist pages -- we'll need to map these
    dj_start = find_str(html, 'a href="/dj', 0)
    if dj_start == -1:
        info_doc['dj_url'] = 'N/A'
        info_doc['dj_name'] = 'N/A'
        return info_doc

    anchor_tail = html[dj_start:]
    # The href value is the first quoted string before the anchor's class attribute
    info_doc['dj_url'] = site_root + anchor_tail.split('class')[0].split('"')[1]
    # The dj name is the text between the opening tag's '>' and '</a>'
    info_doc['dj_name'] = anchor_tail.split('</a>')[0].split('>')[1]
    return info_doc
    
def tracklist_track_data(html):
    '''
    Extract track related data from set.

    Walks every 'tracknumber_value">' marker in html, treats the text up to
    the next '<br>' as one track entry and parses the entry's
    'meta itemprop="name"' title into artist(s), track name and remixer.

    Entries are skipped when the title is missing or too short, when it
    cannot be parsed, or when it refers to an unidentified track ("ID").

    Returns a dict mapping track number to a track doc with the keys
    track_num, artist, artist_url, name, track_url, remixer, remixer_url.
    Urls that cannot be found are reported as 'N/A'.
    '''
    site_root = 'https://www.1001tracklists.com'
    number_marker = 'tracknumber_value">'
    name_marker = 'meta itemprop="name" content='

    track_docs = {}
    index = find_str(html, number_marker, 0)
    while index != -1:

        # One track entry runs from the marker up to the next <br>
        chunk_end = html.find('<br>', index)
        track_chunk = html[index:] if chunk_end == -1 else html[index:chunk_end]

        # Extract track number
        # strip() removes a *set of characters* here, not the marker as a prefix;
        # presumably harmless because track numbers are numeric -- TODO confirm
        track_num = track_chunk[:22].split('<')[0].strip('tracknumber_value">')

        # Extract track information
        name_index = find_str(track_chunk, name_marker, 0)
        if name_index == -1:
            clean_string = ''
        else:
            # Skip the marker by length: strip(name_marker) would also eat
            # leading title characters that happen to occur in the marker.
            extracted_value = track_chunk[name_index + len(name_marker):].split('>')[0].strip('"')
            clean_string = fix_decoding_errors(extracted_value)

        artist_list, track, remixer = None, None, None
        if len(clean_string) > 1:
            try:
                artist_list, track, remixer = parse_track_and_artist(clean_string)
            except Exception:
                # Titles that do not follow the expected layout are skipped
                artist_list, track, remixer = None, None, None

        # If track info pull failed then skip; also avoid ID's for now
        # NOTE(review): 'ID' in track is a substring test, so any track name
        # containing "ID" is skipped as well -- confirm this is intended.
        if (artist_list is not None) and ('ID' not in artist_list) and ('ID' not in track):

            # The page links do not depend on the artist, so look them up once

            # Extract artist page
            artist_index = find_str(track_chunk, 'title="open artist page"', 0)
            if artist_index != -1:
                artist_url = track_chunk[artist_index:].split('class')[1].split('href="')[1].rstrip('" ')
                artist_url = site_root + artist_url
            else:
                artist_url = 'N/A'

            # Extract remixer page (if exists)
            remixer_url = 'N/A'
            if remixer != 'N/A':
                remixer_index = find_str(track_chunk, 'title="open remixer artist page"', artist_index)
                if remixer_index != -1:
                    remixer_url = track_chunk[remixer_index:].split('class')[1].split('href="')[1].rstrip('" ')
                    remixer_url = site_root + remixer_url

            # Extract track page
            track_index = find_str(track_chunk, 'title="open track page"', artist_index)
            if track_index != -1:
                track_url = track_chunk[track_index:].split('class')[1].split('href="')[1].split('"')[0]
                track_url = site_root + track_url
            else:
                track_url = 'N/A'

            # Tends to be multiple artists so artists parsed to list even if only one
            # NOTE(review): every artist writes to the same track_num key, so only
            # the last artist of a multi-artist track is kept (with the url of the
            # first artist link found in the entry).
            for artist in artist_list:
                track_docs[track_num] = {
                    'track_num': track_num,
                    'artist': artist.strip(' '),
                    'artist_url': artist_url.strip(' '),
                    'name': track.strip(' '),
                    'track_url': track_url.strip(' '),
                    'remixer': remixer.strip(' '),
                    'remixer_url': remixer_url.strip(' ')
                }

        # Advance to the next track marker after the current one
        index = find_str(html, number_marker, index)

    return track_docs

def build_artist_edges(url_doc, url):
    '''
    Pair up the artists that appear in the same set -- order n^2.

    Every track is combined only with the tracks that come after it, so each
    pair of tracks is visited once. Pairs whose two artists are equal are
    dropped. Each edge is a dict with the keys artist1, artist2 and url.
    '''
    artists = [doc['artist'] for doc in url_doc['track_docs'].values()]
    return [
        {
            'artist1': current,
            'artist2': later,
            'url': url
        }
        for position, current in enumerate(artists)
        for later in artists[position + 1:]
        if current != later
    ]

def build_track_edges(track_docs, url):
    '''
    Pair up the tracks that appear in the same set -- order n^2.

    The keys of track_docs are sorted and each key is combined only with the
    keys that follow it, so every unordered pair is emitted once. Each edge
    is a dict with the keys track1, track2 and url.
    '''
    ordered_keys = sorted(track_docs.keys())
    return [
        {
            'track1': current,
            'track2': later,
            'url': url
        }
        for position, current in enumerate(ordered_keys)
        for later in ordered_keys[position + 1:]
        if current != later
    ]
                
def build_sequential_track_edges(track_docs, url):
    '''
    Allows for later "next track lookup" functionality.

    Orders the tracks by their 'track_num' and emits one doc per pair of
    neighbouring tracks, with the keys url, first_track, second_track,
    first_position and second_position (tracks are identified by their key
    in track_docs, positions by their 'track_num').

    Positions that look like integers are ordered numerically, so "9" comes
    before "10" and "99" before "100"; any other position is placed after
    the numeric ones in text order.
    '''
    def _position_key(entry):
        # Sort key: numeric positions by value, everything else afterwards
        position = entry[0]
        try:
            return (0, int(position), '')
        except (TypeError, ValueError):
            return (1, 0, str(position))

    enumerated_tracks = sorted(
        ((doc['track_num'], key) for key, doc in track_docs.items()),
        key=_position_key,
    )

    seq_docs = []
    # Walk neighbouring pairs: (1st, 2nd), (2nd, 3rd), ...
    for (position, key), (next_position, next_key) in zip(enumerated_tracks, enumerated_tracks[1:]):
        seq_docs.append(
            {
                'url': url,
                'first_track': key,
                'second_track': next_key,
                'first_position': position,
                'second_position': next_position,
            }
        )
    return seq_docs

def build_played_playedby_edge(url_doc, url):
    '''
    Build one "dj played track" doc per track of the set.

    Allows you to map who plays who; directional graphs could be studied
    from this. When the set has no recognised dj (name or url is 'N/A')
    an empty list is returned.
    '''
    played_by = url_doc['dj_name']
    played_by_url = url_doc['dj_url']

    # Without a known dj there is nothing to attribute the tracks to
    if 'N/A' in (played_by, played_by_url):
        return []

    return [
        {
            'url': url,
            'played_by': played_by,
            'played_by_url': played_by_url,
            'played': doc['name'],
            'played_track_url': doc['track_url'],
            'played_artist': doc['artist'],
            'played_artist_url': doc['artist_url'],
            'played_remixer': doc['remixer'],
            'played_remixer_url': doc['remixer_url']
        }
        for doc in url_doc['track_docs'].values()
    ]
       

### Spot check parser

In [157]:
from urllib.request import Request, urlopen

# Spot check: download a single tracklist page and run every parser / edge
# builder on it, printing how many edges each builder produced.
url = 'https://www.1001tracklists.com/tracklist/2rrcqmpk/modd-vulcan-gas-company-austin-united-states-2019-03-09.html'
# A browser-like User-Agent header is sent with the request
req = Request(url,\
              headers={'User-Agent': 'Mozilla/5.0'})
# str() of the raw bytes gives the b'...' repr (escaped bytes), not decoded text
html = str(urlopen(req).read())

# Assemble the per-tracklist document: raw html + meta data + general info + tracks
url_doc = {}
url_doc['html'] = html
url_doc.update(tracklist_meta_data(html))
url_doc.update(tracklist_general_information(html))
track_docs = tracklist_track_data(html)
url_doc['track_docs'] = track_docs

# Build each edge type and report its size
track_edges = build_track_edges(track_docs, url)
print('Length of track connections:', len(track_edges))
sequential_edges = build_sequential_track_edges(track_docs, url)
print('Length of Sequential connections:', len(sequential_edges))
played_edges = build_played_playedby_edge(url_doc, url)
print('Length of played connections:', len(played_edges))
artist_edges = build_artist_edges(url_doc, url)
print('Length of artist connections:', len(artist_edges))

Length of track connections: 91
Length of Sequential connections: 13
Length of played connections: 14
Length of artist connections: 85


### Run over these and keep a depth of like 2 for POC

In [None]:
# import time

# def find_str(s, char, start_index=0):

#     index = 0
#     s = s[start_index+1:]
#     if char in s:
#         c = char[0]
#         for ch in s:
#             if ch == c:
#                 if s[index:index+len(char)] == char:
#                     return start_index + 1 + index
#             index += 1
#     return -1   

# def request(url):

#     user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
#     req = Request(url,\
#                   headers={'User-Agent': user_agent})
#     html = str(urlopen(req).read())
#     return html

# date_pages = ["https://www.1001tracklists.com/index%d.html?order=date" %d for d in range(100)]
# urls = []
# for url in date_pages:
    
#     # Make http request
#     html = request(url)
#     index = 0
#     # Iterate over links found in html
#     count = 0
#     while find_str(html, 'a href="', index) != -1:
        
#         # Extract url
#         index = find_str(html, 'a href="', index)
#         url_chunk = html[index:].split('"')[1]

#         # Make sure it is either a referenced tracklist or 1001 page
#         if ('/tracklist/' in url_chunk) and ('http' not in url_chunk):
#             new_page = 'https://www.1001tracklists.com' + url_chunk
#             urls.append(new_page)
#         if ('www.1001tracklists.com' in url_chunk) and ('.html' in url_chunk):
#             urls.append(url_chunk)
            
#         count += 1
            
#     print('Number of URLS:', len(urls))
#     time.sleep(10)

# import pickle
# with open('1001_urls.pkl', 'wb') as f:
#      pickle.dump(urls, f)

In [11]:
# import numpy as np 
# import pickle

# with open('1001_urls.pkl', 'rb') as f:
#      urls = pickle.load(f)
        
# print(len(urls))
# unique_urls = np.unique(urls)
# print(len(unique_urls))
# unique_urls = [url for url in unique_urls if ('.html' in url) and ('#tlp' not in url)]
# print(len(unique_urls))

4534
2980
2970


In [None]:
import time
import pickle
import numpy as np

def parse(url):
    '''
    Download one tracklist page and build every document derived from it.

    Returns a tuple
    (url_doc, track_edges, sequential_edges, played_edges, artist_edges, html)
    where url_doc holds the url, the raw html, the meta data, the general
    information and the parsed track docs of the page.

    Network errors from urlopen and parsing errors from the helper functions
    are not handled here and propagate to the caller.
    '''
    req = Request(url,
                  headers={'User-Agent': 'Mozilla/5.0'})
    # Read inside a with-block so the HTTP response is always closed
    with urlopen(req) as response:
        # str() of the raw bytes yields the b'...' repr; fix_decoding_errors()
        # replaces some of the escaped sequences that repr contains
        html = str(response.read())

    url_doc = {}
    url_doc['url'] = url
    url_doc['html'] = html
    url_doc.update(tracklist_meta_data(html))
    url_doc.update(tracklist_general_information(html))

    track_docs = tracklist_track_data(html)
    url_doc['track_docs'] = track_docs

    track_edges = build_track_edges(track_docs, url)
    print('Length of track connections:', len(track_edges))
    sequential_edges = build_sequential_track_edges(track_docs, url)
    print('Length of Sequential connections:', len(sequential_edges))
    played_edges = build_played_playedby_edge(url_doc, url)
    print('Length of played connections:', len(played_edges))
    artist_edges = build_artist_edges(url_doc, url)
    print('Length of artist connections:', len(artist_edges))

    return url_doc, track_edges, sequential_edges, played_edges, artist_edges, html

    
# Accumulators for everything produced by the crawl
played_docs = []
sequential_docs = []
track_docs = []
tracklist_docs = []
artist_docs = []
url_html_map = {}

# NOTE: pickle.load can execute arbitrary code; only load url lists that
# were written by this notebook.
with open('1001_urls.pkl', 'rb') as f:
    urls = pickle.load(f)

# Find unique urls
seen_urls = []
unique_urls = np.unique(urls)
unique_urls = [url for url in unique_urls\
                   if ('.html' in url) and ('#tlp' not in url) and (url not in seen_urls)]

for url in unique_urls[:]:

    if ('.html' in url) and ('#tlp' not in url) and (url not in seen_urls):

        try:

            url_doc, track_edges, sequential_edges, played_edges, artist_edges, html = parse(url)

            url_html_map[url] = html

            played_docs.extend(played_edges)
            sequential_docs.extend(sequential_edges)
            track_docs.extend(track_edges)
            tracklist_docs.append(url_doc)
            artist_docs.extend(artist_edges)

            print('Len played docs:', len(played_docs))
            print('Len sequential docs:', len(sequential_docs))
            print('Len track docs:', len(track_docs))
            print('Len tracklist docs:', len(tracklist_docs))
            print('Len artist docs:', len(artist_docs))

            # Checkpoint everything after each page so a crash loses no work
            with open('played_docs3.pkl', 'wb') as f:
                pickle.dump(played_docs, f)
            with open('sequential_docs3.pkl', 'wb') as f:
                pickle.dump(sequential_docs, f)
            with open('track_docs3.pkl', 'wb') as f:
                pickle.dump(track_docs, f)
            with open('tracklist_docs3.pkl', 'wb') as f:
                pickle.dump(tracklist_docs, f)
            with open('artist_docs3.pkl', 'wb') as f:
                pickle.dump(artist_docs, f)
            # The html map file must hold url_html_map (artist_docs was being
            # written here by mistake)
            with open('url_html_map.pkl', 'wb') as f:
                pickle.dump(url_html_map, f)

            seen_urls.append(url)

        except Exception as e:
            # Best effort: a page that fails to download or parse is reported
            # and skipped, the crawl continues with the next url
            print(e)

        # Pause between requests
        print('waiting')
        time.sleep(10)

In [None]:
# Crawl anjunadeep
# Crawler is not defined in this cell; it has to be defined before this cell runs.

crawler = Crawler()
# Start the crawl from the Anjunadeep Edition source page and keep what start_crawl returns
anjuna_urls = crawler.start_crawl('https://www.1001tracklists.com/source/v7m7k3/the-anjunadeep-edition/index.html')

# Persist the crawl result so later cells can reload it without crawling again
import pickle
with open('anjuna_urls.pkl', 'wb') as f:
     pickle.dump(anjuna_urls, f)

In [None]:
import time
import pickle
import numpy as np

def parse(url):

    '''
    Download one tracklist page and build its document and edge lists.

    url: address of the page to fetch (requested with a 'Mozilla/5.0'
    User-Agent header).

    Returns a 6-tuple:
    (url_doc, track_edges, sequential_edges, played_edges, artist_edges, html).
    url_doc holds the url, the page text, everything returned by
    tracklist_meta_data and tracklist_general_information, and the parsed
    tracks under 'track_docs'.

    Errors raised by the download or by any helper propagate to the caller.
    '''
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})

    # Read the body inside a context manager so the HTTP response is closed
    # deterministically instead of being left to garbage collection.
    with urlopen(req) as response:
        # str() on bytes yields the "b'...'" repr, not decoded text. Kept
        # unchanged because the downstream helpers presumably expect that
        # form -- TODO confirm before switching to .decode().
        html = str(response.read())

    url_doc = {}
    url_doc['url'] = url
    url_doc['html'] = html
    url_doc.update(tracklist_meta_data(html))
    url_doc.update(tracklist_general_information(html))

    track_docs = tracklist_track_data(html)
    url_doc['track_docs'] = track_docs

    # Build each edge list and print its size as progress output.
    track_edges = build_track_edges(track_docs, url)
    print('Length of track connections:', len(track_edges))
    sequential_edges = build_sequential_track_edges(track_docs, url)
    print('Length of Sequential connections:', len(sequential_edges))
    played_edges = build_played_playedby_edge(url_doc, url)
    print('Length of played connections:', len(played_edges))
    artist_edges = build_artist_edges(url_doc, url)
    print('Length of artist connections:', len(artist_edges))

    return url_doc, track_edges, sequential_edges, played_edges, artist_edges, html

    
# Accumulators filled across all crawled pages.
played_docs = []
sequential_docs = []
track_docs = []
tracklist_docs = []
artist_docs = []
url_html_map = {}  # url -> page text as returned by parse()

# Load the crawled URL list.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# this notebook wrote itself.
with open('1001_urls.pkl', 'rb') as f:
    urls = pickle.load(f)

# Find unique urls: de-duplicate, then keep only entries that contain '.html'
# and do not contain '#tlp'.
seen_urls = []  # URLs that were parsed and checkpointed successfully
unique_urls = [url for url in np.unique(urls)
               if ('.html' in url) and ('#tlp' not in url)]

# unique_urls is already filtered and duplicate-free, so no per-iteration
# filter is needed here.
for url in unique_urls:

    try:

        url_doc, track_edges, sequential_edges, played_edges, artist_edges, html = parse(url)

        url_html_map[url] = html

        played_docs.extend(played_edges)
        sequential_docs.extend(sequential_edges)
        track_docs.extend(track_edges)
        tracklist_docs.append(url_doc)
        artist_docs.extend(artist_edges)

        print('Len played docs:', len(played_docs))
        print('Len sequential docs:', len(sequential_docs))
        print('Len track docs:', len(track_docs))
        print('Len tracklist docs:', len(tracklist_docs))
        print('Len artist docs:', len(artist_docs))

        # Rewrite every checkpoint file after each page (presumably so an
        # interrupted crawl keeps its progress). url_html_map.pkl now receives
        # url_html_map itself rather than a second copy of artist_docs.
        checkpoints = (
            ('played_docs3.pkl', played_docs),
            ('sequential_docs3.pkl', sequential_docs),
            ('track_docs3.pkl', track_docs),
            ('tracklist_docs3.pkl', tracklist_docs),
            ('artist_docs3.pkl', artist_docs),
            ('url_html_map.pkl', url_html_map),
        )
        for filename, payload in checkpoints:
            with open(filename, 'wb') as f:
                pickle.dump(payload, f)

        seen_urls.append(url)

    except Exception as e:
        # Best-effort crawl: report the failure and continue with the next URL.
        print(e)

    # Wait 10 seconds before the next URL; this runs after failures too.
    print('waiting')
    time.sleep(10)