In [1]:
%load_ext autoreload
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
from dbmaster import MasterParams, MasterPersist
from dbbase import MusicDBIDModVal, MusicDBDir, MusicDBData
from dbnote import DownloadRecord, MergeSearchArtist
from utils import FileIO, DirInfo, FileInfo, getFlatList, Timestat, TermTime, TermTimeTS, getTT
from pandas import Series, DataFrame, concat, Timestamp
from pandb import PanDBIO
from musicdb.spotify import MusicDBParams, RawAPIData, MusicDBIO
from os import getpid
from functools import partial

# Shared helper objects used by the cells below.
# mv supplies getModVal(), which the album-download section maps over artist IDs.
mv = MusicDBIDModVal()
io = FileIO()
mpar = MusicDBParams()
dbio = MusicDBIO()
apiio = RawAPIData()
# Database name taken from the parameter object; it is interpolated into the
# log messages and passed to every DownloadRecord below.
db = mpar.db

In [3]:
# One DownloadRecord per download stage, all bound to the same database.
_newRecord = partial(DownloadRecord, db=db)

searchArtistRecord = _newRecord(name="SearchArtist", rTypes=["Index", "Data"])
downloadArtistRecord = _newRecord(name="DownloadArtist", rTypes=["Index"])
downloadArtistAlbumRecord = _newRecord(name="DownloadArtistAlbum", rTypes=["Index"])
downloadArtistTrackRecord = _newRecord(name="DownloadArtistTrack", rTypes=["Index"])
downloadArtistRelRecord = _newRecord(name="DownloadArtistRel", rTypes=["Index"])
downloadArtistIDRecord = _newRecord(name="DownloadArtistID", rTypes=["Index"])
downloadTrackRecord = _newRecord(name="DownloadTrack", rTypes=["Index"])

# Lazy accessors: nothing is read until one of these is called.
knownArtists = dbio.rdio.getSummaryNameData
allArtists = partial(dbio.rdio.getData, "SearchArtist")

DownloadRecord(db=Spotify, name=SearchArtist, rTypes=['Index', 'Data'])
DownloadRecord(db=Spotify, name=DownloadArtist, rTypes=['Index'])
DownloadRecord(db=Spotify, name=DownloadArtistAlbum, rTypes=['Index'])
DownloadRecord(db=Spotify, name=DownloadArtistTrack, rTypes=['Index'])
DownloadRecord(db=Spotify, name=DownloadArtistRel, rTypes=['Index'])
DownloadRecord(db=Spotify, name=DownloadArtistID, rTypes=['Index'])
DownloadRecord(db=Spotify, name=DownloadTrack, rTypes=['Index'])


In [None]:
if False:
    # Disabled one-off migration: renames the legacy "SearchedFor*" pickle files
    # to the "<db>-<record>-<type>" naming used by DownloadRecord.
    from utils import FileInfo
    import shutil
    db = dbio.params.db
    dbdir = dbio.params.dir
    nameMap = {
        "MasterArtists": "SearchArtist",
        "LocalArtists": "DownloadArtist",
        "LocalTracks": "DownloadTrack",
        "LocalArtistTracks": "DownloadArtistTrack",
        "LocalArtistAlbums": "DownloadArtistAlbum",
        "LocalArtistSimilar": "DownloadArtistSimilar",
        "LocalGenres": "DownloadGenre",
        "LocalArtistRels": "DownloadArtistRel",
        "LocalArtistIDs": "DownloadArtistID",
    }
    dinfo = DirInfo(f"/Users/tgadfort/anaconda3/envs/post/pandb/musicdb/{db}")

    # Legacy file-name suffix -> new record type, in the original insertion order.
    suffixToType = (("", "Index"), ("Data", "Data"), ("Errors", "Error"))
    cpMap = {}
    for origname, newname in nameMap.items():
        legacyStem = f"{dbdir}SearchedFor{origname}"
        for legacySuffix, recordType in suffixToType:
            cpMap[f"{legacyStem}{legacySuffix}"] = f"{db}-{newname}-{recordType}"

    assert dinfo.exists(), f"{dinfo} does not exist!"
    for srcStem, dstStem in cpMap.items():
        srcFile = dinfo.join(f"{srcStem}.p")
        dstFile = dinfo.join(f"{dstStem}.p")
        # Only files that actually exist are moved; missing ones are skipped.
        if srcFile.exists():
            srcFile.mvFile(dstFile)

In [None]:
##########################################################################################
# Show Summary
##########################################################################################
# Print one info() block per download record (each record exactly once),
# followed by the number of rows in the known-artist summary.
print(f"{db} Search Results (PID={getpid()})")
for record in (
    searchArtistRecord,
    downloadArtistRecord,
    downloadArtistAlbumRecord,
    downloadArtistTrackRecord,
    downloadArtistRelRecord,
    downloadArtistIDRecord,
    downloadTrackRecord,
):
    record.info()
print(f"  {'KnownArtist Data': <20}: {knownArtists().shape[0]}")

# Search For New Artists

In [None]:
######################################################################################################
# Explode Recent Data
######################################################################################################
def getNewData(searchArtistRecord):
    """Flatten the record's stored search payloads into one DataFrame.

    Every value returned by ``searchArtistRecord.getData()`` is converted to a
    DataFrame and transposed, the pieces are stacked, the index is cast to
    ``str``, only the first row per index value is kept and the ``sid`` column
    is dropped. An empty DataFrame is returned when the record holds no data.
    """
    rawData = searchArtistRecord.getData()
    if not len(rawData):
        return DataFrame()

    frames = []
    for payload in rawData.values():
        frames.append(DataFrame(payload).T)

    combined = concat(frames)
    combined.index = combined.index.astype(str)
    firstSeen = ~combined.index.duplicated()
    return combined[firstSeen].drop("sid", axis=1)

def mergeNewData(searchArtistRecord):
    """Reload the record, flatten its data and merge it into the local store.

    Calls ``searchArtistRecord.load(verbose=False)``, builds the flattened
    frame with ``getNewData`` and passes both to
    ``MergeSearchArtist(db).mergeLocal`` with ``test=False``.
    """
    searchArtistRecord.load(verbose=False)
    freshRows = getNewData(searchArtistRecord)
    MergeSearchArtist(db).mergeLocal(searchArtistRecord, freshRows, test=False)

In [None]:
# Fresh handles configured for local downloads.
dbio = MusicDBIO(verbose=False, local=True, mkDirs=False)
apiio = RawAPIData(debug=False)

# Merge whatever the record already holds, then keep only the names that the
# record does not yet know about.
knownNames = PanDBIO().getUniqueArtistNames()
mergeNewData(searchArtistRecord)
searchedNames = Series(searchArtistRecord.getIndex())
artistNamesToGet = knownNames[~knownNames.map(searchArtistRecord.isKnown)]

for line in (
    f"# {db} Search Results (PID={getpid()})",
    f"#   Available Names:     {knownNames.shape[0]}",
    f"#   Known Artist Names:  {searchedNames.shape[0]}",
    f"#   Artist Names To Get: {artistNamesToGet.shape[0]}",
):
    print(line)

# The full name lists are only needed for the counts above.
del searchedNames, knownNames

In [None]:
ts = Timestat(f"Getting {db} ArtistIDs")
tt = getTT(skipEOD=False, vacation=True)
assert dbio.rdio.isLocal, "MusicDBIO is not set for local downloads!"

def isError(artistName, nErrors, sleeptime, error=None):
    """Record a failed search for ``artistName`` and back off.

    Marks the name as an error on ``searchArtistRecord``, prints the failure,
    appends the name to ``nErrors`` (the caller's list of consecutive
    failures) and passes ``sleeptime`` to ``apiio.sleep``.
    """
    searchArtistRecord.setError(index=artistName)
    print(f"Search Error ==> {artistName}: {error}")
    nErrors.append(artistName)
    apiio.sleep(sleeptime)

n = 0                # successful downloads in this run
maxN = 250000000     # hard cap on downloads per run
maxErrors = 5        # consecutive failures tolerated before giving up
nErrors = []         # names that failed since the last success
searchArtistRecord.load(verbose=False)
try:
    for _, artistName in artistNamesToGet.items():
        if searchArtistRecord.isKnown(artistName):
            continue

        if len(nErrors) >= maxErrors:
            print(f"Stopping due to {maxErrors} consecutive errors")
            break

        try:
            response = apiio.getArtistSearchData(artistName=artistName)
        except Exception as error:
            isError(artistName, nErrors, 10, error)
            continue

        if not isinstance(response, dict):
            isError(artistName, nErrors, 3.5, "NotDict")
            continue

        # A success resets the consecutive-error counter.
        nErrors = []
        searchArtistRecord.setData(index=artistName, data=response)
        apiio.sleep(2.5)
        n += 1

        if n % 5 == 0:
            if tt.isFinished():
                break
            apiio.sleep(1.0)

        # Checkpoint every 100 downloads, then pause before continuing.
        if n % 100 == 0:
            ts.update(n=n)
            searchArtistRecord.save()
            apiio.wait(10.0)
            if tt.isFinished():
                break

        if n >= maxN:
            print(f"Breaking after {maxN} downloads...")
            break
finally:
    # Runs on normal completion, on every break and when the cell is
    # interrupted, so results gathered since the last checkpoint are kept.
    ts.stop()
    searchArtistRecord.save()

In [None]:
# Explicit save of the search record, e.g. after manually stopping the download cell.
searchArtistRecord.save()

In [9]:
# Forced copyFromLocal(); judging by the printed "Old Global / New Local / New Global"
# shapes this replaces the global search-artist table with the local one.
# NOTE(review): the database name is hard-coded to "RateYourMusic" although the rest
# of this notebook works on `db` from MusicDBParams -- confirm this is intentional.
msa = MergeSearchArtist("RateYourMusic")
msa.copyFromLocal(force=True)

*****************************************************************************************************************************
*                                            MergeSearchArtist(db=RateYourMusic)                                            *
*****************************************************************************************************************************
Old Global Shape: (584812, 2)
New Local Shape:  (643810, 2)
New Global Shape: (643810, 2)


In [10]:
# Run the project backup with doSearch=True and doMod=False.
# NOTE(review): "RateYourMusic" is hard-coded here as well -- confirm against `db`.
from dbnote import backup
backup("RateYourMusic", doMod=False, doSearch=True)

****************************************************************************************************
*                                     Backing Up RateYourMusic                                     *
****************************************************************************************************
 [Backing Up Raw RateYourMusic Data] | StartTime 2023-12-05 14:11:24
 [Backing Up Raw RateYourMusic Data] | ?/?        : RunTime .  >> Checking Raw Path [/Volumes/Piggy/Discog/artists-rateyourmusic] <<
 [Backing Up Raw RateYourMusic Data] | ?/?        : RunTime .  >> Checking Mod Path [/Volumes/Seagate/Discog/artists-rateyourmusic] <<
***************************************************************************************************
*                                           ModVal Data                                           *
***************************************************************************************************
Not checking the raw mod path
******************************

# Download Artist Albums Data

In [4]:
dbio = MusicDBIO(verbose=False,local=True,mkDirs=False)
apiio = RawAPIData(debug=False)
# .copy() makes the one-column selection an independent frame, so adding the
# ModVal column below cannot raise SettingWithCopyWarning.
knownNames = allArtists()[['name']].copy()
knownNames['ModVal'] = knownNames.index.map(mv.getModVal)
# Keep only artists whose albums have not been downloaded yet, then bucket
# them by ModVal: one DataFrame per ModVal, stored in a Series.
availableNames = knownNames[~knownNames.index.map(downloadArtistAlbumRecord.isKnown)]
artistNamesToGet = Series({modVal: modValDF for modVal,modValDF in availableNames.groupby("ModVal")})

print(f"# {db} Search Results")
print(f"#   Available Names:     {knownNames.shape[0]}")
print(f"#   Known Artist Names:  {downloadArtistAlbumRecord.numKnown()}")
print(f"#   Artist Names To Get: {availableNames.shape[0]}")

# The flat frames are only needed for the counts above.
del availableNames
del knownNames

# Spotify Search Results
#   Available Names:     5149586
#   Known Artist Names:  2435291
#   Artist Names To Get: 2714562


In [11]:
# Download the album listing for every artist in artistNamesToGet, one ModVal
# group at a time, saving each raw response and recording the artist ID as done.
# NOTE(review): the Timestat label says "ArtistIDs" although this cell downloads
# artist albums -- looks copied from the search cell; confirm.
ts = Timestat(f"Getting {db} ArtistIDs")
tt = getTT(skipEOD=False, vacation=True)
assert dbio.rdio.isLocal, f"MusicDBIO is not set for local downloads!"

# NOTE(review): this redefines isError from the artist-search cell with a
# different signature (extra artistID argument); the later definition wins.
def isError(artistName, artistID, nErrors, sleeptime, error=None):
    """Record a failed album download and back off.

    Marks ``artistID`` as an error on ``downloadArtistAlbumRecord``, prints the
    failure, appends ``artistName`` to ``nErrors`` and passes ``sleeptime`` to
    ``apiio.sleep``.
    """
    downloadArtistAlbumRecord.setError(index=artistID)
    print(f"Search Error ==> {artistName}: {error}")
    nErrors.append(artistName)
    apiio.sleep(sleeptime)
    
# n counts successful downloads; stop is the flag that lets an inner-loop
# break also end the outer ModVal loop; nErrors holds the failures since the
# last success.
n = 0
maxN = 2500000
stop = False
nErrors = []
#downloadArtistAlbumRecord.load()
for groupModVal,modValData in artistNamesToGet.items():
    if stop is True:
        break
    modVal = groupModVal
    N = modValData.shape[0]
    for i,(artistID, artistName) in enumerate(modValData['name'].items()):    
        # Skip artists that were recorded since artistNamesToGet was built.
        if downloadArtistAlbumRecord.isKnown(artistID):
            continue
    
        if len(nErrors) >= 5:
            print("Stopping due to 5 consecutive errors")
            stop = True
            break
    
        # Progress prefix (group | position in group | group size | downloads);
        # end="" leaves the line open for whatever is printed next.
        print(f"{groupModVal: <3} | {i: <5} | {N: <5} | {n: <5} | ", end="")
        
        try:
            response = apiio.getArtistAlbums(artistID=artistID, artistName=artistName)        
        except Exception as error:
            isError(artistName, artistID, nErrors, 5, error)
            continue
    
        # Anything other than a dict is treated as a failed download.
        if not isinstance(response, dict):
            isError(artistName, artistID, nErrors, 3.5, "NotDict")
            continue

        
        # Success: reset the consecutive-error list, persist the raw response
        # and mark the artist as downloaded (in memory until the next save()).
        nErrors = []
        dbio.rdio.saveData("RawArtistAlbum", modVal, artistID, data=response)
        downloadArtistAlbumRecord.setIndex(index=artistID)
        apiio.sleep(2.5)
        n += 1
            
        if n % 5 == 0:
            if tt.isFinished():
                stop = True
                break
            
        # Every 100 downloads: report timing, save the record, then wait.
        if n % 100 == 0:
            ts.update(n=n)
            downloadArtistAlbumRecord.save()
            apiio.wait(15.0)
            if tt.isFinished():
                stop = True
                break
        
        if n >= maxN:
            print(f"Breaking after {maxN} downloads...")
            stop = True
            break

ts.stop()
# NOTE(review): the final save is commented out, so index entries added since
# the last multiple-of-100 checkpoint are only persisted by a separate save() call.
#downloadArtistAlbumRecord.save()

 [Getting Spotify ArtistIDs] | StartTime 2023-12-05 14:46:18
   ====> Terminate Time Set To 2023-12-05 19:00:00 <====
   ====> Will Terminate Process 4 Hour and 13 Minute From Now
0   | 0     | 17864 | 0     | Downloading Albums For Khash (1fhZFzqMpeBm31zLIEmSlJ)                       ===> [2] 2  2
0   | 1     | 17864 | 1     | Downloading Albums For Kiing Khash (3fuQRvDqG6HBPfqkKyd6PI)                 ===> [1] 1  1
0   | 2     | 17864 | 2     | Downloading Albums For Caleb Kelly (5ucGGiixTyCmzDMTYwm0zv)                 ===> [4] 4  4
0   | 3     | 17864 | 3     | Downloading Albums For Nizum Eoki (4iGLtY8wYhtQK4LeQFgAiq)                  ===> [1] 1  1
0   | 4     | 17864 | 4     | Downloading Albums For Koton (4YkiNg5eTjctAr6dELMRfX)                       ===> [6] 6  6
0   | 5     | 17864 | 5     | Downloading Albums For Kolton Caulfield (2zykWMyTPvjYa8OaQpy8QW)            ===> [1] 1  1
0   | 6     | 17864 | 6     | Downloading Albums For Rastamen jojo (18DUTN7VRG84xgv4V81G4Y)         

Waiting:   0%|          | 0/150 [00:00<?, ?it/s]

0   | 100   | 17864 | 100   | Downloading Albums For Nate Craig (7lwjoqjZA6KNiggqExd0Q5)                  ===> [3] 3  3
0   | 101   | 17864 | 101   | Downloading Albums For Ausequa (3EGztqdS3AiRKi6kryRNg1)                     ===> [43] 43  43
0   | 102   | 17864 | 102   | Downloading Albums For NUTANA (2IlotRrtfpIKvTZhXoXDH0)                      ===> [3] 3  3
0   | 103   | 17864 | 103   | Downloading Albums For Pikeras (3Rk6ZWHH97W7zdoehmzt5G)                     ===> [16] 16  16
0   | 104   | 17864 | 104   | Downloading Albums For MOOZARK (6Gu8YisTBtshH676fHrMbk)                     ===> [4] 4  4
0   | 105   | 17864 | 105   | Downloading Albums For Chmura (02q4I59cJQ8uJ9JKuz2STw)                      ===> [4] 4  4
0   | 106   | 17864 | 106   | Downloading Albums For Kidwild (5IR3SxDdtCCw6KD1aoF6w9)                     ===> [9] 9  9
0   | 107   | 17864 | 107   | Downloading Albums For Mark Evitts (0YANZBJ7bTlljOSjobqoJh)                 ===> [6] 6  6
0   | 108   | 17864 | 108   | Down

Waiting:   0%|          | 0/150 [00:00<?, ?it/s]

0   | 200   | 17864 | 200   | Downloading Albums For Leo & Ray (3tdvuW4bIMZwElpiMETwwm)                   ===> [1] 1  1
0   | 201   | 17864 | 201   | Downloading Albums For El Noruego (3NoEPAjoWZrCbUEXlUiZxs)                  ===> [6] 6  6
0   | 202   | 17864 | 202   | Downloading Albums For Tatiana Marie Clark (2ovAB32IEY5k4Mgdl5eSfq)         ===> [4] 4  4
0   | 203   | 17864 | 203   | Downloading Albums For Fabrizio Berlincioni (7EI7G45KH76Fdywj5JhX0h)        ===> [6] 6  6
0   | 204   | 17864 | 204   | Downloading Albums For ROBINIO MUNDIBU (2BlWaS37aIC7bfZgdjEBgc)             ===> [1] 1  1
0   | 205   | 17864 | 205   | Downloading Albums For Fly Tye (6enisH6pEn6OsLggg4OZ68)                     ===> [24] 24  24
0   | 206   | 17864 | 206   | Downloading Albums For Korslagda Kukar (7uW0TRg4ie6mixKsuLEuAv)             ===> [6] 6  6
0   | 207   | 17864 | 207   | Downloading Albums For Kira Michele (1sfaaI0Ye2P9IjdVai9api)                ===> [24] 24  24
0   | 208   | 17864 | 208   | Down

Waiting:   0%|          | 0/150 [00:00<?, ?it/s]

0   | 300   | 17864 | 300   | Downloading Albums For Soumik Sen (1kwxW6jZABNOXKm0ftVUGj)                  ===> [2] 2  2
0   | 301   | 17864 | 301   | Downloading Albums For Cole Calico (57eFBbfUJqd6VzrGCNGVpP)                 ===> [3] 3  3
0   | 302   | 17864 | 302   | Downloading Albums For MEGTARO (3o3JLHE1KQzNc2wh46UiNI)                     ===> [4] 4  4
0   | 303   | 17864 | 303   | Downloading Albums For Memo ATR (6V1DSqlrymaAFfZOOVNIDh)                    ===> [12] 12  12
0   | 304   | 17864 | 304   | Downloading Albums For James Allen (7wLBfPrAWCwJr7gNgGBsut)                 ===> [4] 4  4
0   | 305   | 17864 | 305   | Downloading Albums For Anja Reich (6qFghYQcetnX8P6lK1yu1L)                  ===> [1] 1  1
0   | 306   | 17864 | 306   | Downloading Albums For Ghost Fetish (4KcGdKUnJ9jwulLK0Ae7dj)                ===> [5] 5  5
0   | 307   | 17864 | 307   | Downloading Albums For SCRP (6mRmujco5e9ge5KHau1mV1)                        ===> [9] 9  9
0   | 308   | 17864 | 308   | Downloa

Waiting:   0%|          | 0/150 [00:00<?, ?it/s]

0   | 400   | 17864 | 400   | Downloading Albums For NFC Énfasis (5pseOpqOUAo27mIWZIsuq5)                 ===> [1] 1  1
0   | 401   | 17864 | 401   | Downloading Albums For Kreajours (5lQi0ajKiH0chp5jFgTerx)                   ===> [8] 8  8
0   | 402   | 17864 | 402   | Downloading Albums For Ainsley Matic and the Broken Blues (1bu2ji7bMhmPoPxlIF9Ttx)   ===> [2] 2  2
0   | 403   | 17864 | 403   | Downloading Albums For Empty Cage Quartet (49zRHYMmd6aiyVGfNFCIbL)          ===> [4] 4  4
0   | 404   | 17864 | 404   | Downloading Albums For Marie Elfriede Simone (6RNmGmQ1dcz6uRZLPB8fGc)       ===> [1] 1  1
0   | 405   | 17864 | 405   | Downloading Albums For Fish & Bird (1pH328mTxfMpNX31DltKsx)                 ===> [2] 2  2
0   | 406   | 17864 | 406   | Downloading Albums For Dessauer (13bGI7bd5hv6Rj41L6sqLT)                    ===> [9] 9  9
0   | 407   | 17864 | 407   | Downloading Albums For Ben Frankel Orchestra (2pJOHRnZUP4ws3cg9wwKAG)       ===> [2] 2  2
0   | 408   | 17864 | 408   | D

Waiting:   0%|          | 0/150 [00:00<?, ?it/s]

0   | 500   | 17864 | 500   | Downloading Albums For Vasyl Barvinsky (51Elqoud1Fuxj29lR0aBU8)             ===> [1] 1  1
0   | 501   | 17864 | 501   | Downloading Albums For DJ and EvanJohn (6qJ8jtvlIWf9YFXZ7Mr8cf)             ===> [1] 1  1
0   | 502   | 17864 | 502   | Downloading Albums For Shaza (5jesTJxPS6iOH9jzXI4zfM)                       ===> [2] 2  2
0   | 503   | 17864 | 503   | Downloading Albums For Joshua Daniel Nichols (1fj9Q6qXRJ3nRkV6FCBSmr)       ===> [1] 1  1
0   | 504   | 17864 | 504   | Downloading Albums For KylelBvndz (3gum3AhN8shHMH6ygJYKmJ)                  ===> [2] 2  2
0   | 505   | 17864 | 505   | Downloading Albums For Stay True Entertainment (2Uwidiva5kgAMU4xvM0ia8)     ===> [1] 1  1
0   | 506   | 17864 | 506   | Downloading Albums For R.O.B (49RbmXDehRoflymjqwUNIE)                       ===> [26] 26  26
0   | 507   | 17864 | 507   | Downloading Albums For Gökhan Balık (27XUsmG6UfamHsrex1swpC)                ===> [4] 4  4
0   | 508   | 17864 | 508   | Downloa

Waiting:   0%|          | 0/150 [00:00<?, ?it/s]

0   | 600   | 17864 | 600   | Downloading Albums For Tess Berger (2jOjZmcXLGyd3UnNvMvxy2)                 ===> [5] 5  5
0   | 601   | 17864 | 601   | Downloading Albums For 김민종 (1wEyGbuSlgNqFKT7bH5Y9p)                         ===> [2] 2  2
0   | 602   | 17864 | 602   | Downloading Albums For DJ Crystal Ground (0kLvirTVqgfLEXEaWi1ug3)           ===> [4] 4  4
0   | 603   | 17864 | 603   | Downloading Albums For chuky martin (1KLb5jhoLi9euH1nJSd00Z)                ===> [1] 1  1
0   | 604   | 17864 | 604   | Downloading Albums For DJ Vella (6hBBfKz68zzJgzsJDCf1Sv)                    ===> [1] 1  1
0   | 605   | 17864 | 605   | Downloading Albums For Rolando Garcia (3HBum2nHZTsewtkJ6RDB1u)              ===> [4] 4  4
0   | 606   | 17864 | 606   | Downloading Albums For Big Jadee (20Wwuq1Trs6bnJgwBXCp69)                   ===> [1] 1  1
0   | 607   | 17864 | 607   | Downloading Albums For Vagali (3KilbBPwTtGTSIkWRZejxO)                      ===> [9] 9  9
0   | 608   | 17864 | 608   | Downloadin

Waiting:   0%|          | 0/150 [00:00<?, ?it/s]

0   | 700   | 17864 | 700   | Downloading Albums For David Walter (0gpsOpH9xpzEpHabCojun5)                ===> [18] 18  18
0   | 701   | 17864 | 701   | Downloading Albums For Jesse JMZ (6afXa2jjuAXC4kuW91jW7n)                   ===> [1] 1  1
0   | 702   | 17864 | 702   | Downloading Albums For BLIND (1UpXqE7laC1E3kDUEM9Iww)                       ===> [2] 2  2
0   | 703   | 17864 | 703   | Downloading Albums For Ruhe Rauschen (2gIaOXD1JTQE5aEteNXf9C)               ===> [11] 11  11
0   | 704   | 17864 | 704   | Downloading Albums For Druza (3gvg4SizYMzzdniS1mYtlj)                       ===> [3] 3  3
0   | 705   | 17864 | 705   | Downloading Albums For iReezy (4omujLRgcjPzcFasF9eoId)                      ===> [2] 2  2
0   | 706   | 17864 | 706   | Downloading Albums For Jordan Klassen (0TMvOZ7CguXCuuTkKVf4sa)              ===> [1] 1  1
0   | 707   | 17864 | 707   | Downloading Albums For Evan Kristopeit (6Dfkj2eRZwC21sW8KqOjKT)             ===> [1] 1  1
0   | 708   | 17864 | 708   | Down

Waiting:   0%|          | 0/150 [00:00<?, ?it/s]

0   | 800   | 17864 | 800   | Downloading Albums For The Jacka (4AnkYRI3U7mf11CO09ZFzc)                   ===> [2] 2  2
0   | 801   | 17864 | 801   | Downloading Albums For Raulin Rodríguez (0jLs9Ts6n8t3nx21pZHwyU)            ===> [1] 1  1
0   | 802   | 17864 | 802   | Downloading Albums For Reticent (3srMdjCnHksjkKVs18LQ9E)                    ===> [1] 1  1
0   | 803   | 17864 | 803   | Downloading Albums For TiiJEi (3PGxnP0AhlM6HO0IO6ZZes)                      ===> [23] 23  23
0   | 804   | 17864 | 804   | Downloading Albums For Lesley Rains (4N3lWor0nvoakWuvOugOU4)                ===> [11] 11  11
0   | 805   | 17864 | 805   | Downloading Albums For ROC DEZUL (0Lz9FG7nwJBhWZd69Vjsq9)                   ===> [3] 3  3
0   | 806   | 17864 | 806   | Downloading Albums For Dorotheo (7Bqd6UDHfc6xlAC4nOmITW)                    ===> [14] 14  14
0   | 807   | 17864 | 807   | Downloading Albums For Paul Fischer (6cYmfhTw8HBfaKFn4uFdu6)                ===> [1] 1  1
0   | 808   | 17864 | 808   | D

Waiting:   0%|          | 0/150 [00:00<?, ?it/s]

0   | 900   | 17864 | 900   | Downloading Albums For DOT (5woyrAMdYnu7OEJ8WFbIv4)                         ===> [1] 1  1
0   | 901   | 17864 | 901   | Downloading Albums For Keisher Downie (6rzWI34uPtqLJh1LzZqNn8)              ===> [3] 3  3
0   | 902   | 17864 | 902   | Downloading Albums For Chapter (1o2ePpOEpr2D2Z7sv7lCt4)                     ===> [2] 2  2
0   | 903   | 17864 | 903   | Downloading Albums For Alicia Warns (6Qc2lM5TL7dlS0pMkceop5)                ===> [6] 6  6
0   | 904   | 17864 | 904   | Downloading Albums For Gunz N Dojaa (4UBXwFAdhLrw9Qre3XoeDQ)                ===> [10] 10  10
0   | 905   | 17864 | 905   | Downloading Albums For Roemisch (0WuIfBOsejzJRLHiVXtDSI)                    ===> [12] 12  12
0   | 906   | 17864 | 906   | Downloading Albums For Detective 47 (5asBoe7VTcJtdYlnROgjot)                ===> [4] 4  4
0   | 907   | 17864 | 907   | Downloading Albums For Mally (2ANK6d1QAml6VMLUlkcNJP)                       ===> [1] 1  1
0   | 908   | 17864 | 908   | Down

Waiting:   0%|          | 0/150 [00:00<?, ?it/s]

0   | 1000  | 17864 | 1000  | Downloading Albums For KonigTiger (63WgRXpbHInHdd2RlROUYZ)                  ===> [1] 1  1
0   | 1001  | 17864 | 1001  | Downloading Albums For Facundo Oliva (6AqTnsuuy3YKr4QIS4wnbj)               ===> [4] 4  4
0   | 1002  | 17864 | 1002  | Downloading Albums For Wolfy (0OYGG0jvebVffo85igvaZR)                       ===> [2] 2  2
0   | 1003  | 17864 | 1003  | Downloading Albums For Yuto Wonderful (28L2sIWNr2GaHr6BxsBEWR)              ===> [3] 3  3
0   | 1004  | 17864 | 1004  | Downloading Albums For Alex Dawa (6btZeIxzYQSnzutRBcyUlV)                   ===> [6] 6  6
0   | 1005  | 17864 | 1005  | Downloading Albums For The Soul Symphony (5qLJ3GSTv7SckKWz1BsJc8)           ===> [1] 1  1
0   | 1006  | 17864 | 1006  | Downloading Albums For Boyoboy (71x8FDQfOvVbv59K3VMoKc)                     ===> [6] 6  6
0   | 1007  | 17864 | 1007  | Downloading Albums For Jamil Honesty (3fZ4vrFZoxJIFCGunIqxIZ)               ===> [1] 1  1
0   | 1008  | 17864 | 1008  | Downloadin

KeyboardInterrupt: 

In [12]:
# Explicit save of the artist-album record; the download cell above only saves every 100 downloads.
downloadArtistAlbumRecord.save()

****************************************************************************************************************************
*                                                Saving DownloadArtistAlbum                                                *
****************************************************************************************************************************
  Saving Index [2436303] ... Done
  Saving Error [55] ... Done


# Download Album Data

## Create Media Data

In [None]:
# Build one row per media code: code -> [artistID, album name, url].
# NOTE(review): `knownMedia` is not defined in any visible cell of this
# notebook -- it must exist in the kernel before this cell is run.
mediaData = {}
for modVal in range(100):
    modValData = dbio.data.getModValData(modVal)
    # .items() replaces .iteritems(), which pandas 2.0 removed.
    for artistID, artistIDData in modValData.items():
        for mediaTypeData in artistIDData.media.media.values():
            mediaData.update({code: [artistID, media.album, media.url] for code, media in mediaTypeData.items()})
    if (modVal+1) % 10 == 0:
        print(f"ModVal = {modVal+1}")

# orient="index" builds the rows directly (no wide frame + transpose) and
# still yields the three named columns when mediaData is empty.
df = DataFrame.from_dict(mediaData, orient="index", columns=["ArtistID", "Name", "Ref"])
knownMedia.save(data=df)

## Download Data

In [None]:
# Rebind dbio/apiio to handles taken from the `bandcamp` namespace.
# NOTE(review): `bandcamp` is not imported in any visible cell (the imports at the
# top come from musicdb.spotify) -- this cell fails on a fresh kernel unless it is
# defined elsewhere; confirm.
dbio   = bandcamp.MusicDBIO(verbose=False,local=True,mkDirs=False)
apiio = bandcamp.RawAPIData(debug=False)

In [None]:
useArtist = False
numMaster = 250   # maximum number of albums kept per artist

# NOTE(review): `knownMedia` and `localAlbums` are not defined in any visible
# cell of this notebook -- they must already exist in the kernel.
knownAlbumsData = knownMedia.get()
knownAlbumsData['IndexModVal'] = knownAlbumsData.index.map(dbio.getModVal)
# Group by the column name, not a one-element list: with a list, pandas 2.x
# yields 1-tuples as group keys, which would become the keys of albumNamesToGet.
availableNames  = concat([artistIDDF.head(numMaster) for _, artistIDDF in knownAlbumsData.groupby("ArtistID")])
localAlbumsDict = localAlbums.get()
# Drop albums that were already downloaded.
availableNames  = availableNames[~availableNames.index.isin(localAlbumsDict.keys())]
albumNamesToGet = Series({modVal: modValDF for modVal, modValDF in availableNames.groupby("IndexModVal")})

print(f"# {db} Album Search Results")
print(f"#   Available Album IDs:  {knownAlbumsData.shape[0]}")
print(f"#   Known Album IDs:      {len(localAlbumsDict)}")
print(f"#   Albums To Download:   {availableNames.shape[0]}")

# Only albumNamesToGet is needed by the download cell.
del availableNames
del localAlbumsDict
del knownAlbumsData

#   Albums To Download:   23457
#   Albums To Download:   20437
#   Albums To Download:   12137
#   Albums To Download:   4132

In [None]:
def saveAlbumData(db, localAlbumsDict, searchedForErrors):
    """Persist the downloaded-album and error dictionaries with a short report.

    ``localAlbumsDict`` is saved through the module-level ``localAlbums`` store
    and ``searchedForErrors`` through the module-level ``errors`` store; ``db``
    is only used in the printed messages.
    """
    banner = "=" * 150
    print(banner)

    print(f"Saving {len(localAlbumsDict)} {db} Albums Data")
    localAlbums.save(data=localAlbumsDict)

    print(f"Saving {len(searchedForErrors)} {db} Searched For Errors")
    errors.save(data=searchedForErrors)

    print(banner)

In [None]:
ts = Timestat("Getting {0} AlbumIDs".format(db))
tt = getTT(skipEOD=False)

n    = 0            # successful downloads in this run
maxN = 25000000     # hard cap on downloads per run
maxErrors = 5       # consecutive failures tolerated before giving up
localAlbumsDict     = localAlbums.get()
searchedForErrors   = errors.get()
stop = False        # lets an inner-loop break also end the outer loop
nErrors = []        # album IDs that failed since the last success
try:
    # .items() replaces .iteritems(), which pandas 2.0 removed.
    for groupModVal, modValData in albumNamesToGet.items():
        if stop is True:
            break
        for j, (albumID, row) in enumerate(modValData.iterrows()):
            if len(nErrors) >= maxErrors:
                # Print one ready-to-paste statement per failed ID so the
                # entries can be removed from the error dict by hand, then
                # leave BOTH loops (previously only this inner print loop
                # was broken and downloading carried on).
                for failedID in nErrors:
                    print(f"del searchedForErrors['{failedID}']")
                stop = True
                break

            # Skip albums that are already downloaded or already marked as errors.
            if any([dct.get(albumID) is not None for dct in [localAlbumsDict, searchedForErrors]]):
                continue

            albumName = row["Name"]
            albumRef  = row["Ref"]

            print(f"{groupModVal: <8} |{j: <8} | {n: <8} | ", end="")
            try:
                response = apiio.getAlbumData(albumName=albumName, albumRef=albumRef)
            except Exception:
                # `Exception` (not a bare except) so that Ctrl-C still
                # interrupts the cell instead of being logged as an album error.
                print("Error ==> {0}".format(albumName))
                searchedForErrors[albumID] = True
                nErrors.append(albumID)
                apiio.sleep(10)
                continue

            # Anything other than bytes is treated as a failed download.
            if not isinstance(response, bytes):
                print("Error ==> {0}".format(albumName))
                searchedForErrors[albumID] = True
                nErrors.append(albumID)
                apiio.sleep(3.5)
                continue

            # A success resets the consecutive-error list.
            nErrors = []
            modVal = dbio.mv.get(albumID)
            dbio.data.saveRawArtistAlbumData(data=response, modval=modVal, dbID=albumID)
            localAlbumsDict[albumID] = True
            apiio.sleep(4.5)
            n += 1

            if n % 5 == 0:
                if tt.isFinished():
                    stop = True
                    break

            if n % 50 == 0:
                apiio.sleep(5)

            # Checkpoint every 100 downloads.
            if n % 100 == 0:
                saveAlbumData(db, localAlbumsDict, searchedForErrors)
                if tt.isFinished():
                    stop = True
                    break
                apiio.wait(10.0)

            if n >= maxN:
                print("Breaking after {0} downloads...".format(maxN))
                stop = True
                break
finally:
    # Runs on normal completion, on every break and on interruption, so the
    # bookkeeping gathered since the last checkpoint is always persisted.
    ts.stop()
    saveAlbumData(db, localAlbumsDict, searchedForErrors)

In [None]:
# Display the kernel's process ID (getpid is also imported in the first import cell).
from os import getpid
getpid()

# Download Lists

## Download Starter

In [None]:
from apiutils import apiio
from ioutils import FileIO, HTMLIO
# Handles used by the list-download cells below.
io  = FileIO()
hio = HTMLIO()
# NOTE(review): this rebinds `apiio` from the imported callable to its own
# result, and also replaces the RawAPIData handle created earlier under the
# same name -- confirm this is intended.
apiio = apiio()

In [None]:
# Entry pages to fetch once; results are cached in the starter pickle.
starter = {
    "List":     "https://www.bandcamp.org/lists.php",
    "Genre":    "https://www.bandcamp.org/genre.php",
    #starterBestAlbum = "https://www.bandcamp.org/ratings/6-highest-rated/2023/1"
    "Rating":   "https://www.bandcamp.org/ratings",
    "Discover": "https://www.bandcamp.org/discover",
    "Releases": "https://www.bandcamp.org/releases",
    "MustHear": "https://www.bandcamp.org/must-hear",
}

savename = "../../sandbox/AOTYstarter.p"
starterData = io.get(savename)
for key, url in starter.items():
    # Only fetch pages that are not cached yet.
    if starterData.get(key) is None:
        retval = apiio.get(url)
        if retval.code == 200:
            print(key)
            starterData[key] = retval.data
        apiio.sleep(3)

print(f"Saving data to {savename}")
io.save(idata=starterData, ifile=savename)

In [None]:
useStarter = False
useSite    = True

# Pick the set of downloaded pages to scan.
if useStarter is True:
    aotyData = io.get("../../sandbox/AOTYstarter.p")
elif useSite is True:
    aotyData = io.get("../../sandbox/AOTYsiteData.p") | io.get("../../sandbox/AOTYsiteData2.p")
else:
    aotyData = {}

# Ordered classification rules: the first rule whose test accepts an href
# decides its category; hrefs matching no rule are ignored.
refRules = [
    ("List",     lambda href: href.startswith("/list/")),
    ("Lists",    lambda href: "lists.php" in href),
    ("Rating",   lambda href: href.startswith("/ratings/")),
    ("Artist",   lambda href: href.startswith("/artist/")),
    ("Album",    lambda href: href.startswith("/album/")),
    ("Discover", lambda href: href.startswith("/discover/")),
    ("Genre",    lambda href: href.startswith("/genre/")),
    ("MustHear", lambda href: href.startswith("/must-hear/")),
    ("Release",  lambda href: "/releases/" in href),
    ("Spotify",  lambda href: "spotify.com" in href),
    ("Apple",    lambda href: "apple.com" in href),
    ("Amazon",   lambda href: "amazon.com" in href),
    ("User",     lambda href: "/user/" in href),
]
# Category -> {href: link text}; the key order is what later cells iterate over.
refsData = {kind: {} for kind in ["List", "Lists", "Rating", "Album", "Artist", "Discover", "Genre",
                                  "MustHear", "Release", "Spotify", "Apple", "Amazon", "User"]}

N  = len(aotyData)
ts = Timestat(f"Sorting {N} Site Refs")
for n, (key, keyData) in enumerate(aotyData.items()):
    # Progress after the first 100 pages and then every 500.
    if (n+1) % 500 == 0 or (n+1) == 100:
        ts.update(n=n+1, N=N)

    bsdata = hio.get(keyData)
    for ref in bsdata.findAll("a"):
        href = ref.get('href')
        # Anchors without a string href cannot be classified.
        if not isinstance(href, str):
            continue
        kind = next((name for name, matches in refRules if matches(href)), None)
        if kind is not None:
            refsData[kind][href] = ref.text.strip()

ts.stop()

In [None]:
# Report how many refs each category holds, then persist the whole mapping.
savename = "../../sandbox/AOTYsiteRefs.p"
print(f"Saving data to {savename}")
for key in refsData:
    keyData = refsData[key]
    print(f"  {key: <20}{len(keyData)}")
io.save(idata=refsData, ifile=savename)

In [None]:
siteData   = {}
artistData = {}
albumData  = {}
userData   = {}
otherData  = {}

# False (default): classify every ref in refsData; a site-relative ref counts
#                  as a site page only if it contains "/list/".
# True:            classify only refsData["List"]; every remaining
#                  site-relative ref counts as a site page.
listRefsOnly = False

def _bucketFor(ref, listRefsOnly):
    """Return the dictionary (artist/album/user/site/other) that ``ref`` belongs in."""
    if ref.startswith("/artist/"):
        return artistData
    if ref.startswith("/album/"):
        return albumData
    if ref.startswith("/user/"):
        return userData
    if ref.startswith("/") and (listRefsOnly or "/list/" in ref):
        return siteData
    return otherData

refGroups = [refsData["List"]] if listRefsOnly else list(refsData.values())
for keyData in refGroups:
    for ref, name in keyData.items():
        _bucketFor(ref, listRefsOnly)[ref] = name

print(f"Found {len(artistData)} Artist Refs")
print(f"Found {len(albumData)} Album Refs")
print(f"Found {len(userData)} User Refs")
print(f"Found {len(siteData)} Site Refs")
print(f"Found {len(otherData)} Other Refs")

In [None]:
# Add the per-year list index pages for 1970 through 2004 to the download queue.
siteData.update({f'/lists.php?y={year}': 'View More' for year in range(1970, 2005)})

In [None]:
# Two caches of already-downloaded pages; new downloads go into the second one.
savename1 = "../../sandbox/AOTYsiteData.p"
siteDataDownloads1 = io.get(savename1)
print(f"Found {len(siteDataDownloads1)} Previous Downloads")
savename2 = "../../sandbox/AOTYsiteData2.p"
siteDataDownloads2 = io.get(savename2)
print(f"Found {len(siteDataDownloads2)} Previous Downloads")

N = len(siteData)
ts = Timestat(f"Downloading {N} Site Refs")
for n, (ref, name) in enumerate(siteData.items()):
    # Skip refs that either cache already holds.
    inFirst = siteDataDownloads1.get(ref) is not None
    inSecond = siteDataDownloads2.get(ref) is not None
    if inFirst or inSecond:
        continue

    url = f"https://www.bandcamp.org{ref}"
    retval = apiio.get(url)
    if retval.code == 200:
        print(f"{n: <6} | {N: <6} | {ref}")
        siteDataDownloads2[ref] = retval.data
    apiio.sleep(3)

    # Checkpoint whenever the position in the queue reaches a multiple of 25.
    if (n+1) % 25 == 0:
        ts.update(n=n+1,N=N)
        print(f"Saving {len(siteDataDownloads2)} data to {savename2}")
        io.save(idata=siteDataDownloads2, ifile=savename2)

ts.stop()

print(f"Saving data to {savename2}")
io.save(idata=siteDataDownloads2, ifile=savename2)

In [None]:
# Reload the first site-data cache from disk.
siteDataDownloads = io.get("../../sandbox/AOTYsiteData.p")

In [None]:
# NOTE(review): `savename` is whatever an earlier cell last assigned (the starter
# or site-refs pickle path in the visible cells), not the site-data file that
# siteDataDownloads was read from -- confirm the target before running.
io.save(idata=siteDataDownloads, ifile=savename)

## Check For New Data

In [None]:
# Spot-check getAlbumID on an album ref that carries a sub-path and query string.
from lib.bandcamp import MusicDBID
mid = MusicDBID()
mid.getAlbumID('/album/515536-beyonce-renaissance/critic-lists/?f=all&y=2022')

In [None]:
# One row per album ref (columns "Ref" and "List"), plus the album ID derived
# from the ref; only the first row per album ID is kept.
albumRefs = Series(refsData["Album"])
df = DataFrame(albumRefs).reset_index()
df = df.rename(columns={"index": "Ref", 0: "List"})
df["AlbumID"] = df["Ref"].map(mid.getAlbumID)
isRepeat = df["AlbumID"].duplicated()
df = df[~isRepeat]

In [None]:
# One row per artist ref (columns "Ref" and "Name"), plus the artist ID derived
# from the ref; only the first row per artist ID is kept.
# This rebinds `df`, replacing the album frame built in the previous cell.
artistRefs = Series(refsData["Artist"])
df = DataFrame(artistRefs).reset_index()
df = df.rename(columns={"index": "Ref", 0: "Name"})
df["ArtistID"] = df["Ref"].map(mid.getArtistID)
isRepeat = df["ArtistID"].duplicated()
df = df[~isRepeat]

In [None]:
# NOTE(review): `searchArtists` is not defined in any visible cell (the accessor
# defined near the top of the notebook is `allArtists`) -- confirm the intended name.
artistNames = searchArtists()

In [None]:
# Re-index the new artist refs by ArtistID, append them to the existing names
# and keep the first row per ID (existing entries win over new ones).
artistIDIndex = df["ArtistID"]
df = df.drop(["ArtistID"], axis=1)
df.index = artistIDIndex
artistNames = concat([artistNames, df])
isRepeat = artistNames.index.duplicated()
artistNames = artistNames[~isRepeat]

In [None]:
# Persist the merged artist-name table through the database IO object.
dbio.data.saveSearchArtistData(data=artistNames)

# Backup

In [None]:
from utils import StoreData, backup
from numpy import array_split
# Merge the local artist data in two passes (the 100 ModVals split into two
# halves by array_split), then run the global merge.
sd = StoreData("bandcamp", "Artist")
for modVals in array_split(range(100), 2):
    sd.mergeLocalData(modVals=modVals)
sd.mergeGlobalData()

In [None]:
# Wait 200 seconds, then run the global merge again (relies on `sd` from the previous cell).
from time import sleep
sleep(200)
sd.mergeGlobalData()

In [None]:
# Move the artist files whose basename is all digits from the external volume
# to the local music directory, for ModVals 0 through 66.
for modVal in range(67):
    srcDir = DirInfo(f"/Volumes/Piggy/Discog/artists-bandcamp/{modVal}/artists")
    dstDir = DirInfo(f"/Users/tgadfort/Music/Discog/artists-bandcamp/{modVal}/artists")
    candidates = list(map(FileInfo, srcDir.getFiles()))
    files = [finfo for finfo in candidates if finfo.basename.isdigit()]
    for srcFile in files:
        srcFile.mvFile(dstDir.join(srcFile.name))