In [65]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

from mp3id import mp3ID
from musicBase import musicBase
from musicFinder import musicFinder
from musicPath import pathBasics
from timeUtils import clock, elapsed
from listUtils import getFlatList
from discogsBase import discogs
from multiArtist import multiArtist
from time import sleep
from pandas import DataFrame, Series
from ioUtils import getFile, saveFile
from searchUtils import findDirs, findNearest, findAll
from fileUtils import getDirBasics, getBaseFilename
from unicodedata import normalize
from fsUtils import moveDir, setDir, mkDir, isDir
from pandasUtils import getRowDataByColValue, getRowData

from difflib import SequenceMatcher
import operator
from glob import glob
from os.path import join

from masterdb import getArtistAlbumsDB, discConv


## Discogs
from discogsBase import discogs


## Python Version
import sys
print("Python: {0}".format(sys.version))

import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Python: 3.7.3 (default, Mar 27 2019, 16:54:48) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2020-02-23 18:49:38.450154


# Global Variables

In [2]:
dbFull  = ["Discogs", "AllMusic"]
dbNames = [x.lower() for x in dbFull]
dbKeys  = []
for item in list(zip(dbFull, dbNames)):
    dbKeys.append({"Key": item[0], "Name": item[1]})
print(dbKeys)

discogMediaNames   = ['Albums', 'Singles & EPs', 'Compilations', 'Videos', 'Miscellaneous', 'Visual', 'DJ Mixes']
allmusicMediaNames = ['Album']
myMediaNames       = ['Random', 'Todo', 'Match', 'Title', 'Singles']

[{'Key': 'Discogs', 'Name': 'discogs'}, {'Key': 'AllMusic', 'Name': 'allmusic'}]


## Create Disc Objects

In [3]:
disc = {}
for dbKey in dbKeys:
    key  = dbKey["Key"]
    name = dbKey["Name"]
    disc[key] = discogs(name)

## Artist Data (AllMusic)

In [4]:
discdf  = {}
artists = {}
artistIDToName = {}
artistNameToID = {}
artistAlbumsDB = {}
for dbKey in dbKeys:
    key  = dbKey["Key"]
    name = dbKey["Name"]
    print("Getting Master Artist DB File ({0})".format(key))
    
    discdf[key]  = disc[key].getMasterSlimArtistDiscogsDB()
    artists[key] = [x for x in list(discdf[key]["Artist"]) if x is not None]
    print("  Found {0} Artists in DB".format(len(artists[key])))

    artistIDToName[key] = discdf[key]["DiscArtist"].to_dict()
    artistNameToID[key] = {}
    print("  Found {0} ID -> Name entries".format(len(artistIDToName[key])))
                        
    for artistID,artistName in artistIDToName[key].items():
        if artistName is None:
            continue
        if artistNameToID[key].get(artistName) is None:
            artistNameToID[key][artistName] = []
        artistNameToID[key][artistName].append(artistID)
    print("  Found {0} Name -> ID entries".format(len(artistNameToID[key])))

    artistAlbumsDB[key] = getArtistAlbumsDB(disc[key], force=False)
    print("  Found {0} Artist Albums".format(artistAlbumsDB[key].shape[0]))
    print("")
    
_, _ = clock("Last Run")

Getting Master Artist DB File (Discogs)
  Found 717362 Artists in DB
  Found 717391 ID -> Name entries
  Found 659367 Name -> ID entries
Current Time is Sun Feb 23, 2020 15:45:01 for 
Using previously created Artist Albums DB
Current Time is Sun Feb 23, 2020 15:45:29 for Done with 
Process [Done with 
  Found 717391 Artist Albums

Getting Master Artist DB File (AllMusic)
  Found 36569 Artists in DB
  Found 36569 ID -> Name entries
  Found 34502 Name -> ID entries
Current Time is Sun Feb 23, 2020 15:45:29 for 
Using previously created Artist Albums DB
Current Time is Sun Feb 23, 2020 15:45:31 for Done with 
Process [Done with 
  Found 36569 Artist Albums



## Multi Artist Objects

In [5]:
mulArts = {}
for dbKey in dbKeys:
    key  = dbKey["Key"]
    name = dbKey["Name"]
    mulArts[key] = multiArtist(cutoff=0.9, discdata=artistNameToID[key], exact=False)
    
_, _ = clock("Last Run")

# Find Music and Match Artists

In [6]:
def getMyMusicDBMap():
    myMusicMap = getFile(ifile="myMusicMap.p")
    print("Found {0} entries".format(len(myMusicMap)))
    cntrs = {dbKey["Key"]: 0 for dbKey in dbKeys}
    for myArtistName, myArtistData in myMusicMap.items():
        for dbKey in dbKeys:
            key = dbKey['Key']
            if myArtistData[key] is not None:
                cntrs[key] += 1
    print(cntrs)
    return myMusicMap
#Found 4693 entries
#{'Discogs': 4693, 'AllMusic': 4303}

## Loop over My Music

In [84]:
from pandas import DataFrame

musicNameIDMap = {}
multiMap       = {}


def getArtistAlbums(discdf, idx):
    if not isinstance(discdf, DataFrame):
        raise ValueError("Not a DataFrame")
    
    try:
        artistAlbumsData = discdf[discdf.index == idx]
        artistAlbums     = artistAlbumsData["Albums"].to_dict().get(idx)
    except:
        return {}
        
    return artistAlbums


def getArtistIDX(artistMapData, name, discdf):
    if not isinstance(artistMapData, dict):
        raise ValueError("No Artist Map Data")
    if not isinstance(discdf, DataFrame):
        raise ValueError("Not a DataFrame")        
    if not isinstance(artistMapData, dict):
        raise ValueError("artistMapData is not a DB!")
    
    try:
        idx = artistMapData.get(name).get('ID')
    except:
        return None
    
    ## Check
    if idx not in list(discdf.index):
        raise ValueError("ID {0} for {1} is not in the Index of the main DataFrame!".format(idx, name))
    
    return idx

def getArtistIDDBCounts(dbIDData):
    if isinstance(discogsIDData, DataFrame):
        return dbIDData.shape[0]
    return 0

def printArtistIDs(artistName, discogsArtistIDX, allmusicArtistIDX):
    print('\t{0: <40}{1: <15}{2: <15}'.format(artistName,str(discogsArtistIDX),str(allmusicArtistIDX)))

def printArtistIDDBResults(artistName, discogsIDData, allmusicIDData):
    print("\t{0: <40}{1: <15}{2: <15}".format("", 
                                              getArtistIDDBCounts(discogsIDData),
                                              getArtistIDDBCounts(allmusicIDData)))
    
def getMyMusicAlbums(dirval, returnNames=False):    
    discogMediaNames   = ['Albums', 'Singles & EPs', 'Compilations', 'Videos', 'Miscellaneous', 'Visual', 'DJ Mixes']
    allmusicMediaNames = ['Album']
    myMediaNames       = ['Random', 'Todo', 'Match', 'Title', 'Singles', 'Unknown', 'Bootleg', 'Mix']
    
    myMusicAlbums = [x for x in findDirs(dirval) if getDirBasics(x)[-1] not in discogMediaNames+allmusicMediaNames+myMediaNames]
    if returnNames is True:
        myMusicAlbums = [getDirBasics(x)[-1] for x in myMusicAlbums]
    return myMusicAlbums


def getMyMatchedMusicAlbums(dirval):  
    matchval = join(dirval, "Match", "*")
    matchedAlbums = []
    for dname in glob(matchval):
        matchedAlbums += [getDirBasics(x)[-1].split(" :: ")[0] for x in findDirs(dname)]
    return matchedAlbums


def getMyTodoMusicAlbums(dirval):
    todoAlbums = []
    for dval in ["Todo", "Album", "Title"]:
        todoval = join(dirval, dval)
        for dname in glob(todoval):
            todoAlbums += [getDirBasics(x)[-1] for x in findDirs(dname)]
    return todoAlbums


def getMyUnknownMusicAlbums(dirval):
    todoAlbums = []
    for dval in ['Unknown', 'Bootleg', 'Mix']:
        todoval = join(dirval, dval)
        for dname in glob(todoval):
            todoAlbums += [getDirBasics(x)[-1] for x in findDirs(dname)]
    return todoAlbums


def getMyRandomMusic(dirval):
    randomMusic = []
    for dval in ['Random']:
        todoval = join(dirval, dval)
        for dname in glob(todoval):
            randomMusic += [getBaseFilename(x) for x in findAll(dname)]
    return randomMusic


def getMyDiscogsMusicAlbums(dirval):
    discogsAlbums      = []
    discogMediaNames   = ['Albums', 'Singles & EPs', 'Compilations', 'Videos', 'Miscellaneous', 'Visual', 'DJ Mixes']
    for discogName in discogMediaNames:
        matchval = join(dirval, discogName, "*")
        for dname in glob(matchval):
            discogsAlbums += [getDirBasics(x)[-1] for x in findDirs(dname)]
    return discogsAlbums



def getFlattenedArtistAlbums(vals):
    if vals is None or not isinstance(vals, dict):
        return []
    albums = []
    for k,v in vals.items():
        for k2, v2 in v.items():
            albums.append(v2)
    return list(set(albums))
            
    
def getBestAlbumsMatch(albums1, albums2, cutoff=0.7, debug=False):
    if debug:
        print("Best Albums Match")
    score = 0.0
    for albumA in albums1:
        for albumB in albums2:
            s = SequenceMatcher(None, albumA, albumB)
            ratio = round(s.ratio(), 2)
            if debug:
                print("\t\t\t[{0}] + [{1}] = [{2}]".format(albumA, albumB, ratio))
            if ratio > cutoff:
                score += ratio
    return score


def findPossibleArtistIDs(artistName, artistNameToID, artists, num=2, cutoff=0.7):
    possibleIDs    = artistNameToID.get(artistName)
    if possibleIDs is None:
        possibleIDs = []
    print(artistName,'\t',len(possibleIDs),'\t',len(artists))
    newArtistNames = findNearest(artistName, artists, num, cutoff)
    for newArtist in newArtistNames:
        possibleIDs += artistNameToID[newArtist]
    
    possibleIDs  = list(set(possibleIDs))

    return possibleIDs


def getMatchedDirs():
    baseDirs = ["/Volumes/Music/Matched", "/Volumes/Biggy/Matched"]
    return baseDirs


def getVolumeName(baseDir):
    vals = getDirBasics(baseDir)
    return vals[2]


def getMatchedDir():
    baseDir = "/Users/tgadfort/matched"
    return baseDir



def getPrimeDirectory(artistName):
    start = artistName[0]

    import string
    if start in string.ascii_uppercase:
        if artistName.startswith("The "):
            return "The"
        return start
    if start in string.ascii_lowercase:
        return "Xtra"
    elif start in string.digits:
        return "Num"
    else:
        raise ValueError("Could not determine Prime Directory for Artist {0}".format(artistName))
        
def getPrimeDirectories():    
    import string
    retvals  = [x for x in string.ascii_uppercase]
    retvals += ["Num", "Xtra", "The"]
    retvals  = sorted(retvals)
    return retvals

# Find Known Music Artists

In [8]:
myMusicMap = getMyMusicDBMap()

Found 4715 entries
{'Discogs': 4715, 'AllMusic': 4339}


### Get Artist <-> Prime Dir Map

In [9]:
def getArtistPrimeDirMap(primeDir):
    dirvals     = getFlatList([findDirs(setDir(matchedDir, primeDir)) for matchedDir in getMatchedDirs()])
    artistNames = [normalize('NFC', getDirBasics(dirval)[-1]) for dirval in dirvals]
    
    artistPrimeDirMap = {}
    for (artistName,dirval) in list(zip(artistNames, dirvals)):
        if artistPrimeDirMap.get(artistName) is None:
            artistPrimeDirMap[artistName] = []
        artistPrimeDirMap[artistName].append(dirval)
        
    return artistPrimeDirMap

### Search For Internal Matchs

In [10]:
def getMyArtistNames():
    artistNames = []
    for primeDir in getPrimeDirectories():
        artistPrimeDirMap = getArtistPrimeDirMap(primeDir)
        artistNames += artistPrimeDirMap.keys()
    print("Found {0} Artists In My Matched Directories".format(len(artistNames)))
    return artistNames

In [11]:
searchForInternal = False
if searchForInternal:
    start, cmt = clock("Searching For Internal Matches")
    artistNames = getMyArtistNames()
    for i,artistName in enumerate(artistNames):
        if myMusicMap.get(artistName) is not None:
            continue
        possibleMatches = findNearest(artistName, artistNames, 2, 0.7)
        possibleMatches = set(possibleMatches).difference(set([artistName]))
        if len(possibleMatches) > 1:
            print("{0: <6}{1: <40}{2}".format(i,artistName,possibleMatches))
    elapsed(start, cmt)

***
***

# Search For Artist <-> Match In My Music

In [12]:
def getMatchedStatusForMyArtists(onlyUnknown=False):
    start, cmt = clock("Matching All Music Artists")

    fullyUnknownArtistNames     = {}
    partiallyUnknownArtistNames = {}
    multiKnownArtistNames       = {}


    ######################################################################
    #### Loop Over Prime Directories
    ######################################################################
    for primeDir in getPrimeDirectories():
        artistPrimeDirMap = getArtistPrimeDirMap(primeDir)


        ######################################################################
        #### Loop Over Artist Name <-> Prime Map Items
        ######################################################################
        for artistName, artistPrimeDirs in artistPrimeDirMap.items():


            ######################################################################
            #### Check If Artist Has Any ID
            ######################################################################
            myMusicData          = myMusicMap.get(artistName)
            if myMusicData is None:
                fullyUnknownArtistNames[artistName] = artistPrimeDirs
                print("\tUnknown (All)     --> {0}".format(artistName))
                continue
            if onlyUnknown is True:
                continue


            ######################################################################
            #### Get Database IDs
            ######################################################################
            artistIDXs = {dbKey['Key']: getArtistIDX(myMusicData, dbKey['Key'], discdf[dbKey['Key']]) for dbKey in dbKeys}
            if all(artistIDXs.values()):
                ## Previously Known By All DBs
                #print("\tKnown (All)       --> {0}".format(artistName))
                continue

            if not any(artistIDXs.values()):
                ## Previously UnKnown By All DBs
                fullyUnknownArtistNames[artistName] = artistPrimeDirs
                print("\tUnkfnown (All)     --> {0}".format(artistName))
                continue

            partiallyUnknownArtistNames[artistName] = {}
            for key,artistID in artistIDXs.items():
                if artistID is None:
                    partiallyUnknownArtistNames[artistName][key] = artistPrimeDirs
                    print("\tUnknown (Partial) --> {0}".format(artistName))        

    elapsed(start, cmt)
    
    retval = {"FullyUnknown": fullyUnknownArtistNames, "PartiallyUnknown": partiallyUnknownArtistNames}
    return retval

#### Just look for fully unknown artists

In [13]:
matchedResults = getMatchedStatusForMyArtists(onlyUnknown=True)
for status, matchedResult in matchedResults.items():
    print("{0: <30}{1}".format(status, len(matchedResult)))
    
_, _ = clock("Last Run")

Current Time is Sun Feb 23, 2020 15:45:33 for Matching All Music Artists
Current Time is Sun Feb 23, 2020 15:45:34 for Done with Matching All Music Artists
Process [Done with Matching All Music Artists] took 1 seconds.
FullyUnknown                  0
PartiallyUnknown              0


#### Check everyone

In [14]:
matchedResults = getMatchedStatusForMyArtists()
for status, matchedResult in matchedResults.items():
    print("{0: <30}{1}".format(status, len(matchedResult)))
    
_, _ = clock("Last Run")

Current Time is Sun Feb 23, 2020 15:46:42 for Matching All Music Artists
	Unknown (Partial) --> A-Mafia
	Unknown (Partial) --> Admiral D
	Unknown (Partial) --> Adryan Russ
	Unknown (Partial) --> Afrodite
	Unknown (Partial) --> Aftertaste
	Unknown (Partial) --> Age Of Love
	Unknown (Partial) --> Agent 99
	Unknown (Partial) --> AIR
	Unknown (Partial) --> Alberte
	Unknown (Partial) --> Alley Boy
	Unknown (Partial) --> Andy G And The Roller Kings
	Unknown (Partial) --> Apothys
	Unknown (Partial) --> Arkham 13
	Unknown (Partial) --> Ashes
	Unknown (Partial) --> Ayahuasca
	Unknown (Partial) --> B.O.B
	Unknown (Partial) --> Barry Kleinbort
	Unknown (Partial) --> Beam & Yanou
	Unknown (Partial) --> Beatrice Arthur
	Unknown (Partial) --> Bedřich Smetana
	Unknown (Partial) --> Before I Break
	Unknown (Partial) --> Bei Maejor
	Unknown (Partial) --> Bells Of Avalon
	Unknown (Partial) --> Benett Rogers
	Unknown (Partial) --> Bert Jansch And John Renbourn
	Unknown (Partial) --> Beth Capper
	Unknown 

	Unknown (Partial) --> Swarm
	Unknown (Partial) --> Sønderjyllands Symfoniorkester
	Unknown (Partial) --> Søren Kragh-Jacobsen
	Unknown (Partial) --> Team Invasion
	Unknown (Partial) --> Tee Stunna
	Unknown (Partial) --> Tertium Non Data
	Unknown (Partial) --> Therr Maitz
	Unknown (Partial) --> This World Rejected
	Unknown (Partial) --> Thumbs Down
	Unknown (Partial) --> Tiesto
	Unknown (Partial) --> Tone Loc
	Unknown (Partial) --> Tony De Vit
	Unknown (Partial) --> Top Gun
	Unknown (Partial) --> Tracy T
	Unknown (Partial) --> Tuna Universitaria de Granada
	Unknown (Partial) --> The B. Lee Band
	Unknown (Partial) --> The Bicycle Thief
	Unknown (Partial) --> The Bitter Springs
	Unknown (Partial) --> The Bluebeats
	Unknown (Partial) --> The Bruce Johnston Surfing Band
	Unknown (Partial) --> The Campus Tramps
	Unknown (Partial) --> The Classic Metropolitan Orchestra
	Unknown (Partial) --> The Color Changin' Click
	Unknown (Partial) --> The Consumers
	Unknown (Partial) --> The Crash
	Unkno

***

## Analyze Fully Unknown Music Artists

In [16]:
def analyzeFullyUnknownArtists(matchedResults):
    num = 2
    cutoff = 0.6
    additions = {}

    print("{0: <5}{1: <40}{2: <12}{3}".format("#", "Artist", "# of Dirs", "# of Albums"))
    for i,(artistName, dirvals) in enumerate(matchedResults["FullyUnknown"].items()):
        print("\n")
        print("="*60)
        print("{0: <5}{1: <40}{2: <12}".format(i,artistName,len(dirvals)), end="")

        myMusicAlbums = []
        for dirval in dirvals:
            myMusicAlbums += getMyMusicAlbums(dirval, returnNames=True) + getMyMatchedMusicAlbums(dirval) + getMyDiscogsMusicAlbums(dirval)
        print(len(myMusicAlbums))


        possibleIDXs = {}
        for dbKey in dbKeys:
            key               = dbKey['Key']
            possibleIDXs[key] = findPossibleArtistIDs(artistName, artistNameToID[key], artists[key], num, cutoff)


            ## Find Possible IDs
            print("     Possible IDs ===>",len(possibleIDXs[key]))
            for possibleID in possibleIDXs[key]:
                print("\t{0: <15}".format(possibleID), end="")
                try:
                    artistAlbums = getRowData(artistAlbumsDB[key], rownames=possibleID)['Albums']
                except:
                    raise ValueError("Could not get Albums data from artistAlbumsDB[{0}] with id [{1}]".format(key, possibleID))

                try:
                    artistAlbums = getFlattenedArtistAlbums(artistAlbums)
                except:
                    raise ValueError("Could not flatten albums for id [{0}]".format(possibleID))


                print("{0: <10}".format(len(artistAlbums)), end="")

                ## Find overlapping albums
                retval = getBestAlbumsMatch(artistAlbums, myMusicAlbums, cutoff=cutoff, debug=False)                
                print(round(retval,2))
                if retval > cutoff:
                    if additions.get(key) is None:
                        additions[key] = {}
                    if additions[key].get(artistName) is None:
                        additions[key][artistName] = {"Score": retval, "Key": key, "Value": {'ID': possibleID, 'Name': None}}
                    elif retval > additions[key][artistName]["Score"]:
                        additions[key][artistName] = {"Score": retval, "Key": key, "Value": {'ID': possibleID, 'Name': None}}

                    print("\t{0: <15} is a match!".format(possibleID), end="")
                else:
                    print("", end="")

            print("")

    print("")
    print("Found {0} new matches".format(len(additions)))
    
    return additions

In [17]:
additions = analyzeFullyUnknownArtists(matchedResults)

#    Artist                                  # of Dirs   # of Albums

Found 0 new matches


In [18]:
start, cmt = clock("Adding New Matches to myMusicMap")
if len(additions) > 0:
    for dbKey, dbKeyData in additions.items():
        for artistName, artistValues in dbKeyData.items():
            if myMusicMap.get(artistName) is None:
                myMusicMap[artistName] = {"Discogs": None, "AllMusic": None}
            myMusicMap[artistName][artistValues['Key']] = artistValues['Value']

    cntrs = {"Discogs": 0, "AllMusic": 0}
    for myArtistName, myArtistData in myMusicMap.items():
        if myArtistData["Discogs"] is not None:
            cntrs["Discogs"] += 1
        if myArtistData["AllMusic"] is not None:
            cntrs["AllMusic"] += 1
    print(cntrs)

    saveFile(idata=myMusicMap, ifile="myMusicMap.p", debug=True)
elapsed(start, cmt)

Current Time is Sun Feb 23, 2020 16:08:31 for Adding New Matches to myMusicMap
Current Time is Sun Feb 23, 2020 16:08:31 for Done with Adding New Matches to myMusicMap
Process [Done with Adding New Matches to myMusicMap] took 558 microseconds.


***

## Analyze Partially Unknown Music Artists

In [38]:
def analyzePartiallyUnknownArtists(matchedResults):
    start, cmt = clock("Finding Possible New Matches")

    num = 2
    cutoff = 0.6


    discogMediaNames   = ['Albums', 'Singles & EPs', 'Compilations', 'Videos', 'Miscellaneous', 'Visual', 'DJ Mixes']
    allmusicMediaNames = ['Album']
    myMediaNames       = ['Random', 'Todo', 'Match', 'Title', 'Singles']

    additions = {}

    print("{0: <40}{1}".format("Artist", "# of Albums"))
    for i,(artistName, unknownVals) in enumerate(matchedResults["PartiallyUnknown"].items()):
        for dbKey in dbKeys:
            key = dbKey['Key']        
            if unknownVals.get(key) is not None:
                dirvals = unknownVals[key]
                
                myMusicAlbums = []
                for dirval in dirvals:
                    myMusicAlbums += getMyMusicAlbums(dirval, returnNames=True) + getMyMatchedMusicAlbums(dirval) + getMyDiscogsMusicAlbums(dirval)
                if len(myMusicAlbums) == 0:
                    continue
                print("{0: <40}{1}".format(artistName,len(myMusicAlbums)))


                ## Find Possible IDs
                possibleIDs = findPossibleArtistIDs(artistName, artistNameToID[key], artists[key], num, cutoff)
                print("     Possible IDs ===>",len(possibleIDs))
                for possibleID in possibleIDs:
                    print("\t{0: <15}".format(possibleID), end="")
                    artistAlbums = getRowData(artistAlbumsDB[key], rownames=possibleID)['Albums']
                    artistAlbums = getFlattenedArtistAlbums(artistAlbums)          
                    print("\t{0: <10}".format(len(artistAlbums)), end="")


                    ## Find overlapping albums
                    retval = getBestAlbumsMatch(artistAlbums, myMusicAlbums, cutoff=cutoff, debug=False)                
                    print(round(retval,2))
                    if retval > cutoff:
                        if additions.get(artistName) is None:
                            additions[artistName] = {"Score": retval, "Key": key, "Value": {'ID': possibleID, 'Name': None}}
                        elif retval > additions[artistName]["Score"]:
                            additions[artistName] = {"Score": retval, "Key": key, "Value": {'ID': possibleID, 'Name': None}}

                        print("\t{0: <15} is a match!".format(possibleID))
                    else:
                        print("")

    print("")
    print("Found {0} new matches".format(len(additions)))
    elapsed(start, cmt)
    
    return additions

In [39]:
additions = analyzePartiallyUnknownArtists(matchedResults)

Current Time is Sun Feb 23, 2020 16:18:51 for Finding Possible New Matches
Artist                                  # of Albums
A-Mafia                                 2
A-Mafia 	 0 	 36569
     Possible IDs ===> 2
	0001572547     	1         0.0

	0000232101     	27        0.0

Admiral D                               1
Admiral D 	 0 	 36569
     Possible IDs ===> 2
	0000189497     	6         0.0

	0002483942     	7         0.0

Adryan Russ                             1
Adryan Russ 	 1 	 36569
     Possible IDs ===> 2
	0000258514     	18        0.0

	0000500904     	2         0.0

Afrodite                                1
Afrodite 	 1 	 36569
     Possible IDs ===> 2
	0002666663     	2         0.0

	0001855410     	1         0.0

Agent 99                                1
Agent 99 	 0 	 36569
     Possible IDs ===> 2
	0000929073     	1         0.0

	0000603742     	3         0.0

AIR                                     1
AIR 	 0 	 36569
     Possible IDs ===> 2
	0000923084     	7         

     Possible IDs ===> 2
	0002775617     	2         0.0

	0002132219     	7         0.0

Dreams In Exile                         1
Dreams In Exile 	 0 	 36569
     Possible IDs ===> 2
	0000190876     	1         0.0

	0003126164     	1         0.0

DSB                                     1
DSB 	 2 	 36569
     Possible IDs ===> 2
	0000375959     	4         0.0

	0001789045     	1         0.0

DVSN                                    1
DVSN 	 0 	 36569
     Possible IDs ===> 0
Dee-Lite                                1
Dee-Lite 	 0 	 36569
     Possible IDs ===> 2
	0000235725     	1         0.0

	0000231342     	22        0.0

Eightball                               2
Eightball 	 1 	 36569
     Possible IDs ===> 3
	0000350685     	93        0.0

	0002743575     	1         0.0

	0001845827     	1         0.0

Erik Frandsen                           1
Erik Frandsen 	 0 	 36569
     Possible IDs ===> 2
	0000163426     	23        0.0

	0001666457     	2         0.0

Fisticuffs                 

     Possible IDs ===> 2
	0003011915     	31        0.0

	0002216965     	32        0.0

Maybach Hot                             1
Maybach Hot 	 0 	 36569
     Possible IDs ===> 0
MIKA                                    1
MIKA 	 0 	 36569
     Possible IDs ===> 2
	0001355062     	24        0.0

	0000388773     	30        0.0

Miss Tammy Faye Starlite & The Angels Of Mercy1
Miss Tammy Faye Starlite & The Angels Of Mercy 	 0 	 36569
     Possible IDs ===> 0
Mo Beatz                                1
Mo Beatz 	 1 	 36569
     Possible IDs ===> 2
	0002536250     	3         0.0

	0001490651     	1         0.0

Mug-Shot                                1
Mug-Shot 	 0 	 36569
     Possible IDs ===> 1
	0001416525     	5         0.0

N*E*R*D                                 2
N*E*R*D 	 0 	 36569
     Possible IDs ===> 1
	0003431237     	1         0.0

N.I.N.A.                                1
N.I.N.A. 	 0 	 36569
     Possible IDs ===> 2
	0000388773     	30        0.0

	0003593803     	1         0.

     Possible IDs ===> 2
	0000033756     	9         0.0

	0000033388     	2         0.0

Svasti-ayanam                           1
Svasti-ayanam 	 0 	 36569
     Possible IDs ===> 2
	0001007756     	3         0.0

	0000295756     	366       0.0

Swarm                                   1
Swarm 	 2 	 36569
     Possible IDs ===> 2
	0000042292     	9         0.0

	0000044379     	1         0.0

Team Invasion                           1
Team Invasion 	 0 	 36569
     Possible IDs ===> 2
	0001250407     	3         0.0

	0003535194     	2         0.0

Tee Stunna                              1
Tee Stunna 	 0 	 36569
     Possible IDs ===> 2
	0001469229     	5         0.0

	0000890499     	2         0.0

Tertium Non Data                        2
Tertium Non Data 	 0 	 36569
     Possible IDs ===> 1
	0000645242     	5         0.0

Therr Maitz                             3
Therr Maitz 	 0 	 36569
     Possible IDs ===> 2
	0002933646     	1         0.0

	0000540043     	3         0.0

This World 

     Possible IDs ===> 2
	0003562778     	1         0.0

	0000342908     	3         0.0

The Rocky Horror Picture Show           1
The Rocky Horror Picture Show 	 0 	 36569
     Possible IDs ===> 2
	0000976945     	1         0.0

	0000292968     	1         0.0

The Schramms                            1
The Schramms 	 0 	 36569
     Possible IDs ===> 2
	0000780413     	2         0.0

	0001287850     	1         0.0

The Scientists                          2
The Scientists 	 1 	 36569
     Possible IDs ===> 2
	0001763805     	1         0.0

	0001291230     	1         0.0

The Shimmer Kids                        1
The Shimmer Kids 	 0 	 36569
     Possible IDs ===> 2
	0000064903     	6         0.0

	0000522843     	6         0.0

The Smooths                             2
The Smooths 	 0 	 36569
     Possible IDs ===> 2
	0002823988     	1         0.0

	0000899530     	58        0.0

The Splash Four                         1
The Splash Four 	 0 	 36569
     Possible IDs ===> 2
	0000487517   

In [40]:
start, cmt = clock("Adding New Matches to myMusicMap")
if len(additions) > 0:
    for artistName, artistValues in additions.items():
        key = artistValues['Key']
        val = artistValues['Value']
        if myMusicMap.get(artistName) is None:
            myMusicMap[artistName] = {dbKey['Key']: None for dbKey in dbKeys}
        myMusicMap[artistName][key] = val

    cntrs = {dbKey["Key"]: 0 for dbKey in dbKeys}
    for myArtistName, myArtistData in myMusicMap.items():
        for dbKey in dbKeys:
            key = dbKey['Key']
            if myArtistData[key] is not None:
                cntrs[key] += 1
    print(cntrs)

saveFile(idata=myMusicMap, ifile="myMusicMap.p", debug=True)
elapsed(start, cmt)

Current Time is Sun Feb 23, 2020 16:24:20 for Adding New Matches to myMusicMap
{'Discogs': 4715, 'AllMusic': 4342}
Saving data to myMusicMap.p
  --> This file is 197.8kB.
Saved data to myMusicMap.p
  --> This file is 197.8kB.
Current Time is Sun Feb 23, 2020 16:24:20 for Done with Adding New Matches to myMusicMap
Process [Done with Adding New Matches to myMusicMap] took 668.0 millseconds.


***
***

# Match Albums

In [41]:
def showArtist(artistName, discogsArtist):
    print("\t{0: <30} ---> {1}".format(artistName, discogsArtist))
    
def showAlbum(myAlbumName):
    print("\t\tMy Album: {0}".format(myAlbumName))
    
def showMatch(myAlbumName, albumData):
    print("\t\t   Match: {0}".format(albumData))
    
def matchMyAlbum(myAlbumName, dbAlbumsData, ratioCut):
    maxRatio  = None
    albumData = None
    for mediaType, mediaAlbums in dbAlbumsData.items():
        for albumID, dbAlbumName in mediaAlbums.items():
            convAlbumName = discConv(dbAlbumName)
            s = SequenceMatcher(None, myAlbumName, convAlbumName)
            ratio = round(s.ratio(), 3)
            if ratio > ratioCut:
                if maxRatio is None:
                    maxRatio = ratio
                else:
                    if ratio < maxRatio:
                        continue
                    maxRatio  = ratio
                    albumData = [albumID, dbAlbumName, mediaType]

                    albumData = [albumID, dbAlbumName, mediaType]
                    
    return albumData

In [52]:
ratioCut          = 0.80
myMusicAlbumMatch = {}
db = "Discogs"
#db = "AllMusic"
start, cmt = clock("Checking for Albums Matches Against {0} DB".format(db))


######################################################################
#### Loop Over Databases
######################################################################
for dbKey in dbKeys:
    key = dbKey["Key"]
    if key != db:
        continue
    myMusicAlbumMatch = {key: {}}


    ######################################################################
    #### Loop Over Prime Directories
    ######################################################################
    for primeDir in getPrimeDirectories():
        artistPrimeDirMap = getArtistPrimeDirMap(primeDir)


        ######################################################################
        #### Loop Over Artist Name <-> Prime Map Items
        ######################################################################
        for artistName, artistPrimeDirs in artistPrimeDirMap.items():



            ######################################################################
            #### Check If Artist Has Any ID
            ######################################################################
            myMusicData          = myMusicMap.get(artistName)
            if myMusicData is None:
                raise ValueError("Found an unknown artist: [{0}]".format(artistName))

                

            ######################################################################
            #### Get My Remaining Albums To Be Matched (For Each Dirval)
            ######################################################################
            for dirval in artistPrimeDirs:
                myMusicAlbums = getMyMusicAlbums(dirval, returnNames=True)
                if len(myMusicAlbums) == 0:
                    continue
                
                
                ######################################################################
                #### Check For DB Match For Each Album
                ######################################################################
                artistID = getArtistIDX(myMusicData, key, discdf[key])
                if artistID is not None:
                    artistAlbums = getRowData(artistAlbumsDB[key], rownames=artistID)['Albums']
                    for myAlbumName in myMusicAlbums:
                        albumData = matchMyAlbum(myAlbumName, artistAlbums, ratioCut)
                        if albumData is not None:
                            showArtist(artistName, artistID)
                            if myMusicAlbumMatch[key].get(artistName) is None:
                                myMusicAlbumMatch[key][artistName] = {}
                            showAlbum(myAlbumName)
                            myMusicAlbumMatch[key][artistName][myAlbumName] = {"Dir": dirval, "Album": albumData}
                            showMatch(myAlbumName, albumData)


elapsed(start, cmt)

Current Time is Sun Feb 23, 2020 16:36:31 for Checking for Albums Matches Against Discogs DB
	Carpenters                     ---> 170357
		My Album: Now And Then
		   Match: ['7428736', 'Now & Then', 'Singles & EPs']
	Cosmic Gate                    ---> 11101
		My Album: Materia Chapter.One
		   Match: ['1168037', 'Materia Chapter.One', 'Albums']
	Creedence Clearwater Revival   ---> 252102
		My Album: Bayou Country
		   Match: ['1246733', 'Bayou Country', 'Singles & EPs']
	Creedence Clearwater Revival   ---> 252102
		My Album: Green River
		   Match: ['812409', 'Green River', 'Singles & EPs']
	Cyndi Lauper                   ---> 29718
		My Album: At Last
		   Match: ['7491780', 'At Last', 'Miscellaneous']
	Dave Matthews Band             ---> 30674
		My Album: Live Trax Vol. 41
		   Match: ['5087332', 'Live Trax Vol. 10', 'Albums']
	Dave Matthews Band             ---> 30674
		My Album: Live Trax Vol. 42
		   Match: ['1004998', 'Live Trax Vol 32', 'Albums']
	Dave Matthews Band           

### Manually Check If Match is Correct

In [53]:
from ioUtils import saveFile
saveFile(ifile="myMusicAlbumMatch.yaml", idata=myMusicAlbumMatch, debug=True)
print("Found {0} music <-> discogs albums maps".format(len(myMusicAlbumMatch)))

Saving data to myMusicAlbumMatch.yaml
Saved data to myMusicAlbumMatch.yaml
  --> This file is 3.3kB.
Found 1 music <-> discogs albums maps


In [54]:
myMusicAlbumMatch = getFile(ifile="myMusicAlbumMatch.yaml", debug=True)
print("Found {0} music <-> discogs albums maps".format(len(myMusicAlbumMatch)))

Loading data from myMusicAlbumMatch.yaml
  --> This file is 2.3kB.
Loading myMusicAlbumMatch.yaml
Found 1 music <-> discogs albums maps


# Move To Matched

In [55]:
def getMatchedDirName(albumName, albumID, dbKey = "Discogs"):
    if dbKey == "AllMusic":
        matchedDirName = " :: ".join([discConv(albumName), "[{0}] AM".format(albumID)])
    else:
        matchedDirName = " :: ".join([discConv(albumName), "[{0}]".format(albumID)])
        
    return matchedDirName

def getUnMatchedDirName(matchedDirName):
    vals = matchedDirName.split(" :: ")
    if len(vals) == 2:
        albumName  = vals[0]
        albumIDval = vals[1]
        try:
            albumID = int(albumIDval[(albumIDval.find("[")+1):albumIDval.rfind("]")])
        except:
            raise ValueError("Could not extract album ID from {0}".format(albumIDval))
            
        return albumName
    else:
        raise ValueError("Could not extract album name from {0}".format(matchedDirName))

In [56]:
for dbKey, dbValues in myMusicAlbumMatch.items():
    if dbValues is None:
        continue
    for artistName, artistAlbums in dbValues.items():
        print("==>",artistName)
        for myAlbumName,albumVals in artistAlbums.items():
            dirval   = albumVals["Dir"]
            albumVal = albumVals["Album"]

            matchedDir = setDir(dirval, "Match")
            mkDir(matchedDir)
            
            albumID, dbAlbumName, mediaType = albumVal
            
            srcDir = setDir(dirval, myAlbumName)
            if not isDir(srcDir):
                print("{0} does not exist".format(srcDir))
                continue
                
            mediaDir = setDir(matchedDir, discConv(mediaType))
            mkDir(mediaDir)
            
            dstName = getMatchedDirName(myAlbumName, albumID, dbKey)
            dstDir  = setDir(mediaDir, dstName)
            if isDir(dstDir):
                print("{0} already exists".format(dstDir))
                continue

            print("\tMoving {0}  --->  {1}".format(srcDir, dstDir))
            moveDir(srcDir, dstDir, debug=True)

==> Carpenters
	Moving /Volumes/Biggy/Matched/C/Carpenters/Now And Then  --->  /Volumes/Biggy/Matched/C/Carpenters/Match/Singles & EPs/Now And Then :: [7428736]
Moving /Volumes/Biggy/Matched/C/Carpenters/Now And Then to /Volumes/Biggy/Matched/C/Carpenters/Match/Singles & EPs/Now And Then :: [7428736]
==> Cosmic Gate
	Moving /Volumes/Biggy/Matched/C/Cosmic Gate/Materia Chapter.One  --->  /Volumes/Biggy/Matched/C/Cosmic Gate/Match/Albums/Materia Chapter.One :: [1168037]
Moving /Volumes/Biggy/Matched/C/Cosmic Gate/Materia Chapter.One to /Volumes/Biggy/Matched/C/Cosmic Gate/Match/Albums/Materia Chapter.One :: [1168037]
==> Creedence Clearwater Revival
/Volumes/Music/Matched/C/Creedence Clearwater Revival/Match/Singles & EPs/Bayou Country :: [1246733] already exists
/Volumes/Music/Matched/C/Creedence Clearwater Revival/Match/Singles & EPs/Green River :: [812409] already exists
==> Cyndi Lauper
/Volumes/Music/Matched/C/Cyndi Lauper/Match/Miscellaneous/At Last :: [7491780] already exists
==> 

***
***
***

## Analyze Number of Albums

In [104]:
def analyzeArtistAlbums():
    
    start, cmt = clock("Analyzing Artist Albums")
    
    artistMetadata = {}
    numArtists = 0

    ######################################################################
    #### Loop Over Prime Directories
    ######################################################################
    for primeDir in getPrimeDirectories():
        artistPrimeDirMap = getArtistPrimeDirMap(primeDir)
        print("{0: <35}".format(primeDir), end="")
        volumeCounts = {}


        ######################################################################
        #### Loop Over Artist Name <-> Prime Map Items
        ######################################################################
        for artistName, artistPrimeDirs in artistPrimeDirMap.items():
            artistMetadata[artistName] = {}
            

            
            ######################################################################
            #### Get My Albums
            ######################################################################
            myAlbums  = {"MyAlbums": {}, "Matched": {}, "Todo": {}, "Unknown": {}, "Random": {}}
            myVolumes = {}
            for dirval in artistPrimeDirs:
                volumeName = getVolumeName(dirval)
                if volumeCounts.get(volumeName) is None:
                    volumeCounts[volumeName] = 0
                volumeCounts[volumeName] += 1
                myVolumes[volumeName] = 0
                
                myAlbums["MyAlbums"][volumeName] = getMyMusicAlbums(dirval, returnNames=True)
                myVolumes[volumeName] += len(myAlbums["MyAlbums"][volumeName])
                
                myAlbums["Matched"][volumeName] = getMyMatchedMusicAlbums(dirval)
                myVolumes[volumeName] += len(myAlbums["Matched"][volumeName])
                
                myAlbums["Todo"][volumeName] = getMyTodoMusicAlbums(dirval)
                myVolumes[volumeName] += len(myAlbums["Todo"][volumeName])
                
                myAlbums["Unknown"][volumeName] = getMyUnknownMusicAlbums(dirval)
                myVolumes[volumeName] += len(myAlbums["Unknown"][volumeName])
                
                myAlbums["Random"][volumeName] = getMyRandomMusic(dirval)
                
            artistMetadata[artistName]["MyAlbums"] = myAlbums
            artistMetadata[artistName]["Volumes"]  = myVolumes
            
            
            
            ######################################################################
            #### Check If Artist Has Any ID
            ######################################################################
            myMusicData          = myMusicMap.get(artistName)
            if myMusicData is None:
                raise ValueError("Found an unknown artist: [{0}]".format(artistName))

            myDB = {}
            for dbKey in dbKeys:
                key = dbKey['Key']
                artistAlbums = []
                artistID = getArtistIDX(myMusicData, key, discdf[key])
                if artistID is not None:
                    artistAlbums = getRowData(artistAlbumsDB[key], rownames=artistID)['Albums']
                    myDB[key]    = getFlattenedArtistAlbums(artistAlbums)
                else:
                    myDB[key] = []
                    
            artistMetadata[artistName]["DB"]  = myDB
                    
                    

        ######################################################################
        #### Print Resulting Counts
        ######################################################################
        print("{0: <7}{1: <7}".format(len(artistMetadata) - numArtists, len(artistMetadata)))
        numArtists = len(artistMetadata)
        break
        
    elapsed(start, cmt)
    
    return artistMetadata

In [105]:
artistMetadata = analyzeArtistAlbums()

Current Time is Sun Feb 23, 2020 19:22:52 for Analyzing Artist Albums
A                                  194    194    
Current Time is Sun Feb 23, 2020 19:23:13 for Done with Analyzing Artist Albums
Process [Done with Analyzing Artist Albums] took 20 seconds.


In [133]:
minMyAlbums   = 2
minTodoAlbums = 5
import json
downloadCut   = 5

print("{0: <40}| {1: <12}| {2: <10}| {3: <10}| {4: <10}| {5: <10}| {6: <10}| {7: <10}| {8: <10}| {9: <10}| {10: <20}".format("Artist", "Volumes", "Discogs", "AllMusic", "# Albums", "Matched", "Todo", "Unknown", "Random", "f(Known)", "Download"))
print("{0: <40}| {1: <12}| {2: <10}| {3: <10}| {4: <10}| {5: <10}| {6: <10}| {7: <10}| {8: <10}| {9: <10}| {10: <20}".format("------", "-------", "-------", "--------", "--------", "-------", "----", "-------", "------", "--------", "--------"))
      

for artistName, artistData in artistMetadata.items():
    ### Artist Name
    print("{0: <40}".format(artistName), end="")
    
    ### Volumes
    volumeData = artistData["Volumes"]
    print("| {0: <12}".format(json.dumps(list(volumeData.values()))), end="")
    
    ### Discogs
    discogsData = artistData["DB"]["Discogs"]
    print("| {0: <10}".format(len(discogsData)), end="")
    
    ### AllMusic
    allmusicData = artistData["DB"]["AllMusic"]
    print("| {0: <10}".format(len(allmusicData)), end="")

    ### Albums Data
    myAlbumsData = {k: len(v) for k,v in artistData["MyAlbums"]["MyAlbums"].items()}
    print("| {0: <10}".format(json.dumps(list(myAlbumsData.values()))), end="")

    ### Matched Data
    matchData = {k: len(v) for k,v in artistData["MyAlbums"]["Matched"].items()}
    print("| {0: <10}".format(json.dumps(list(matchData.values()))), end="")

    ### Todo Data
    todoData = {k: len(v) for k,v in artistData["MyAlbums"]["Todo"].items()}
    print("| {0: <10}".format(json.dumps(list(todoData.values()))), end="")

    ### Unknown Data
    unknownData = {k: len(v) for k,v in artistData["MyAlbums"]["Unknown"].items()}
    print("| {0: <10}".format(json.dumps(list(unknownData.values()))), end="")

    ### Random Data
    randomData = {k: len(v) for k,v in artistData["MyAlbums"]["Random"].items()}
    print("| {0: <10}".format(json.dumps(list(randomData.values()))), end="")
    

    ### Check If We Need To Download More
    maxDB = max([len(v) for k, v in artistData["DB"].items()])
    if maxDB > 0:
        totalAlbums   = sum([v for k,v in artistData["Volumes"].items()])
        downloadRatio = int(round(100*(totalAlbums/maxDB),0))
    else:
        downloadRatio = "---"
    print("| {0: <10}".format(downloadRatio), end="")
    
    download = ""
    if isinstance(downloadRatio, int):
        if downloadRatio < downloadCut and maxDB > 50:
            download = artistName
    print("| {0: <20}".format(download), end="")
    


    ### Return
    print("")

Artist                                  | Volumes     | Discogs   | AllMusic  | # Albums  | Matched   | Todo      | Unknown   | Random    | f(Known)  | Download            
------                                  | -------     | -------   | --------  | --------  | -------   | ----      | -------   | ------    | --------  | --------            
A Flock Of Seagulls                     | [21]        | 65        | 40        | [15]      | [6]       | [0]       | [0]       | [1]       | 32        |                     
A Global Threat                         | [2]         | 11        | 7         | [0]       | [2]       | [0]       | [0]       | [0]       | 18        |                     
A Tribe Called Quest                    | [9]         | 59        | 35        | [0]       | [7]       | [2]       | [0]       | [0]       | 15        |                     
A*Teens                                 | [1, 1]      | 30        | 27        | [0, 1]    | [0, 0]    | [1, 0]    | [0, 0]    | [0, 0] 

Angie Stone                             | [1]         | 57        | 32        | [1]       | [0]       | [0]       | [0]       | [2]       | 2         | Angie Stone         
Angizia                                 | [2]         | 10        | 5         | [0]       | [2]       | [0]       | [0]       | [0]       | 20        |                     
Ani DiFranco                            | [5]         | 90        | 41        | [0]       | [2]       | [3]       | [0]       | [0]       | 6         |                     
Animaniacs                              | [2]         | 9         | 8         | [1]       | [0]       | [1]       | [0]       | [0]       | 22        |                     
Animotion                               | [1, 1]      | 17        | 7         | [0, 1]    | [0, 0]    | [1, 0]    | [0, 0]    | [0, 0]    | 12        |                     
Anita Baker                             | [2, 8]      | 48        | 34        | [0, 7]    | [1, 1]    | [1, 0]    | [0, 0]    | [0, 0] 

Anastacia                               | [9]         | 65        | 34        | [5]       | [4]       | [0]       | [0]       | [0]       | 14        |                     
Astronaut Ape                           | [7]         | 6         | 2         | [7]       | [0]       | [0]       | [0]       | [9]       | 117       |                     


***
***
***

# UnMatch Artist

In [None]:
def unMatchArtist(artistName):
    from os.path import join
    from fsUtils import removeDir

    dirval        = join(getMatchedDir(), getPrimeDirectory(artistName), artistName)
    matchedDir    = setDir(dirval, "Match")
    mediaTypeDirs = findDirs(matchedDir)
    for mediaTypeDir in mediaTypeDirs:
        for matchDir in findDirs(mediaTypeDir):
            albumName = getUnMatchedDirName(getDirBasics(matchDir)[-1])

            srcDir = matchDir
            dstDir = setDir(dirval, albumName)
            if isDir(dstDir):
                i = 0
                while not isDir(dstDir):
                    dstDir = "{0} [Fix-{1}]".format(setDir(dirval, albumName), i)
                    i += 1

            moveDir(srcDir, dstDir, debug=True)

        if isDir(mediaTypeDir):
            removeDir(mediaTypeDir, debug=True)

    if isDir(matchedDir):
        removeDir(matchedDir, debug=True)

In [None]:
## Anita Baker
## Annie Lennox
## Ani DiFranco
## Aretha Franklin

## Barenaked Ladies (Complete redo)
## Ben E. King

## David Bowie is a mess...
## Def Leopard is rough...
## Dolly Parton needs work...
## Eagles (Downloading what I can)
## Eric Clapton is a mess...
## Fleetwood mac is a real mess...


unMatchArtist("John Mayer")

In [None]:
unMatchArtist("Backstreet Boys")

# Move Artists Without Albums

In [None]:
testMove = False
for dirN in findDirs("/Users/tgadfort/matched"):
    for dirval in findDirs(dirN):
        
        ## Get My Music Artist Name
        artistName = getDirBasics(dirval)[-1]
        artistName = normalize('NFC', artistName)
        
        myAlbums = [getDirBasics(x)[-1] for x in findDirs(dirval)]
        
        ## Only Random
        setRandom = set(["Random"])
        nRemain   = len(set(myAlbums).difference(setRandom))
        if nRemain == 0:
            print("\n{0: <40}{1}".format(artistName, nRemain))

            srcDir = dirval
            dstDir = join("/Volumes/Music/Random", artistName)
            if isDir(srcDir):
                if not isDir(dstDir):
                    print("Moving [{0}] to [{1}]".format(srcDir, dstDir))
                    if testMove is False:
                        moveDir(srcDir, dstDir, debug=True)
                else:
                    print("Cannot move {0} because destination already exists.".format(srcDir))
                    continue
            else:
                print("Cannot move {0} because it is not a directory".format(srcDir))
                continue

# Find/Move Title

In [None]:
testMove = True
for dirN in findDirs("/Users/tgadfort/matched"):
    for dirval in findDirs(dirN):
        
        ## Get My Music Artist Name
        artistName = getDirBasics(dirval)[-1]
        artistName = normalize('NFC', artistName)
        
        todoAlbums = []
        for dval in ["Title"]:
            todoval = join(dirval, dval)
            for dname in glob(todoval):
                for tdir in findDirs(dname):
                    srcDir = tdir
                    dstDir = join(dirval, getDirBasics(tdir)[-1])
                    if isDir(srcDir):
                        if not isDir(dstDir):
                            print("Moving [{0}] to [{1}]".format(srcDir, dstDir))
                            if testMove is False:
                                moveDir(srcDir, dstDir, debug=True)
                        else:
                            print("Cannot move {0} because destination already exists.".format(srcDir))
                            continue
                    else:
                        print("Cannot move {0} because it is not a directory".format(srcDir))
                        continue

# Find Random/Todo

In [None]:
for dirN in findDirs("/Users/tgadfort/matched"):
    for dirval in findDirs(dirN):
        
        ## Get My Music Artist Name
        artistName = getDirBasics(dirval)[-1]
        artistName = normalize('NFC', artistName)
                
            
        myAlbums = [getDirBasics(x)[-1] for x in findDirs(dirval)]
        
        ## Only Todo
        setTodo = set(["Todo"])
        nRemain = len(set(myAlbums).difference(setTodo))
        if nRemain == 0:
            print("{0: <40}{1}".format(artistName, nRemain))

# Find Singles/EPs

In [None]:
for dirN in findDirs("/Users/tgadfort/matched"):
    for dirval in findDirs(dirN):
        
        ## Get My Music Artist Name
        artistName = getDirBasics(dirval)[-1]
        artistName = normalize('NFC', artistName)
        
        

In [None]:
## Accidental Moves

for dirN in findDirs("/Users/tgadfort/matched"):
    print(dirN)
    for dirval in findDirs(dirN):
        
        ## Get My Music Artist Name
        artistName = getDirBasics(dirval)[-1]
        artistName = normalize('NFC', artistName)
                
        discogMediaNames = ['Albums', 'Singles & EPs', 'Compilations', 'Videos', 'Miscellaneous', 'Visual', 'DJ Mixes']        
        matchvals = glob(join(dirval, "Match", "*"))
        matchvals = [x for x in matchvals if getDirBasics(x)[-1] not in discogMediaNames]
        
        for src in matchvals:
            dst = src.replace("/Match/", "/")
            moveDir(src, dst, debug=True)

In [None]:
baseDir = "/Users/tgadfort/matched"
for dbKey, dbValues in myMusicAlbumMatch.items():
    for artistName, artistAlbums in dbValues.items():
        print("==>",artistName)
        start = artistName[0]
        dirval = join(baseDir, getPrimeDirectory(artistName), artistName)
        matchedDir = setDir(dirval, "Match")
        mkDir(matchedDir)
        
        for myAlbumName,albumVal in artistAlbums.items():
            srcDir = setDir(dirval, myAlbumName)
            if not isDir(srcDir):
                print("{0} does not exist".format(srcDir))
                continue
            dstDir = setDir(matchedDir, discConv(myAlbumName))
            if isDir(dstDir):
                print("{0} already exists".format(dstDir))
                continue

            print("\tMoving {0}  --->  {1}".format(srcDir, dstDir))
            moveDir(srcDir, dstDir, debug=True)

In [None]:

            
    ## Loop over Artist AM IDs
    for iAMID,artistAMID in enumerate(artistAMIDs):
        if artistAMIDtoDCIDMap.get(artistAMID) is not None:
            continue
        
        showNames(artistAMName, artistDCNames)
        artistAMAlbums = getArtistAlbums(artistAMToAlbumsData.get(artistAMID))
        if artistAMAlbums is None:
            print("\tAM ID={0: <15}{1: <5}/{2: <8}{3: <5}{4}".format(artistAMID, iKey, len(artistAMKeys), iAMID, "NONE"))
        else:
            print("\tAM ID={0: <15}{1: <5}/{2: <8}{3: <5}{4}".format(artistAMID, iKey, len(artistAMKeys), iAMID, len(artistAMAlbums)))

            
        artistAMtoDCScore = None
        
            
        ## Loop over Artist DC Names
        for artistDCName in artistDCNames:
            artistDCIDs = artistDCNameToID[artistDCName]
            
            ## Loop over Artist DC IDs
            for artistDCID in artistDCIDs:
                artistDCAlbums = getArtistAlbums(artistDCToAlbumsData.get(artistDCID))
                if False:
                    if artistDCAlbums is None:
                        print("\t\tDC ID={0: <15}\t      {1}".format(artistDCID, "NONE"))
                    else:
                        print("\t\tDC ID={0: <15}\t Size={1}".format(artistDCID, len(artistDCAlbums)))
                
                retval = getBestAlbumsMatch(artistAMAlbums, artistDCAlbums, debug=False)
                if retval > 0.7:
                    if artistAMtoDCScore is None:
                        artistAMtoDCScore = {"Name": artistDCName, "ID": artistDCID, "Score": retval}
                    else:
                        if retval > artistAMtoDCScore["Score"]:
                            artistAMtoDCScore = {"Name": artistDCName, "ID": artistDCID, "Score": retval}
                            
                            
        print("\tAM ID={0: <15}{1}".format(artistAMID, artistAMtoDCScore))
        if artistAMtoDCScore is not None:
            artistAMIDtoDCIDMap[artistAMID] = {"AM Name": artistAMName, "DC ID": artistAMtoDCScore["ID"], "DC Name": artistAMtoDCScore["Name"]}        
        
        
        print(artistName,allmusicIDs)
    continue
    for dbname, dirval in unknownVals.items():
        print("Partially Unknown Artist: {0: <40}{1: <15}{2}".format(artistName, dbname, dirval))
        artistAMNametoIDMap = {v["AM Name"]: k for k,v in artistAMIDtoDCIDMap.items()}

In [None]:

        

        ## Print IDs
        printArtistIDs(artistName, discogsArtistIDX, allmusicArtistIDX)

        
        ## Get Database Entries
        discogsIDData  = getRowDataByColValue(discdfDC, "DiscArtist", artistName)
        allmusicIDData = getRowDataByColValue(discdfAM, "DiscArtist", artistName)

        
        ## Print Database ID Results
        printArtistIDDBResults(artistName, discogsIDData, allmusicIDData)
        
        
        
        ## Check for Match for all DBs
        discogsIDCnts  = getArtistIDDBCounts(discogsIDData)
        allmusicIDCnts = getArtistIDDBCounts(allmusicIDData)
        if discogsIDCnts == 1:
            musicMatchMap[artistName]["Status"].append("Discogs")
            musicMatchMap[artistName]["Matches"]["Discogs"]  = discogsIDData
            
        if allmusicIDCnts == 1:
            musicMatchMap[artistName]["Status"].append("AllMusic")
            musicMatchMap[artistName]["Matches"]["AllMusic"] = allmusicIDData

In [None]:

        
        continue
        
        
        print(artistName,type(mdata))
        if not isinstance(mdata, DataFrame):
            matches = mulArts.getArtistNames(artistName)
            if not all(matches.values()):
                print("{0: <10}{1: <50}{2}".format("?", artistName, matches))
                continue
            else:
                print("{0: <10}{1: <50}{2}".format("MIX", artistName, matches))
                srcDir = dirval
                dstDir = setDir("/Volumes/Music/Multi", artistName)
                moveDir(srcDir, dstDir)                
        else:
            print('\t',mdata.shape)
            if mdata.shape[0] == 1:
                musicNameIDMap[artistName] = [mdata.index, mdata["Name"].values[0]]
            else:
                print("{0: <10}{1: <50}{2}".format("", artistName, ""))
                albums = [x for x in findDirs(dirval) if getDirBasics(x)[-1] not in discogMediaNames+myMediaNames]
                if len(albums) == 0:
                    print("\tNo Albums\n")
                    srcDir = dirval
                    dstDir = setDir("/Volumes/Music/Random", artistName)
                    moveDir(srcDir, dstDir)
                else:
                    multiMap[artistName] = {"DB": mdata, "Albums": albums}
                    continue
                    for album in albums:
                        print("\t",album)
                    print("")
                print("\tDiscogs Data:")
                print(mdata)
                print("\n\n")

# Manual Entries

In [None]:
newManual = {}

In [None]:
mdata = getMusicData("Name", "Daryl Hall & John Oates")
newManual[mdata["DiscArtist"].values[0]] = [mdata.index, mdata["Name"].values[0]]

In [None]:
newManual

In [None]:
if len(newManual) > 0:
    try:
        myMusicNameIDMap.update({k: [v[0][0], v[1].values[0]] for k,v in newManual.items()})
    except:
        try:
            myMusicNameIDMap.update({k: [v[0][0], v[1]] for k,v in newManual.items()})
        except:
            raise ValueError("Not sure about {0}".format(newManual))

## Find MultiMap -> Match Entry

In [None]:
ratVal = 0.6
if len(myMusicNameIDMap) > 0:
    ratVal = 0.3
artistIDMap = {}
for artistName,artistSlimData in multiMap.items():
    print("\n","="*50)
    print("ArtistName: {0}".format(artistName))
    print("   Albums: {0}".format(len(artistSlimData["Albums"])))

    myAlbumNames = []
    for album in artistSlimData["Albums"]:
        myAlbumName = album.split("/{0}/".format(artistName))[-1]
        #print("     {0: <15}{1: <10}{2}".format("", "", myAlbumName))
        myAlbumNames.append(myAlbumName)
        
    for idx,row in artistSlimData["DB"].iterrows():
        artistAlbumsData = getRowByIndex(artistAlbumsDB, idx)
        artistAlbums     = artistAlbumsData["Albums"]
        print("   Match: {0}  [{1}]".format(row["DiscArtist"], row["Name"]))
        if isinstance(artistAlbums, dict):
            for albumType, albumTypeData in artistAlbums.items():
                for albumID, dbAlbumName in albumTypeData.items():
                    for myAlbumName in myAlbumNames:
                        s = SequenceMatcher(None, myAlbumName, dbAlbumName)
                        ratio = s.ratio()
                        if ratio > 0.6:
                            print("     {0: <15}{1: <10}{2: <8}{3}".format(albumType, albumID, round(ratio,2), dbAlbumName))
                            artistIDMap[artistName] = [idx, row["Name"]]

## Found Multi Artist

In [None]:
for artistName,artistData in artistIDMap.items():
    print(artistName,' ==> ',artistData)

In [None]:
if len(artistIDMap) > 0:
    myMusicNameIDMap.update(artistIDMap)

## Set Single Artist

In [None]:
for artistName,artistData in musicNameIDMap.items():
    print(artistName,' ==> ',artistData)

In [None]:
print("Found {0} entries".format(len(musicNameIDMap)))
if len(musicNameIDMap) > 0:
    try:
        myMusicNameIDMap.update({k: [v[0][0], v[1].values[0]] for k,v in musicNameIDMap.items()})
    except:
        try:
            myMusicNameIDMap.update({k: [v[0][0], v[1]] for k,v in musicNameIDMap.items()})
        except:
            raise ValueError("Not sure about {0}".format(musicNameIDMap))
print("Found {0} entries".format(len(musicNameIDMap)))
print("There are {0} total entries".format(len(myMusicNameIDMap)))

# Save Everything

In [None]:
from ioUtils import saveFile
print("Saving {0} entries".format(len(myMusicNameIDMap)))
saveFile(ifile="musicDiscogsMap.p", idata=myMusicNameIDMap, debug=True)

In [None]:
from ioUtils import saveFile
print("Saving {0} entries".format(len(myMusicNameIDMap)))
saveFile(ifile="musicDiscogsMap.yaml", idata=myMusicNameIDMap, debug=True)

In [None]:
myMusicNameIDMap = getFile("musicDiscogsMap.p")
print("Found {0} music <-> discogs maps".format(len(myMusicNameIDMap)))

In [None]:
from ioUtils import saveFile
saveFile(ifile="musicDiscogsMap.p", idata=myMusicNameIDMap, debug=True)
from ioUtils import saveFile
saveFile(ifile="musicDiscogsMap.yaml", idata=myMusicNameIDMap, debug=True)

# Find Music and Match Albums

In [None]:
#myMusicAlbumIDMap = getFile("musicDiscogsAlbumMap.p")
myMusicAlbumIDMap = {}
print("Found {0} music <-> discogs albums maps".format(len(myMusicAlbumIDMap)))

In [None]:
skipMusicAlbumIDMap = getFile(ifile="skipMusicDiscogsAlbumMap.yaml")
skipMusicAlbumIDMap = {}
print("Found {0} music <-> discogs albums maps".format(len(skipMusicAlbumIDMap)))

## Johnny Cash

In [None]:
#myMusicNameIDMap

# Show and Find Music <-> Discog Matches

In [None]:
ratioCut = 0.65
keepSkips = False

In [None]:
def showArtist(artistName, discogsArtist):
    print("\t{0: <30} ---> {1}".format(artistName, discogsArtist))
    
def showAlbum(myAlbumName):
    print("\t\tMy Album: {0}".format(myAlbumName))
    
def showMatch(myAlbumName, albumData):
    print("\t\t   Match: {0}".format(albumData))

musicAlbumIDMap = {}
discogMediaNames = ['Albums', 'Singles & EPs', 'Compilations', 'Videos', 'Miscellaneous', 'Visual', 'DJ Mixes']
myMediaNames     = ['Random', 'Todo', 'Match', 'Title', 'Singles', 'Album', 'Unknown', 'Bootleg', 'Mix', 'Tribute']
ignoreDirs       = list(set(discogMediaNames+myMediaNames))
nSkips           = len(skipMusicAlbumIDMap)

numUnMatches = Counter()

for dirN in findDirs("/Users/tgadfort/matched"):
    print(dirN)
    for dirval in findDirs(dirN):        
        artistName = getDirBasics(dirval)[-1]
        artistName = normalize('NFC', artistName)
        
        discogsArtist = myMusicNameIDMap[artistName]        
        idx                      = discogsArtist[0]
        discogsArtistName        = discogsArtist[1]
        try:
            discogsArtistAlbumsData  = getRowByIndex(artistAlbumsDB, idx)
            discogsArtistAlbums      = discogsArtistAlbumsData["Albums"]
        except:
            raise ValueError("Could not find Artist ID [{0}] and Artist Name [{1}]".format(idx, discogsArtistName))
        
        albums = [x for x in findDirs(dirval) if getDirBasics(x)[-1] not in ignoreDirs]        
        myAlbumNames = [album.split("/{0}/".format(artistName))[-1] for album in albums]
        
        numUnMatches[artistName] = len(myAlbumNames)
        
        for myAlbumName in myAlbumNames:
            myAlbumName = normalize('NFC', myAlbumName)
            if myMusicAlbumIDMap.get(artistName) is not None:
                if myMusicAlbumIDMap[artistName].get(myAlbumName) is not None:
                    numUnMatches[artistName] -= 1
                    #print("\t\tMy Album: {0} ---> {1}".format(myAlbumName, myMusicAlbumIDMap[artistName][myAlbumName]))
                    continue
                    
            if skipMusicAlbumIDMap.get(artistName) is not None:
                if skipMusicAlbumIDMap[artistName].get(myAlbumName) is not None:
                    numUnMatches[artistName] -= 1
                    #print("\t\tMy Album: {0} ---> {1}".format(myAlbumName, myMusicAlbumIDMap[artistName][myAlbumName]))
                    continue

            maxRatio  = None
            albumData = None
            for albumType, albumTypeData in discogsArtistAlbums.items():
                for albumID, dbAlbumName in albumTypeData.items():
                    convDiscogsAlbumName = discConv(dbAlbumName)
                    s = SequenceMatcher(None, myAlbumName, convDiscogsAlbumName)
                    ratio = round(s.ratio(), 3)
                    if ratio > ratioCut:
                        if maxRatio is None:
                            maxRatio = ratio
                        else:
                            if ratio < maxRatio:
                                continue
                            maxRatio  = ratio
                            albumData = [albumID, dbAlbumName]
                            
            if albumData is not None:
                numUnMatches[artistName] -= 1
                if musicAlbumIDMap.get(artistName) is None:
                    musicAlbumIDMap[artistName] = {}
                    showArtist(artistName, discogsArtist)
                showAlbum(myAlbumName)
                musicAlbumIDMap[artistName][myAlbumName] = albumData
                showMatch(myAlbumName, albumData)
                        
            if keepSkips:
                if skipMusicAlbumIDMap.get(artistName) is None:
                    skipMusicAlbumIDMap[artistName] = {}
                #showAlbum(myAlbumName)
                skipMusicAlbumIDMap[artistName][myAlbumName] = [albumID, dbAlbumName]



## Check For Skips

In [None]:
discogsArtist            = myMusicNameIDMap["John Fahey"]
idx                      = discogsArtist[0]
print("Artist ID -->",idx)
discogsArtistName        = discogsArtist[1]
discogsArtistAlbumsData  = getRowByIndex(artistAlbumsDB, idx)
discogsArtistAlbums      = discogsArtistAlbumsData["Albums"]

for k,v in discogsArtistAlbums.items():
    for code,name in v.items():
        print("{0: <15}{1: <12}{2}".format(k,code,name))
        continue

In [None]:
print("Previous {0} music <-> discogs albums maps".format(nSkips))
print("Found {0} music <-> discogs albums maps".format(len(skipMusicAlbumIDMap)))
saveFile(ifile="skipMusicDiscogsAlbumMap.yaml", idata=skipMusicAlbumIDMap, debug=True)


In [None]:
skipMusicAlbumIDMap = getFile(ifile="skipMusicDiscogsAlbumMap.yaml")
skipMusicAlbumIDMap = {}
print("Found {0} music <-> discogs albums maps".format(len(skipMusicAlbumIDMap)))

## Check For Match

In [None]:
from ioUtils import saveFile
saveFile(ifile="newMusicDiscogsAlbumMap.yaml", idata=musicAlbumIDMap, debug=True)
print("Found {0} music <-> discogs albums maps".format(len(musicAlbumIDMap)))

musicAlbumIDMap = getFile(ifile="newMusicDiscogsAlbumMap.yaml", debug=True)
print("Found {0} music <-> discogs albums maps".format(len(musicAlbumIDMap)))

In [None]:
musicAlbumIDMap = getFile(ifile="newMusicDiscogsAlbumMap.yaml", debug=True)
print("Found {0} music <-> discogs albums maps".format(len(musicAlbumIDMap)))

## Move To Matched

In [None]:
from fsUtils import mkDir, isDir, moveDir
for dirN in findDirs("/Users/tgadfort/matched"):
    print(dirN)
    for dirval in findDirs(dirN):        
        artistName = getDirBasics(dirval)[-1]
        artistName = normalize('NFC', artistName)
        musicMap   = musicAlbumIDMap.get(artistName)
        if musicMap is None:
            continue
        for myAlbumName,albumVal in musicMap.items():
            matchedDir = setDir(dirval, "Match")
            mkDir(matchedDir)
            srcDir = setDir(dirval, myAlbumName)
            if not isDir(srcDir):
                print("{0} does not exist".format(srcDir))
                continue
            dstDir = setDir(matchedDir, discConv(myAlbumName))
            if isDir(dstDir):
                print("{0} already exists".format(dstDir))
                continue
                
            print("Moving {0}  --->  {1}".format(srcDir, dstDir))
            moveDir(srcDir, dstDir)

# Move To Album Type

In [None]:
moveData = {}
for dirN in findDirs("/Users/tgadfort/matched"):
    print(dirN)
    for dirval in findDirs(dirN):
        matchedDir = setDir(dirval, "Match")
        aTypes     = ['Albums', 'Singles & EPs', 'Compilations', 'Videos', 'Miscellaneous', 'Visual', 'DJ Mixes']
        albums     = [getDirBasics(x)[-1] for x in findDirs(matchedDir)]
        albums     = [x for x in albums if x not in aTypes]
        if len(albums) == 0:
            continue
        
        artistName = getDirBasics(dirval)[-1]
        artistName = normalize('NFC', artistName)
        
        discogsArtist = myMusicNameIDMap[artistName]
        
        idx                      = discogsArtist[0]
        discogsArtistName        = discogsArtist[1]
        discogsArtistAlbumsData  = getRowByIndex(artistAlbumsDB, idx)
        discogsArtistAlbums      = discogsArtistAlbumsData["Albums"]
        
        
        for myAlbumName in albums:
            myAlbumName = normalize('NFC', myAlbumName)
            maxRatio    = None
            dbAlbumData = {}
            for albumType, albumTypeData in discogsArtistAlbums.items():
                for albumID, dbAlbumName in albumTypeData.items():
                    convDiscogsAlbumName = discConv(dbAlbumName)
                    s = SequenceMatcher(None, myAlbumName, convDiscogsAlbumName)
                    ratio = round(s.ratio(), 2)
                    if ratio > 0.0:
                        if maxRatio is None:
                            maxRatio = ratio
                        else:
                            if ratio < maxRatio:
                                continue
                            maxRatio = ratio

                        if moveData.get(artistName) is None:
                            print('  ',artistName)
                            moveData[artistName] = {"Dir": matchedDir, "Albums": {}}
                        moveData[artistName]["Albums"][myAlbumName] = [albumID, albumType, convDiscogsAlbumName]
       
            if maxRatio is not None:
                print('\t{0: <50}'.format(myAlbumName),moveData[artistName]["Albums"][myAlbumName])

In [None]:
saveFile(ifile="matchedMoveMusicDiscogsAlbumMap.yaml", idata=moveData, debug=True)
print("Found {0} music <-> discogs albums maps".format(len(moveData)))

In [None]:
moveData = getFile(ifile="matchedMoveMusicDiscogsAlbumMap.yaml", debug=True)
print("Found {0} music <-> discogs albums maps".format(len(moveData)))

In [None]:
atypes = {}
for artistName,artistMoveData in moveData.items():
    dirval = artistMoveData["Dir"]
    albums = artistMoveData["Albums"]
    for myAlbumName, dbAlbumData in albums.items():
        albumID, albumType, convDiscogsAlbumName = dbAlbumData[0], dbAlbumData[1], dbAlbumData[2]
        atypes[albumType] = True
        albumTypeDir = setDir(dirval, albumType)
        mkDir(albumTypeDir)
        srcDir  = setDir(dirval, myAlbumName)
        if not isDir(srcDir):
            continue

        dstName = " :: ".join([myAlbumName, "[{0}]".format(albumID)])
        dstDir  = setDir(albumTypeDir, dstName)
        if isDir(dstDir):
            print("{0} already exists.".format(dstDir))
            continue
        print("Moving {0}".format(srcDir))
        moveDir(srcDir, dstDir)

In [None]:
list(atypes.keys())

In [None]:
for artistName,v in musicAlbumIDMap.items():
    print(artistName)
    for myAlbumName,v2 in v.items():
        print("\t",myAlbumName,'\t',v2)

In [None]:
if True:
    saveFile(ifile="skipMusicDiscogsAlbumMap.yaml", idata=skipMusicAlbumIDMap, debug=True)
    print("Found {0} music <-> discogs albums maps".format(len(skipMusicAlbumIDMap)))
skipMusicAlbumIDMap

In [None]:
saveFile(ifile="newMusicDiscogsAlbumMap.yaml", idata=musicAlbumIDMap, debug=True)
print("Found {0} music <-> discogs albums maps".format(len(musicAlbumIDMap)))

In [None]:
musicAlbumIDMap = getFile(ifile="newMusicDiscogsAlbumMap.yaml", debug=True)
print("Found {0} music <-> discogs albums maps".format(len(musicAlbumIDMap)))

## Merge Maps (if needed)

In [None]:
for artistName,v in musicAlbumIDMap.items():
    for myAlbumName,v2 in v.items():
        if myMusicAlbumIDMap.get(artistName) is None:
            print("Adding {0}".format(artistName))
            myMusicAlbumIDMap[artistName] = {}
        if myMusicAlbumIDMap[artistName].get(myAlbumName) is None:
            print("Adding {0}/{1} ---> {2}".format(artistName, myAlbumName, v2))
            myMusicAlbumIDMap[artistName][myAlbumName] = v2

In [None]:
from ioUtils import saveFile
saveFile(ifile="musicDiscogsAlbumMap.p", idata=myMusicAlbumIDMap, debug=True)
print("Found {0} music <-> discogs albums maps".format(len(myMusicAlbumIDMap)))

In [None]:
saveFile(ifile="musicDiscogsAlbumMap.yaml", idata=myMusicAlbumIDMap, debug=True)

# Rename Albums

In [None]:
musicAlbumIDMap = getFile(ifile="musicDiscogsAlbumMap.yaml", debug=True)
print("Found {0} music <-> discogs albums maps".format(len(musicAlbumIDMap)))

In [None]:
renames = {}
for dirN in findDirs("/Users/tgadfort/matched"):
    print(dirN)
    for dirval in findDirs(dirN):        
        artistName = getDirBasics(dirval)[-1]
        artistName = normalize('NFC', artistName)
        if musicAlbumIDMap.get(artistName) is None:
            continue
        print("    {0}".format(artistName))
        for album, albumdata in musicAlbumIDMap[artistName].items():
            print("\t[{0}]   ====>   [{1}]".format(album,albumdata[1]))
            srcAlbumName = album
            dstAlbumName = discConv(albumdata[1])
            if srcAlbumName != dstAlbumName:
                if renames.get(dirval) is None:
                    renames[dirval] = {}
                renames[dirval][srcAlbumName] = dstAlbumName

In [None]:
saveFile(ifile="musicRenames.yaml", idata=renames, debug=True)

In [None]:
renameData = getFile(ifile="musicRenames.yaml", debug=True)

In [None]:
from fsUtils import isDir
for dirval,artistdata in renameData.items():
    for src,dst in artistdata.items():
        srcDir = setDir(dirval, src)
        dstDir = setDir(dirval, dst)
        print(dstDir,isDir(dstDir))
        if srcDir != dstDir and not isDir(dstDir):
            print("Moving {0} -> {1}".format(srcDir, dstDir))
            moveDir(srcDir, dstDir, debug=True)

## Music Discogs Maps

In [None]:
## Do not execute this stuff

In [None]:
try:
    myMusicNameIDMap = getFile("musicDiscogsMap.p")
    print("Found {0} music <-> discogs maps".format(len(myMusicNameIDMap)))
except:
    myMusicNameIDMap = {}
    print("Could not load music <-> discogs map")
       
myMusicNameIDMap["Mayday!"] == ['2156710', '¡Mayday!']

In [None]:
myMusicMap = {}
for myArtistName, discogsData in myMusicNameIDMap.items():
    if myMusicMap.get(myArtistName) is None:
        myMusicMap[myArtistName] = {"Discogs": None, "AllMusic": None}
    myMusicMap[myArtistName]["Discogs"] = {"ID": discogsData[0], "Name": discogsData[1]}
print("There are {0} my music data".format(len(myMusicMap)))

In [None]:
artistAMIDtoDCIDMap = getFile(ifile="/Users/tgadfort/Documents/code/discogs/artistAMIDtoDCIDMap.p")
artistDCIDtoAMIDMap = {v["DC ID"]: k for k,v in artistAMIDtoDCIDMap.items()}
print("There are {0} AM Artist ID to DC entries".format(len(artistAMIDtoDCIDMap)))
print("There are {0} DC Artist ID to AM entries".format(len(artistDCIDtoAMIDMap)))

In [None]:
for myArtistName, discogsData in myMusicNameIDMap.items():
    if myMusicMap.get(myArtistName) is None:
        myMusicMap[myArtistName] = {"Discogs": None, "AllMusic": None}
    artistAMData = artistDCIDtoAMIDMap.get(discogsData[0])
    if artistAMData is not None:
        myMusicMap[myArtistName]["AllMusic"] = {"ID": artistAMData, "Name": None}
print("There are {0} my music data".format(len(myMusicMap)))

In [None]:
saveFile(idata=myMusicMap, ifile="myMusicMap.p")

In [None]:
print("{0: <5}{1: <40}{2: <12}{3}".format("#", "Artist", "# of Dirs", "# of Albums"))
for i,(artistName, dirvals) in enumerate(matchedResults["FullyUnknown"].items()):
    print("{0: <5}{1: <40}{2: <12}".format(i,artistName,len(dirvals)), end="")

    myMusicAlbums = []
    for dirval in dirvals:
        myMusicAlbum  = getMyMusicAlbums(dirval, returnNames=True) + getMyMatchedMusicAlbums(dirval) + getMyDiscogsMusicAlbums(dirval)
    print(len(myMusicAlbum))

In [None]:
artistName = "Heart"
num = 2
cutoff = 0.6
#possibleIDs = {dbKey['Key']: findPossibleArtistIDs(artistName, artistNameToID[dbKey['Key']], artists[dbKey['Key']], num, cutoff) for dbKey in dbKeys}
#possibleIDs

# Fix Discogs Mapping

In [None]:
def fixDiscogsMapping(myMusicNameIDMap):
    musicNameIDMap = {}
    for artistName,v in myMusicNameIDMap.items():
        if isinstance(v, list):
            musicNameIDMap[artistName] = v
            #print(artistName,v)
            continue

        mdata = getMusicData("Name", v)
        if isinstance(mdata, DataFrame):
            if mdata.shape[0] == 1:
                #print(artistName,[mdata.index[0], mdata["Name"].values[0]])
                musicNameIDMap[artistName] = [mdata.index[0], mdata["Name"].values[0]]
            else:
                raise ValueError("No idea about {0}".format(mdata))
        else:
            print(artistName,'\t',v)
            
    return musicNameIDMap
        
#myMusicNameIDMap = musicNameIDMap
#from ioUtils import saveFile
#saveFile(ifile="musicDiscogsMap.p", idata=musicNameIDMap, debug=True)

In [None]:
myMusicNameIDMap = fixDiscogsMapping(myMusicNameIDMap)

In [None]:
if myMusicNameIDMap["Mayday!"] == ['2156710', '¡Mayday!']:
    saveFile(ifile="musicDiscogsMap.p", idata=myMusicNameIDMap, debug=True)
    saveFile(ifile="musicDiscogsMap.yaml", idata=myMusicNameIDMap, debug=True)

In [None]:
manualMatches = {}

if len(manualMatches) > 0:
    myMusicNameIDMap.update(manualMatches) 

print("Found {0} music <-> discogs maps".format(len(myMusicNameIDMap)))