In [1]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

################################################################################
## Music Stuff
################################################################################
from mp3id import mp3ID
from musicBase import musicBase
from musicFinder import musicFinder
from musicPath import pathBasics


################################################################################
## General Stuff
################################################################################
from timeUtils import clock, elapsed
from listUtils import getFlatList
from time import sleep
from pandas import DataFrame, Series
from ioUtils import getFile, saveFile
from searchUtils import findDirs, findNearest, findAll
from fileUtils import getDirBasics, getBaseFilename
from unicodedata import normalize
from fsUtils import moveDir, setDir, mkDir, isDir, removeDir
from pandasUtils import getRowDataByColValue, getRowData
import operator
from glob import glob
from os.path import join
from collections import Counter


################################################################################
## Database Stuff
################################################################################
from discogsBase import discogs
from mainDB import getAlbumTypes
from multiArtist import multiartist
from matchAlbums import matchAlbums
from masterdb import getArtistAlbumsDB, discConv


################################################################################
## Music Stuff
################################################################################
from musicUtils import *
from myMusicDBMap import myMusicDBMap


################################################################################
## Pandas Stuff
################################################################################
import pandas as pd
# Raise the pandas display limits so wide/long frames are not truncated in output.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


## Python Version
import sys
print("Python: {0}".format(sys.version))

# Record when this run started; `start` is a module-level name that later
# cells rebind when they call clock().
import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

Python: 3.7.3 (default, Mar 27 2019, 16:54:48) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2020-04-18 14:17:33.929437


# My Music Database Map

In [None]:
# Create the module-level `mdb` object that later cells (and matchMyMusicAlbums,
# which reads it as a global) depend on, then call getFullDBData() on it.
mdb = myMusicDBMap(debug=True)
mdb.getFullDBData()

In [None]:
#mdb.getArtistAlbumsFromID(db='AllMusic', artistID='0000239827')

In [None]:
#mdb.setDBs(["AceBootlegs"])
#mdb.show()

***
***

# Search For Artist <-> Match In My Music

#### Just look for fully unknown artists

In [None]:
# Manually register an (artist name, database, artist ID) entry and save the map.
#mdb.add("Almanac", "AllMusic", '0003476217')
mdb.add("James", "AllMusic", "0000112185")
mdb.save()
# Disabled helper: prints an mdb.add(...) template line for every artist in
# matchedResults["FullyUnknown"] (matchedResults is defined in the next cell).
#for artist in matchedResults["FullyUnknown"].keys():
#    print("mdb.add(\"{0}\", \"AllMusic\", \"\")".format(artist))

In [None]:
# Defines the module-level `matchedResults` used by analyzeFullyUnknownArtists
# below, then prints the number of entries under each status key.
matchedResults = getMatchedStatusForMyArtists(mdb, onlyUnknown=True)
for status, matchedResult in matchedResults.items():
    print("{0: <30}{1}".format(status, len(matchedResult)))
    
_, _ = clock("Last Run")

#### Check everyone

In [None]:
## Don't really need this anymore, but it's important
# Disabled (if False). NOTE(review): this call passes no arguments, while the
# active call in the previous cell passes `mdb` as the first argument —
# confirm the signature before re-enabling.
if False:
    matchedResults = getMatchedStatusForMyArtists()
    for status, matchedResult in matchedResults.items():
        print("{0: <30}{1}".format(status, len(matchedResult)))
    _, _ = clock("Last Run")

***

## Analyze Fully Unknown Music Artists

In [None]:
def forceRename(myMusicAlbumMatch, test=True):
    """Rename album directories to the converted database album name.

    Parameters
    ----------
    myMusicAlbumMatch : dict
        Maps database key -> {artist name -> {my album name -> albumVals}},
        where albumVals has a "Dir" entry (the parent directory) and an
        "Album" entry unpacked as (albumID, dbAlbumName, mediaType).
        A database value of None is skipped.
    test : bool
        When True (default) the planned moves are only printed. Directories
        are moved only when ``test is False``.

    Notes
    -----
    Albums whose name already equals the converted database name (ignoring
    case) are left alone. If the destination directory already exists the
    conflict is reported and the album is skipped rather than moved.
    """
    for dbKey, dbValues in myMusicAlbumMatch.items():
        if dbValues is None:
            continue
        print("Found {0} artist entries".format(len(dbValues)))
        for artistName, artistAlbums in dbValues.items():
            headerShown = False
            for myAlbumName, albumVals in artistAlbums.items():
                dirval = albumVals["Dir"]
                albumID, dbAlbumName, mediaType = albumVals["Album"]

                dbAlbumNameConv = discConv(dbAlbumName)
                # Identical names, or names differing only by case, need no rename.
                if myAlbumName.upper() == dbAlbumNameConv.upper():
                    continue

                srcDir = setDir(dirval, myAlbumName)
                dstDir = setDir(dirval, dbAlbumNameConv)

                # Print the artist header once, before the first line for this artist.
                if not headerShown:
                    print("\n==>",artistName)
                    headerShown = True

                if isDir(dstDir):
                    print("[{0}] and [{1}] already exists for artist {2}".format(srcDir, dstDir, artistName))
                    # Do not move onto an existing directory.
                    continue

                print("\tMoving {0}  --->  {1}".format(srcDir, dstDir))
                if test is False:
                    moveDir(srcDir, dstDir, debug=True)
                

def showArtist(artistName, discogsArtist):
    """Print the artist name (left-aligned, padded to 30 columns) and its database artist."""
    line = "\t{0: <30} ---> {1}".format(artistName, discogsArtist)
    print(line)
    
def showAlbum(myAlbumName):
    """Print one of my album names, indented under the artist line."""
    line = "\t\tMy Album: {0}".format(myAlbumName)
    print(line)
    
def showMatch(myAlbumName, albumData):
    """Print the matched database album data; ``myAlbumName`` is accepted but not printed."""
    line = "\t\t   Match: {0}".format(albumData)
    print(line)

    
def matchMyAlbum(myAlbumName, dbAlbumsData, cuts, dbKey, albumType, debug=False):
    """Find the database album whose converted name best matches ``myAlbumName``.

    Parameters
    ----------
    myAlbumName : str
        Album name to look for.
    dbAlbumsData : dict or None
        Maps media type -> {albumID -> database album name}. None is treated
        as "no albums".
    cuts : sequence of two values
        ``(ratioCut, maxCut)``. A candidate qualifies when
        ``ratioCut < ratio <= maxCut``; a ``maxCut`` of None means 1.0.
    dbKey, albumType :
        Passed to ``getAlbumTypes`` to obtain the media types to search.
    debug : bool
        When True, print every comparison.

    Returns
    -------
    (albumData, ratio)
        ``albumData`` is ``[albumID, dbAlbumName, mediaType]`` for the best
        candidate and ``ratio`` its similarity rounded to two decimals, or
        ``(None, None)`` when nothing qualifies. On equal ratios the candidate
        seen last wins.
    """
    # Imported here because the notebook never imports SequenceMatcher
    # explicitly; it would otherwise only be available through a star import.
    from difflib import SequenceMatcher

    ratioCut, maxCut = cuts
    if maxCut is None:
        maxCut = 1.0

    mediaTypes = getAlbumTypes(dbKey, albumType)
    if dbAlbumsData is None:
        dbAlbumsData = {}

    bestRatio = None
    albumData = None
    for mediaType in mediaTypes:
        mediaAlbums = dbAlbumsData.get(mediaType)
        if mediaAlbums is None:
            continue
        for albumID, dbAlbumName in mediaAlbums.items():
            convAlbumName = discConv(dbAlbumName)
            ratio = round(SequenceMatcher(None, myAlbumName, convAlbumName).ratio(), 3)
            if debug:
                print("\tMy Album: {0: <30}\tMatch: {1: <30}\tRatio: {2}".format(myAlbumName, convAlbumName, ratio))
            if not (ratioCut < ratio <= maxCut):
                continue
            # ">=" keeps the original tie rule: a later equal ratio replaces an earlier one.
            if bestRatio is None or ratio >= bestRatio:
                bestRatio = ratio
                albumData = [albumID, dbAlbumName, mediaType]

    retRatio = round(bestRatio, 2) if isinstance(bestRatio, float) else None
    return albumData, retRatio



def getMatchedDirName(albumName, albumID, dbKey = "Discogs"):
    """Build the matched directory name ``"<converted album> :: [<TAG>-<albumID>]"``.

    The two-letter tag depends on ``dbKey``; an unrecognized ``dbKey`` raises
    ValueError before the album name is converted.
    """
    idTemplates = (
        ("AllMusic", "[AM-{0}]"),
        ("MusicBrainz", "[MB-{0}]"),
        ("Discogs", "[DC-{0}]"),
        ("AceBootlegs", "[AB-{0}]"),
        ("RateYourMusic", "[RM-{0}]"),
        ("LastFM", "[LM-{0}]"),
        ("DatPiff", "[DP-{0}]"),
        ("RockCorner", "[RC-{0}]"),
        ("CDandLP", "[CL-{0}]"),
        ("MusicStack", "[MS-{0}]"),
    )
    # Equality scan (not a dict lookup) so any dbKey value is compared the same way as before.
    idTemplate = next((template for knownKey, template in idTemplates if dbKey == knownKey), None)
    if idTemplate is None:
        raise ValueError("dbKey {0} not recognized!".format(dbKey))

    return " :: ".join([discConv(albumName), idTemplate.format(albumID)])



def getUnMatchedDirName(matchedDirName, mediaDirType):
    """Recover the plain album directory name from a matched directory name.

    Parameters
    ----------
    matchedDirName : str
        Name of the form ``"<album name> :: [<TAG>-<id>]"``. The split is made
        on the LAST `` :: `` so album names that themselves contain `` :: ``
        are handled.
    mediaDirType : str
        Media type text; if it contains "Single" or "EP" the result gets a
        `` (Single)`` suffix, and if it contains "Mix" or "MixTape" a
        `` (MixTape)`` suffix.

    Returns
    -------
    str
        The album name, with any suffix appended.

    Raises
    ------
    ValueError
        If there is no `` :: `` separator, or the ID portion is not an integer.
    """
    vals = matchedDirName.rsplit(" :: ", 1)
    if len(vals) != 2:
        raise ValueError("Could not extract album name from {0}".format(matchedDirName))
    albumName, albumIDval = vals

    # Sanity check only: the parsed value is not used. The slice starts three
    # characters after "[", so for a two-letter tag it includes the "-".
    # NOTE(review): non-numeric IDs are rejected here — confirm that every
    # supported database uses numeric album IDs.
    try:
        int(albumIDval[(albumIDval.find("[")+3):albumIDval.rfind("]")])
    except ValueError as err:
        raise ValueError("Could not extract album ID from {0}".format(albumIDval)) from err

    if any(marker in mediaDirType for marker in ("Single", "EP")):
        albumName = "{0} (Single)".format(albumName)

    if any(marker in mediaDirType for marker in ("Mix", "MixTape")):
        albumName = "{0} (MixTape)".format(albumName)

    return albumName
        


def showMatchedMoves(myMusicAlbumMatch, rename=False):
    """Print, per artist, each album's source directory and matched directory name.

    Nothing is moved or created by this function itself.

    Parameters
    ----------
    myMusicAlbumMatch : dict
        Maps database key -> {artist name -> {my album name -> albumVals}},
        where albumVals has "Dir" and "Album" entries and "Album" unpacks as
        (albumID, dbAlbumName, mediaType). A database value of None is skipped.
    rename : bool
        When True the matched name is built from the converted database album
        name; otherwise from my own album name.
    """
    for dbKey, dbValues in myMusicAlbumMatch.items():
        if dbValues is None:
            continue
        for artistName, artistAlbums in dbValues.items():
            print("==>",artistName)
            for myAlbumName, albumVals in artistAlbums.items():
                dirval = albumVals["Dir"]
                albumID, dbAlbumName, mediaType = albumVals["Album"]

                srcDir = setDir(dirval, myAlbumName)
                if rename is True:
                    dstName = getMatchedDirName(discConv(dbAlbumName), albumID, dbKey)
                else:
                    dstName = getMatchedDirName(myAlbumName, albumID, dbKey)

                # Previously srcDir/dstName were computed and then discarded.
                print("\t[{0}] {1}  --->  {2}".format(mediaType, srcDir, dstName))
                    
                    
                    
def moveMatchedDirectories(myMusicAlbumMatch, rename=False, show=False):
    """Move matched album directories into ``<artist dir>/Match/<media type>/``.

    Parameters
    ----------
    myMusicAlbumMatch : dict
        Maps database key -> {artist name -> {my album name -> albumVals}},
        where albumVals has "Dir" and "Album" entries and "Album" unpacks as
        (albumID, dbAlbumName, mediaType). A database value of None is skipped.
    rename : bool
        When True the destination name is built from the converted database
        album name; otherwise from my own album name.
    show : bool
        When True only print the media directory, source name and destination
        name. Nothing is created or moved in this mode.

    Notes
    -----
    Albums whose source directory is missing, or whose destination already
    exists, are reported and skipped. The "Match" and media-type directories
    are created only immediately before an actual move.
    """
    for dbKey, dbValues in myMusicAlbumMatch.items():
        if dbValues is None:
            continue
        for artistName, artistAlbums in dbValues.items():
            print("==>",artistName)
            for myAlbumName, albumVals in artistAlbums.items():
                dirval = albumVals["Dir"]
                albumID, dbAlbumName, mediaType = albumVals["Album"]

                srcName = myAlbumName
                srcDir = setDir(dirval, srcName)
                if not isDir(srcDir):
                    print("{0} does not exist".format(srcDir))
                    continue

                matchedDir = setDir(dirval, "Match")
                mediaDir = setDir(matchedDir, discConv(mediaType))

                if rename is True:
                    dstName = getMatchedDirName(discConv(dbAlbumName), albumID, dbKey)
                else:
                    dstName = getMatchedDirName(myAlbumName, albumID, dbKey)

                if show is True:
                    print('\t{0}'.format(mediaDir))
                    print("\t\t[{0}]".format(srcName))
                    print("\t\t[{0}]".format(dstName))
                    continue

                dstDir = setDir(mediaDir, dstName)
                if isDir(dstDir):
                    print("{0} already exists".format(dstDir))
                    continue

                # Create the target hierarchy only now that a move will really happen.
                mkDir(matchedDir)
                mkDir(mediaDir)

                print("\tMoving {0}  --->  {1}".format(srcDir, dstDir))
                moveDir(srcDir, dstDir, debug=True)
                
                
                
def formatAlbum(albumName, albumType):
    """Return ``albumName`` with "(Single)"/"(EP)" markers removed when ``albumType`` is 3.

    For any other ``albumType`` the name is returned unchanged. After removal
    the result is stripped of surrounding whitespace.
    """
    if albumType != 3:
        return albumName

    cleaned = albumName
    for marker in ("(Single)", "(EP)"):
        cleaned = cleaned.replace(marker, "")
    return cleaned.strip()


def matchArtistNameAlbum(artistName, artistPrimeDirs, mdb, db, ratioCut = 0.95, maxCut = 1.0, albumType=1, debug=False):    
    """Match my albums for one artist against that artist's albums in database ``db``.

    Parameters
    ----------
    artistName : str
        Artist to look up in ``mdb``.
    artistPrimeDirs : iterable
        Directories whose albums (from ``getMyMusicAlbums``) are to be matched.
    mdb :
        Object providing ``getArtistDBData(artistName, db)`` and
        ``getArtistAlbumsFromID(db, artistID)``.
    db : str
        Database key.
    ratioCut, maxCut, albumType, debug :
        Forwarded to ``matchMyAlbum`` (cuts are passed as ``[ratioCut, maxCut]``).

    Returns
    -------
    dict or None
        ``{my album name: {"Ratio", "Dir", "Album"}}`` for every matched album,
        or None when the artist has no ID in ``db``.

    Raises
    ------
    ValueError
        If the same album name is matched more than once.
    """
    # My database matched ID; without one there is nothing to compare against.
    artistID = mdb.getArtistDBData(artistName, db).get("ID")
    if artistID is None:
        return

    # Database albums for that ID.
    dbAlbums = mdb.getArtistAlbumsFromID(db, artistID)

    matched = {}
    for dirval in artistPrimeDirs:
        # My remaining albums to be matched in this directory.
        myMusicAlbums = getMyMusicAlbums(dirval, returnNames=True)
        if len(myMusicAlbums) == 0:
            continue

        for myAlbumName in myMusicAlbums:
            if debug:
                print("\tMy album: {0}".format(myAlbumName))
            albumData, ratio = matchMyAlbum(formatAlbum(myAlbumName, albumType), dbAlbums,
                                            [ratioCut, maxCut], db, albumType, debug=debug)
            if debug:
                print("\t\tMy match: {0}  [{1}]".format(albumData, ratio))

            if albumData is None:
                continue

            showArtist(artistName, artistID)
            showAlbum(myAlbumName)
            if matched.get(myAlbumName) is not None:
                raise ValueError("Multiple copies for {0} by {1}".format(myAlbumName, artistName))
            matched[myAlbumName] = {"Ratio": ratio, "Dir": dirval, "Album": albumData}
            showMatch(myAlbumName, albumData)

    return matched

    

def matchMyMusicAlbums(ratioCut=0.95, maxCut=None, db="Discogs", albumType=1):
    """Match every artist's albums against database ``db`` and save the result.

    Reads the module-level ``mdb`` object. Results are written to
    "myMusicAlbumMatch.yaml" as ``{db: {artist name: {album name: match}}}``.

    Parameters
    ----------
    ratioCut : float
        Lower similarity cut forwarded to ``matchArtistNameAlbum``.
    maxCut : float or None
        Upper similarity cut; None means ``ratioCut + 0.01``.
    db : str
        Database key to match against.
    albumType :
        Forwarded to ``matchArtistNameAlbum``.
    """
    if maxCut is None:
        maxCut = ratioCut+0.01

    start, cmt = clock("Checking for Albums Matches Against {0} DB".format(db))

    # Loop over artist name <-> prime directory items, keeping non-empty matches.
    albumMatches = {}
    for artistName, artistPrimeDirs in getArtistNameMatchedDirs().items():
        matchVal = matchArtistNameAlbum(artistName, artistPrimeDirs, mdb, db, ratioCut, maxCut, albumType, debug=False)
        if not isinstance(matchVal, dict) or len(matchVal) == 0:
            continue
        albumMatches.setdefault(db, {}).setdefault(artistName, {}).update(matchVal)

    elapsed(start, cmt)

    from ioUtils import saveFile
    saveFile(ifile="myMusicAlbumMatch.yaml", idata=albumMatches, debug=True)
    print("Found {0} music <-> discogs albums maps".format(len(albumMatches)))
    
    
def unMatchArtist(artistName):
    """Move an artist's matched albums out of ``Match/<media type>/`` back into the artist directory.

    For every directory returned by ``getMatchedDirs()`` the artist directory
    is ``<musicDir>/<prime directory>/<artistName>``. Each album found under
    its "Match" sub-directories is renamed with ``getUnMatchedDirName`` and
    moved up into the artist directory. If a directory with that name already
    exists there, `` [Fix-<n>]`` is appended with the first free ``n``.
    Afterwards each media-type directory and the "Match" directory are
    removed (after deleting a ".DS_Store" file inside, if present).

    Parameters
    ----------
    artistName : str
        Artist whose matched albums are to be un-matched.
    """
    from os.path import join
    from fsUtils import removeDir, isFile, setFile, removeFile

    def _removeLeftoverDir(leftoverDir):
        # Delete a ".DS_Store" file if present, then remove the directory itself.
        if not isDir(leftoverDir):
            return
        DS_Store = setFile(leftoverDir, ".DS_Store")
        if isFile(DS_Store):
            removeFile(DS_Store, debug=True)
        removeDir(leftoverDir, debug=True)

    for musicDir in getMatchedDirs():
        dirval = join(musicDir, getPrimeDirectory(artistName), artistName)
        if not isDir(dirval):
            continue

        matchedDir = setDir(dirval, "Match")
        for mediaTypeDir in findDirs(matchedDir):
            # Use only the directory's own name for the media-type check, so
            # "Single"/"EP"/"Mix" appearing elsewhere in the path (artist or
            # library names) cannot trigger a suffix.
            mediaTypeName = getDirBasics(mediaTypeDir)[-1]
            for matchDir in findDirs(mediaTypeDir):
                albumName = getUnMatchedDirName(getDirBasics(matchDir)[-1], mediaTypeName)

                baseDstDir = setDir(dirval, albumName)
                dstDir = baseDstDir
                # Find the first free name; the original loop condition was
                # inverted and therefore never ran.
                i = 0
                while isDir(dstDir):
                    dstDir = "{0} [Fix-{1}]".format(baseDstDir, i)
                    i += 1

                moveDir(matchDir, dstDir, debug=True)

            _removeLeftoverDir(mediaTypeDir)

        _removeLeftoverDir(matchedDir)

In [None]:
def analyzeFullyUnknownArtists(mdb, matchedResults, ratioCut=0.65, maxCut=None):
    """Look for database artists whose albums match the albums of my fully unknown artists.

    For every artist listed under ``matchedResults["FullyUnknown"]`` the
    candidate artists from ``mdb.getArtistAlbums(artistName, num=10, cutoff=0.7)``
    are compared album-by-album with a ``matchAlbums`` instance. A candidate is
    accepted when ``near >= 1`` and ``maxval >= 0.8``; per database the
    candidate with the higher ``near`` or ``maxval`` is kept. The result is
    saved to "myMusicArtistMatch.yaml" as
    ``{artist name: {db: {"ID": str, "Score": [near, score, maxval]}}}``.

    Parameters
    ----------
    mdb :
        Object providing ``getArtistAlbums(artistName, num=..., cutoff=...)``.
    matchedResults : dict
        Must contain a "FullyUnknown" mapping keyed by artist name.
    ratioCut, maxCut :
        Accepted for backward compatibility; currently not used by the
        matching (the thresholds above are fixed).
    """
    myMusicMatch = {}

    start, cmt = clock("Checking for Artist Matches")

    ma = matchAlbums()

    # Loop over artist name <-> prime directory items.
    for artistName, artistPrimeDirs in getArtistNameMatchedDirs().items():
        if matchedResults["FullyUnknown"].get(artistName) is None:
            continue
        print(artistName)

        myMusicAlbums = []
        for dirval in artistPrimeDirs:
            myMusicAlbums += getMyMusicAlbums(dirval, returnNames=True)
        print(artistName,'\t',len(myMusicAlbums))
        artistAlbums = mdb.getArtistAlbums(artistName, num=10, cutoff=0.7)

        for db, dbdata in artistAlbums.items():
            print("\t{0: <20}".format(db))
            for artist, IDs in dbdata.items():
                for artistID, artistIDData in IDs.items():
                    artistIDAlbums = getFlattenedArtistAlbums(artistIDData)
                    ma.match(artistIDAlbums, myMusicAlbums)
                    if not (ma.near >= 1 and ma.maxval >= 0.8):
                        continue

                    score = [ma.near, ma.score, ma.maxval]
                    dbMatches = myMusicMatch.setdefault(artistName, {})
                    previous = dbMatches.get(db)
                    # Keep the first accepted candidate, or replace it with a better one.
                    if previous is None or score[0] > previous["Score"][0] or score[2] > previous["Score"][2]:
                        dbMatches[db] = {"ID": str(artistID), "Score": list(score)}

                    print("\t{0: <20}{1: <20}{2: <50}{3: <20}{4}\tMatch!!".format(db,artist,artistID,len(artistIDAlbums), score))

    # Close the timer opened above, as matchMyMusicAlbums does.
    elapsed(start, cmt)

    from ioUtils import saveFile
    saveFile(ifile="myMusicArtistMatch.yaml", idata=myMusicMatch, debug=True)
    print("Found {0} music <-> discogs artist maps".format(len(myMusicMatch)))

In [None]:
# Needs `mdb` and `matchedResults` from the cells above; writes myMusicArtistMatch.yaml.
analyzeFullyUnknownArtists(mdb, matchedResults)

In [None]:
# Reload the artist matches written by analyzeFullyUnknownArtists.
myMusicMatch = getFile(ifile="myMusicArtistMatch.yaml")
print("Found {0} music <-> discogs artist maps".format(len(myMusicMatch)))

In [None]:
# Register every loaded artist match in `mdb` (one add per database entry)
# and save once at the end; nothing is saved when there are no matches.
if len(myMusicMatch) > 0:
    for artistName, artistValues in myMusicMatch.items():
        print(artistName)
        for db, dbEntry in artistValues.items():
            print('\t',db,'\t',dbEntry)
            mdb.add(artistName, db, dbEntry["ID"])
    mdb.save()

***

## Analyze Partially Unknown Music Artists

In [None]:
## Don't really need this anymore...
# Disabled (if False). NOTE(review): `analyzePartiallyUnknownArtists`,
# `showMyMusicMap`, `myMusicMap` and `dbKeys` are not defined anywhere in this
# notebook's visible cells — presumably they came from `musicUtils` or an
# older version; confirm before re-enabling. Also note that saveFile() runs
# even when there are no additions.
if False:
    additions = analyzePartiallyUnknownArtists(matchedResults)
    print("Found {0} additions".format(len(additions)))
    start, cmt = clock("Adding New Matches to myMusicMap")
    if len(additions) > 0:
        showMyMusicMap(myMusicMap)

        for artistName, artistValues in additions.items():
            for key, value in artistValues.items():
                val   = value['Value']
                score = value['Score']
                if myMusicMap.get(artistName) is None:
                    myMusicMap[artistName] = {dbKey['Key']: None for dbKey in dbKeys}
                myMusicMap[artistName][key] = val

        showMyMusicMap(myMusicMap)
    saveFile(idata=myMusicMap, ifile="myMusicMap.p", debug=True)
    elapsed(start, cmt)

***
***

# Match Albums

In [None]:
# rC is the lower similarity cut (ratioCut) and rC+dR the upper cut (maxCut)
# passed to matchMyMusicAlbums. Enable exactly one call: each run overwrites
# myMusicAlbumMatch.yaml.
dR=0.1
rC=0.9

matchMyMusicAlbums(ratioCut=rC, maxCut=rC+dR, db="AllMusic", albumType=1)
#matchMyMusicAlbums(ratioCut=rC, maxCut=rC+dR, db="MusicBrainz", albumType=3)
#matchMyMusicAlbums(ratioCut=rC, maxCut=rC+dR, db="Discogs", albumType="All")
#matchMyMusicAlbums(ratioCut=rC, maxCut=rC+dR, db="AceBootlegs", albumType=1) ## Max 1
#matchMyMusicAlbums(ratioCut=rC, maxCut=rC+dR, db="LastFM", albumType=1) ## Max 1
#matchMyMusicAlbums(ratioCut=rC, maxCut=rC+dR, db="RateYourMusic", albumType="All")
#matchMyMusicAlbums(ratioCut=rC, maxCut=rC+dR, db="DatPiff", albumType=1) ## Max 1
#matchMyMusicAlbums(ratioCut=rC, maxCut=rC+dR, db="RockCorner", albumType="All") ## Max 1
#matchMyMusicAlbums(ratioCut=rC, maxCut=rC+dR, db="CDandLP", albumType="All") ## Max 1
#matchMyMusicAlbums(ratioCut=rC, maxCut=rC+dR, db="MusicStack", albumType="All") ## Max 1

***
***
# Move To Matched

In [None]:
# Reload the album matches written by matchMyMusicAlbums.
myMusicAlbumMatch = getFile(ifile="myMusicAlbumMatch.yaml", debug=True)
print("Found {0} music <-> discogs albums maps".format(len(myMusicAlbumMatch)))

In [None]:
# Preview only (show=True): prints the media directory, source and destination names.
moveMatchedDirectories(myMusicAlbumMatch, rename=True, show=True)

In [None]:
# Performs the moves on disk (show=False).
moveMatchedDirectories(myMusicAlbumMatch, rename=True, show=False)

***
***
***

# UnMatch Artist

In [None]:
## Anita Baker
## Annie Lennox
## Ani DiFranco
## Aretha Franklin

## Barenaked Ladies (Complete redo)
## Ben E. King

## David Bowie is a mess...
## Def Leppard is rough...
## Dolly Parton needs work...
## Eagles (Downloading what I can)
## Eric Clapton is a mess...
## Fleetwood Mac is a real mess...


# Moves this artist's matched albums back out of their Match directories.
unMatchArtist("The Offspring")

In [None]:
######################################################################
#### Loop Over Prime Directories
######################################################################
# WARNING: this calls unMatchArtist for EVERY artist in every prime directory,
# undoing all matched moves on disk. It is not guarded, so "Run All" executes it.
for primeDir in getPrimeDirectories():
    artistPrimeDirMap = getArtistPrimeDirMap(primeDir)

    ######################################################################
    #### Loop Over Artist Name <-> Prime Map Items
    ######################################################################
    for artistName, artistPrimeDirs in artistPrimeDirMap.items():
        unMatchArtist(artistName)
