In [1]:
###########################################################################
## Basic stuff
###########################################################################
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))


###########################################################################
## Music
###########################################################################
from myMusicPathData import myMusicPathData

###########################################################################
## Utils
###########################################################################
from timeUtils import timestat
from fileIO import fileIO
from fsUtils import fsPath, fsInfo, dirUtil, fileUtil


###########################################################################
## DB
###########################################################################
from masterManualEntries import masterManualEntries
from masterArtistNameDB import masterArtistNameDB
from mainDB import mainDB



# General

In [None]:
# Build the database / IO handles used by the rest of the notebook.
# Statement order is kept as-is because the constructors may have side effects.
mme = masterManualEntries()

# Two artist-name DB handles, selected by the "main" and "multi" keys.
manDB      = masterArtistNameDB("main")
multimanDB = masterArtistNameDB("multi")

maindb = mainDB()

# NOTE(review): `io` shadows the stdlib module of the same name for this kernel.
io = fileIO()
#maindb.setMasterDBData()   ### Full DB Access

# Find My Music

In [2]:
# Scan the "O" prime directory for artists and their albums.
# Later cells iterate `mmpdData.items()` as artistName -> artistData and read
# `artistData.albumData[albumType].albums`.
mmpd = myMusicPathData()
mmpdData = mmpd.findMyMusic(primeDir="O")

Current Time is Fri Oct 29, 2021 14:08 for Finding All Artist Albums From [O] Prime Directory
  Found 73 Artists From [O] Prime Directory
Process [Finding All Artist Albums From [O] Prime Directory] Took 0.8 Seconds


In [None]:
# Preview the first rows of the summary built from the scan results.
# NOTE(review): this relies on `mmpd` still being the myMusicPathData instance;
# a later cell rebinds `mmpd` to a myMusicPathDetails object.
#mmpdData = mmpd.getData()
mmpd.getSummary(mmpdData).head()

# Music Details

In [None]:
import subprocess
from fsUtils import fileUtil, dirUtil


class myMusicPathDetails:
    """Measure the on-disk size of a path by shelling out to ``du -sh``.

    Sizes are returned as floats expressed in ``self.unit`` (megabytes by
    default, using 1024-based multipliers).
    """

    def __init__(self, debug=False):
        self.debug = debug
        # Bytes per unit for the single-letter suffixes printed by `du -h`.
        self.units = {"B": pow(1024,0), "K": pow(1024,1), "M": pow(1024,2),
                      "G": pow(1024,3), "T": pow(1024,4), "P": pow(1024,5)}
        self.unit  = "M"

    def getPathSize(self, path):
        """Return the size of ``path`` in ``self.unit`` units, or None on failure.

        Args:
            path: File or directory handed to ``du -sh``.

        Returns:
            float size in ``self.unit`` units, or None when ``du`` fails or its
            output cannot be parsed.
        """
        try:
            output = subprocess.check_output(['du','-sh', path])
        except Exception:
            # Best-effort: any ordinary failure (missing path, no `du`, bad
            # argument) yields None. Unlike a bare `except:`, this lets
            # KeyboardInterrupt/SystemExit through so long scans can be aborted.
            return None

        fields = output.split()
        if not fields:
            return None
        result = fields[0].decode('utf-8')

        # Some `du` builds print a bare number with no suffix (e.g. "0" for an
        # empty path); treat that as a byte count instead of chopping a digit.
        if result[-1].isdigit():
            number, unit = result, "B"
        else:
            number, unit = result[:-1], result[-1].upper()

        try:
            size   = float(number)
            factor = self.units[unit]
        except (ValueError, KeyError):
            # Unparseable number or unknown suffix: report "no size".
            return None
        return size * (factor / self.units[self.unit])

In [None]:
# Load the previously saved "artistDirSize.p" object through the fileIO helper.
# NOTE(review): presumably a pickle (".p") — only load files you trust; also
# consider a more descriptive name than `tmp`.
tmp = io.get("artistDirSize.p")

In [None]:
# DataFrame/Series are otherwise only imported in a later cell, so import them
# here to keep this cell runnable on a fresh top-to-bottom execution.
from pandas import DataFrame, Series

# NOTE(review): this rebinds `mmpd` (previously the myMusicPathData instance)
# to a myMusicPathDetails object; earlier cells that call `mmpd.getSummary`
# will not work if re-run after this one.
mmpd = myMusicPathDetails()

# One row per artist, one column per album type; each cell holds that
# artist's `.albums` value for the album type.
mmpdDF = DataFrame({
    artistName: Series({
        albumType: albumTypeData.albums
        for albumType,albumTypeData in artistData.albumData.items()
    })
    for artistName,artistData in mmpdData.items()
}).T

In [3]:
from os import walk
import subprocess

def getPathSize(path,retUnit="M"):
    """Return the size of ``path`` as reported by ``du -sh``, in ``retUnit`` units.

    Args:
        path: File or directory handed to ``du -sh``.
        retUnit: Unit of the returned value; one of "B", "K", "M", "G", "T",
            "P" (1024-based). Defaults to megabytes.

    Returns:
        float size in ``retUnit`` units, or None when ``du`` fails or its
        output cannot be parsed.

    Raises:
        KeyError: if ``retUnit`` is not a known unit.
    """
    # Bytes per unit for the single-letter suffixes printed by `du -h`.
    units = {"B": pow(1024,0), "K": pow(1024,1), "M": pow(1024,2),
             "G": pow(1024,3), "T": pow(1024,4), "P": pow(1024,5)}

    try:
        output = subprocess.check_output(['du','-sh', path])
    except Exception:
        # Best-effort: any ordinary failure (missing path, no `du`, bad
        # argument) yields None. Unlike a bare `except:`, this lets
        # KeyboardInterrupt/SystemExit through so long scans can be aborted.
        return None

    fields = output.split()
    if not fields:
        return None
    result = fields[0].decode('utf-8')

    # Some `du` builds print a bare number with no suffix (e.g. "0" for an
    # empty path); treat that as a byte count instead of chopping a digit.
    if result[-1].isdigit():
        number, unit = result, "B"
    else:
        number, unit = result[:-1], result[-1].upper()

    try:
        size   = float(number)
        factor = units[unit]
    except (ValueError, KeyError):
        # Unparseable number or unknown suffix: report "no size".
        return None
    return size * (factor / units[retUnit])


def getPathFiles(path):
    """Walk ``path`` and collect every file plus the sub-directory layout.

    Returns:
        dict with two entries:
          "Files": list of ``dirUtil(root).join(filename)`` results for every
                   file found anywhere under ``path``.
          "Dirs":  mapping of each visited directory that has at least one
                   sub-directory to the list of its sub-directory names.
    """
    collectedFiles = []
    subdirsByRoot  = {}
    for root, subdirs, filenames in walk(path):
        collectedFiles.extend([dirUtil(root).join(filename) for filename in filenames])
        # Only directories that actually contain sub-directories are recorded.
        if subdirs:
            subdirsByRoot[root] = subdirs
    return {"Files": collectedFiles, "Dirs": subdirsByRoot}


def getPathTimestamp(path):
    """Return the ``modified`` attribute that ``fsPath`` exposes for ``path``."""
    pathInfo = fsPath(path)
    return pathInfo.modified


def getPathSummary(albumPathData):
    """Condense a ``{"Files": [...], "Dirs": {...}}`` record into simple counts.

    Returns:
        dict with
          "nSubdirs": number of entries in ``albumPathData["Dirs"]``,
          "nFiles":   number of entries in ``albumPathData["Files"]``,
          "ftypes":   mapping of file suffix -> how many files carry it.
    """
    # NOTE(review): this counts the keys of "Dirs" (directories that have
    # sub-directories), not the sub-directories themselves — confirm intent.
    subdirCount = len(albumPathData['Dirs'])
    fileList    = albumPathData["Files"]
    suffixes    = [fsInfo(ifile).path.suffix for ifile in fileList]
    suffixCounts = Series(suffixes).value_counts().to_dict()
    return {"nSubdirs": subdirCount, "nFiles": len(fileList), "ftypes": suffixCounts}

In [83]:
def getArtistSummaryData(artistAlbums, albumsSummaryData):
    """Build one summary row per (artist, album type, album name).

    Args:
        artistAlbums: mapping artistName -> {albumType -> iterable of album paths}.
        albumsSummaryData: DataFrame indexed by album path; the row for each
            album is looked up with ``.loc``.

    Returns:
        DataFrame with the three key levels exposed as the columns
        "ArtistName", "AlbumType" and "AlbumName", followed by the summary
        columns of ``albumsSummaryData``.
    """
    # Phase 1: resolve every album path to its summary row.
    rowsByArtist = {
        artistName: {
            albumType: {albumPath: albumsSummaryData.loc[albumPath] for albumPath in albumPaths}
            for albumType, albumPaths in albumsByType.items()
        }
        for artistName, albumsByType in artistAlbums.items()
    }

    # Phase 2: flatten to a single mapping keyed by (artist, type, album name).
    flatRows = {}
    for artistName, albumsByType in rowsByArtist.items():
        for albumType, albumRows in albumsByType.items():
            for albumPath, albumRow in albumRows.items():
                # The album name is the directory name up to the first " :: ".
                rowKey = (artistName, albumType, dirUtil(albumPath).name.split(" :: ")[0])
                # A repeated key is reported and then overwritten by the later album.
                if flatRows.get(rowKey) is not None:
                    print(rowKey)
                flatRows[rowKey] = albumRow

    levelNames = {'level_0': "ArtistName", 'level_1': "AlbumType", 'level_2': "AlbumName"}
    return DataFrame(flatRows).T.reset_index().rename(columns=levelNames)


def getSummaryDataRollup(x):
    """Aggregate one column of album-summary rows into a single-entry dict.

    The aggregation is chosen from the column name (``x.name``):
      - "Size", "nFiles", "nSubdirs": sum of the per-row integer values;
        missing values count as 0.
      - "Timestamp": the maximum value.
      - "AlbumName", "AlbumType", "ArtistName": the value itself when the
        column holds exactly one distinct value, otherwise the number of
        distinct values under the key "nAlbums", "nAlbumTypes" or "nArtists".
      - "ftypes": per-suffix totals over all rows.

    Args:
        x: pandas Series whose ``name`` is one of the columns above.

    Returns:
        dict with exactly one key/value pair.

    Raises:
        ValueError: if the column name is not recognised.
    """
    colname = x.name
    if colname in ["Size", "nFiles", "nSubdirs"]:
        # Sizes can be missing (the size lookup returns None on failure);
        # casting NaN/None to int would raise, so treat them as 0.
        return {colname: x.fillna(0).astype(int).sum()}
    elif colname in ["Timestamp"]:
        return {colname: x.max()}
    elif colname in ["AlbumName", "AlbumType", "ArtistName"]:
        nunique = x.nunique()
        if nunique == 1:
            return {colname: x.unique()[0]}
        # Strip only a trailing "Name"; slicing four characters blindly turned
        # "AlbumType" into "Album", so it collided with the "nAlbums" key.
        label = colname[:-4] if colname.endswith("Name") else colname
        return {"n{0}s".format(label): nunique}
    elif colname in ["ftypes"]:
        return {colname: x.apply(Series).sum().to_dict()}
    raise ValueError("Not sure what to do with column [{0}]".format(colname))


def getSummaryRollupDataFrame(df, byArtist=True):
    """Roll the per-album summary up to one row per artist (or artist + album type).

    Args:
        df: per-album summary DataFrame with the key columns "ArtistName",
            "AlbumType" and "AlbumName" plus the columns understood by
            ``getSummaryDataRollup``.
        byArtist: when True group by "ArtistName" only; when False group by
            "ArtistName" and "AlbumType".

    Returns:
        DataFrame with one row per group. Key columns finer than the grouping
        level are dropped, and "nAlbums" holds the number of distinct album
        names in the group.
    """
    keys   = ["ArtistName","AlbumType","AlbumName"]
    gby    = keys[:1] if byArtist else keys[:2]
    ignore = keys[-2:] if byArtist else keys[-1:]

    retval = []
    for _, idxDF in df.groupby(gby):
        result = {}
        # Roll up column by column. The original called an undefined name
        # (`summary`) here; the rollup helper is getSummaryDataRollup.
        for colname in idxDF.columns:
            item = getSummaryDataRollup(idxDF[colname])
            # Skip key columns that are finer-grained than the grouping level.
            if not any(x in item for x in ignore):
                result.update(item)
        # Count albums explicitly so that a group with a single album gets 1
        # rather than a missing value that would later be filled with 0.
        if "AlbumName" in idxDF.columns:
            result["nAlbums"] = idxDF["AlbumName"].nunique()
        retval.append(result)

    summaryDF = DataFrame(retval).fillna(0)
    # The column is absent when there were no groups at all.
    if "nAlbums" in summaryDF.columns:
        summaryDF['nAlbums'] = summaryDF['nAlbums'].astype(int)
    return summaryDF

In [84]:
# Build the per-album summary table: flatten the artist -> album-type -> albums
# structure into one list of album paths, gather size / files / timestamp /
# summary per album, join them into one DataFrame and reshape it per artist.
# Each stage is wrapped in a `timestat` timer.
# NOTE(review): these imports are needed by functions and cells defined earlier
# in the notebook; consider moving them to the first cell.
from pandas import DataFrame, Series
from listUtils import getFlatList
#{album: mmpdet.getPathSize(album) for album in mmpdData["Obie Trice"].albumData["Mix"].albums}
ts = timestat("Flattening Albums")
# artistName -> {albumType -> albums}
artistAlbums = {artistName: {albumType: albumTypeData.albums for albumType,albumTypeData in artistData.albumData.items()} for artistName,artistData in mmpdData.items()}
# Flat list of every album path; empty album collections are dropped first.
albumsList   = getFlatList([y for y in getFlatList([x.values() for x in artistAlbums.values()]) if len(y) > 0])
ts.stop()

ts = timestat("Getting Size For {0} Albums".format(len(albumsList)))
# album path -> size (one `du` subprocess per album; None when the lookup fails)
albumSizes = {album: getPathSize(album) for album in albumsList}
ts.stop()

ts = timestat("Getting Files/Subdirs For {0} Albums".format(len(albumsList)))
# album path -> {"Files": [...], "Dirs": {...}}
albumFiles = {album: getPathFiles(album) for album in albumsList}
ts.stop()

ts = timestat("Getting Timestamps For {0} Albums".format(len(albumsList)))
# album path -> modification timestamp
albumTimestamps = {album: getPathTimestamp(album) for album in albumsList}
ts.stop()

ts = timestat("Getting Summary For {0} Albums".format(len(albumsList)))
# album path -> {"nSubdirs", "nFiles", "ftypes"}
albumSummaries = {album: getPathSummary(albumData) for album,albumData in albumFiles.items()}
ts.stop()

ts = timestat("Joining Data For {0} Albums".format(len(albumsList)))
# One row per album path; the timestamp and size are joined on that index.
albumsSummaryData = DataFrame(albumSummaries).T
albumsSummaryData = albumsSummaryData.join(Series(albumTimestamps, name="Timestamp"))
albumsSummaryData = albumsSummaryData.join(Series(albumSizes, name="Size"))
ts.stop()

ts = timestat("Creating Summary DataFrame")
# Re-key the per-album rows by (ArtistName, AlbumType, AlbumName).
artistSummaryDF = getArtistSummaryData(artistAlbums, albumsSummaryData)
ts.stop()

Current Time is Fri Oct 29, 2021 15:05 for Flattening Albums
Process [Flattening Albums] Took 0.0 Seconds
Current Time is Fri Oct 29, 2021 15:05 for Getting Size For 670 Albums
Process [Getting Size For 670 Albums] Took 6.2 Seconds
Current Time is Fri Oct 29, 2021 15:05 for Getting Files/Subdirs For 670 Albums
Process [Getting Files/Subdirs For 670 Albums] Took 4.0 Seconds
Current Time is Fri Oct 29, 2021 15:05 for Getting Timestamps For 670 Albums
Process [Getting Timestamps For 670 Albums] Took 0.0 Seconds
Current Time is Fri Oct 29, 2021 15:05 for Getting Summary For 670 Albums
Process [Getting Summary For 670 Albums] Took 7.6 Seconds
Current Time is Fri Oct 29, 2021 15:05 for Joining Data For 670 Albums
Process [Joining Data For 670 Albums] Took 0.1 Seconds
Current Time is Fri Oct 29, 2021 15:05 for Creating Summary DataFrame
Process [Creating Summary DataFrame] Took 0.2 Seconds


In [None]:
# Collapse the per-album summary into one row per artist.
artistRollupDF = getSummaryRollupDataFrame(artistSummaryDF, byArtist=True)