# Store music metadata using Apple Shortcuts
In this small project I try to store the metadata of the music that gets Shazamed on my Mac.
I started by trying to find the Shazam database file on my system, but that proved to be quite tricky.
Note that I tried this on macOS; it might be different on an iOS device.
TODO: test on mobile device

## Create a Shortcut
Open the Shortcuts app and click on the plus sign; a new shortcut will be created.
<img src="imgs/shazam_create.png" alt="create a new shortcut" width="70%" style="margin:5px;"/>


Also note that in the video I recorded I used a loop inside the shortcut, which was a bad idea since a shortcut can be run from the CLI.

I modified the shortcut to perform a single cycle: it takes no input and, if a song is found, outputs the song's metadata as text; otherwise it returns "No song found".

<img src="imgs/shortcut.png" alt="shows the text file created by the shortcut app" width="80%" style="margin:5px;"/>

In [1]:
import os
import re
import pandas as pd
from bs4 import BeautifulSoup

In [None]:

from parse_row import parse_row

In [2]:
# Four identical sample records, used to preview the CSV text pandas produces.
df = [{'A': 1, 'B': 2, 'C': 4} for _ in range(4)]

pd.DataFrame(df).to_csv(index=False)

'A,B,C\n1,2,4\n1,2,4\n1,2,4\n1,2,4\n'

In [None]:
# XML skeleton of the text the Shazam shortcut is expected to write.
# parse_row() parses this template to learn which child tags of <root> to look
# up in the real output file; the element text here is placeholder only.
# NOTE(review): the "Shazam Media (...)" placeholders presumably mirror the
# variable names offered by the Shortcuts app - confirm against the shortcut.
# NOTE(review): the literal starts with a newline, so the XML declaration is
# not on its first line; the parser has to tolerate that.
SHAZAM_TEMPLATE = """
<?xml version="1.0" encoding="UTF-8"?>
<root>

<timestamp>
Date
</timestamp>

<title>
Shazam Media (Title)
</title>

<artist>
Shazam Media (Artist)
</artist>

<isexplicit>
Shazam Media (Is Explicit)
</isexplicit>

<lyricssnippet>
Shazam Media (Lyrics Snippet)
</lyricssnippet>

<lyricsnippetsynced>
Shazam Media (Lyric Snippet Synced)
</lyricsnippetsynced>

<artwork>
Shazam Media (Artwork)
</artwork>

<videourl>
Shazam Media (Video URL)
</videourl>

<shazamurl>
Shazam Media (Shazam URL)
</shazamurl>

<applemusicurl>
Shazam Media (Apple Music URL)
</applemusicurl>

<name>
Shazam Media (Name)
</name>

</root>
"""

In [None]:
# Path of the text file the shortcut writes its result to.
outfile = './test.txt'
# IPython shell escape: run the "shazam_step" shortcut through the `shortcuts`
# CLI, passing the path above as --output-path; the lines the command prints
# are captured in `res`.
res = !shortcuts run "shazam_step" --output-path $outfile

In [None]:
res

Find the valid tags, then find their values.

In [None]:
def parse_row(outfile, encoding='utf-8'):
    """
    Parse one Shazam shortcut output file into a flat dictionary.

    The child tags of ``<root>`` in the `SHAZAM_TEMPLATE` string act as the
    map of fields to read: each of those tags that is present in the file is
    stored under its tag name. Both documents are parsed with BeautifulSoup's
    'xml' parser.
    ---------------------------------------------------------
    :param outfile: Specify the path to the xml file that will be parsed
    :param encoding: Text encoding used to read ``outfile``
    :return: A dictionary with keys as the tag names
        and values as the text content of those tags. Tags missing from the
        file are omitted, and the dictionary is empty when the file has no
        ``<root>`` element (e.g. the shortcut wrote plain text, not XML)
    :raises FileNotFoundError: If ``outfile`` does not exist
    """
    # Leading/trailing whitespace is stripped so the XML declaration is the
    # first thing the parser sees.
    template_root = BeautifulSoup(SHAZAM_TEMPLATE.strip(), 'xml').root
    # find_all(True, recursive=False) yields only the direct child elements,
    # skipping the whitespace text nodes that sit between them.
    tags = [
        child.name
        for child in template_root.find_all(True, recursive=False)
        if child.name != 'root'
    ]

    # A missing file is reported by letting FileNotFoundError propagate;
    # returning the exception class would hand callers a non-dict value.
    with open(outfile, encoding=encoding) as f:
        row = f.read()

    # Newlines are dropped before parsing so the extracted values carry no
    # line breaks from the file layout.
    root = BeautifulSoup(row.replace('\n', ''), 'xml').find('root')
    if root is None:
        # Nothing parseable in the file: report "no fields" instead of
        # failing on an unbound result variable.
        return {}

    dct = {}
    for tag in tags:
        tag_content = root.find(tag)
        if tag_content is not None:
            dct[tag] = tag_content.text
    return dct

In [None]:
# NOTE(review): this scratch cell held an unfinished statement,
#     with open('/Users/sophie/)
# whose string literal is never closed, so the cell was a SyntaxError. It is
# kept commented out until the intended path and body are known.

In [None]:
# Scratch check: `e` is bound to the Exception class itself, not to an
# instance, so `type(e)` evaluates to `type`.
e = Exception
type(e)

In [None]:
import os
import pandas as pd
import subprocess

# Maps a file extension (with its leading dot) to a writer method name,
# presumably to be looked up on a pandas DataFrame.
# NOTE(review): nothing in the visible code reads this table - confirm where
# it is used.
# NOTE(review): pandas documents `read_sas` but, as far as I know, no
# `DataFrame.to_sas`; the '.sas7bdat' entry may not resolve - confirm.
WRITE_FRMT = {
    '.csv': 'to_csv',
    '.xls': 'to_excel',
    '.xlsx': 'to_excel',
    '.json': 'to_json',
    '.html': 'to_html',
    '.sql': 'to_sql',
    '.parquet': 'to_parquet',
    '.feather': 'to_feather',
    '.h5': 'to_hdf',
    '.hdf': 'to_hdf',
    '.dta': 'to_stata',
    '.sas7bdat': 'to_sas',
}

# Maps a file extension (with its leading dot) to the name of a pandas reader
# function. read() resolves the name with getattr(pd, ...), and read_frmt()
# builds its format-detection regex from these keys (dots stripped).
# NOTE(review): read() calls every reader as reader(db_file, **args); some
# readers (e.g. read_sql, read_hdf) presumably need more than a path, and
# read_html presumably returns a list of frames - confirm before relying on
# those entries.
READ_FRMT = {
    '.csv': 'read_csv',
    '.xls': 'read_excel',
    '.xlsx': 'read_excel',
    '.json': 'read_json',
    '.html': 'read_html',
    '.sql': 'read_sql',
    '.parquet': 'read_parquet',
    '.feather': 'read_feather',
    '.h5': 'read_hdf',
    '.hdf': 'read_hdf',
    '.dta': 'read_stata',
    '.sas7bdat': 'read_sas',
}


def detect_file_type(file_path):
    """
    Ask the external ``file`` utility for the MIME type of a file.

    Runs ``file --mime-type <file_path>`` and returns whatever follows the
    last ``': '`` in the command's standard output. The ``file`` command must
    be available on the system (on Windows that means something like Cygwin
    or WSL).
    -----------------------------------------------
    :param file_path: Specify the file to be checked
    :return: A string containing the file type
    """
    command = ['file', '--mime-type', file_path]
    completed = subprocess.run(command, capture_output=True, text=True)

    # The command prints "<path>: <type>"; keep only the part after the
    # last ': ' separator (the whole output if there is no separator).
    _, _, file_type = completed.stdout.strip().rpartition(': ')
    return file_type


def read_frmt(db_file, args=None):
    """
    The read_frmt function works out the format of a data file
    and returns its contents as read by the matching pandas reader.

    The format is taken from the MIME type reported by detect_file_type when
    that type mentions one of the names in READ_FRMT (e.g. 'json' in
    'application/json'); otherwise the lower-cased file extension is used.
    -------------------------------------------------------
    :param db_file: Pass the name of the file to be read
    :param args: Optional dict of keyword arguments forwarded to the
        pandas reader
    :return: Whatever read() returns: a dataframe, or None when the format
        is not listed in READ_FRMT

    """
    # A fresh dict per call instead of a shared mutable default.
    args = {} if args is None else args
    # Lower-cased so that e.g. 'DATA.CSV' still maps to the '.csv' reader.
    ext = os.path.splitext(db_file)[1].lower()

    # Longest names first so that 'xlsx' is tried before its prefix 'xls';
    # regex alternation otherwise stops at the first (shorter) match.
    names = sorted((key.strip('.') for key in READ_FRMT), key=len, reverse=True)
    rgx = '|'.join(map(re.escape, names))
    rgx = rf'({rgx})'

    found = re.search(rgx, detect_file_type(db_file))
    frmt = f'.{found.group(1)}' if found else ext
    return read(db_file, frmt, args)


def read(db_file, frmt, args=None):
    """
    The read function reads a database file with the pandas reader
    registered for the given format in READ_FRMT.
    -------------------------------------------
    :param db_file: Specify the file to be read
    :param frmt: specify the format of the file being read, as an
        extension with its leading dot (e.g. '.csv')
    :param args: Optional dict of keyword arguments passed to the reader
    :return: The reader's result (a pandas dataframe for most formats),
        or None when ``frmt`` is not listed in READ_FRMT
    """
    method = READ_FRMT.get(frmt)
    if method is None:
        # Unknown format: nothing is read.
        return None
    read_method = getattr(pd, method)
    # `args or {}` gives each call its own empty mapping instead of a shared
    # mutable default argument.
    return read_method(db_file, **(args or {}))

In [None]:
# read_frmt('shazam.csv')

In [None]:
df

In [None]:


# Example usage: ask detect_file_type() for the type of a sample path and
# print it. `file_type` is reused by the regex check in a later cell.
# NOTE(review): 'file.json' is a relative path - presumably a sample file in
# the working directory; confirm it exists before running.
file_path = 'file.json'
file_type = detect_file_type(file_path)
print("File Type:", file_type)

In [None]:
file_type

In [None]:
# Rebuild the alternation of known format names (READ_FRMT keys without the
# dot) and check whether the detected file type mentions any of them.
rgx = '|'.join(key.strip('.') for key in READ_FRMT)
rgx = rf'({rgx})'

bool(re.findall(rgx, file_type))

In [None]:
rgx