In [1]:
from src.Wrapper import LlamaWrapper

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
llama = LlamaWrapper(model_id=model_id)



  from .autonotebook import tqdm as notebook_tqdm
Downloading shards: 100%|██████████| 4/4 [00:00<00:00, 8652.51it/s]
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.45it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [8]:
title_list = [
    "Dream Theater Metropolis Pt. 1 (Live At Luna Park DVD)", 
    "Plini & David Maxim Micic LIVE @ Vh1 Supersonic 2018 (full set)",
    "(Måneskin) Beggin' - Fingerstyle Guitar Cover | Josephine Alexandra",
    "Owane - Rock Is Too Heavy",
    "Jungle - Tash Sultana - Tutorial - Guitar Loop Cover - Tabs Available",
    "How to Play Beat It Solo - Eddie Van Halen Michael Jackson"
    ]


# Artist and Title

#### Option 1) manually with full list --> unstructured output

In [9]:
role_description = "You are a linguistic and music expert."
task_description = "For the following list of video titles, return dictionaries per element with artists and titles. The keys are the utterance and the values either ARTIST or TITLE."


In [10]:
prompt = role_description + task_description

temperature = 0.01
top_p = 0.1

output = llama.prompt_list(prompt, title_list, temperature=temperature, top_p=top_p)
output


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


"Here are the dictionaries for each video title:\n\n1. {'Dream Theater Metropolis Pt. 1 (Live At Luna Park DVD)': {'utterance': 'Dream Theater Metropolis Pt. 1 (Live At Luna Park DVD)', 'ARTIST': 'Dream Theater', 'TITLE': 'Metropolis Pt. 1'}}\n2. {'Plini & David Maxim Micic LIVE @ Vh1 Supersonic 2018 (full set)': {'utterance': 'Plini & David Maxim Micic LIVE @ Vh1 Supersonic 2018 (full set)', 'ARTIST': 'Plini & David Maxim Micic', 'TITLE': 'LIVE @ Vh1 Supersonic 2018 (full set)'}}\n3. {'(Måneskin) Beggin' - Fingerstyle Guitar Cover | Josephine Alexandra': {'utterance': '(Måneskin) Beggin' - Fingerstyle Guitar Cover | Josephine Alexandra', 'ARTIST': 'Måneskin', 'TITLE': 'Beggin\\''}}\n4. {'Owane - Rock Is Too Heavy': {'utterance': 'Owane - Rock Is Too Heavy', 'ARTIST': 'Owane', 'TITLE': 'Rock Is Too Heavy"

#### Option 2) with jsonformer --> structured but only single json.

In [12]:
json_schema = {
    "type": "object",
    "properties": {
        "artist_original": {"type": "string"},
        "artist_covering": {"type": "string"},
        "title_original": {"type": "string"},
        "title_covering": {"type": "string"},
        "title_english": {"type": "string"},
        }
    }

for title in title_list: 
    task_description = f"You are a given a video title of an online video which refers to a song. It is likely a cover. Parse the contained fields artist (covering and original), title (covering and original) and translate the title into english if its not english, else copy it into the translation.{title}"
    prompt = role_description + task_description
    output = llama.prompt_to_json(prompt, json_schema, 0.1)
    print(output)


{'artist_original': 'Dream Theater', 'artist_covering': 'Dream Theater', 'title_original': 'Metropolis Pt. 1', 'title_covering': 'Metropolis Pt. 1 (Live At Luna', 'title_english': 'Metropolis Pt. 1'}
{'artist_original': 'null', 'artist_covering': 'Plini', 'title_original': 'null', 'title_covering': 'full set', 'title_english': 'full set'}
{'artist_original': 'The Four Seasons', 'artist_covering': 'Måneskin', 'title_original': "Beggin'", 'title_covering': "Beggin'", 'title_english': "Beggin'"}
{'artist_original': 'Owane', 'artist_covering': 'Owane', 'title_original': 'Rock Is Too Heavy', 'title_covering': 'Rock Is Too Heavy', 'title_english': 'Rock Is Too Heavy'}
{'artist_original': 'Jungle', 'artist_covering': 'Tash Sultana', 'title_original': 'Jungle', 'title_covering': 'Tutorial - Guitar Loop Cover', 'title_english': 'Jungle'}
{'artist_original': 'Michael Jackson', 'artist_covering': 'Eddie Van Halen', 'title_original': 'Beat It', 'title_covering': 'How to Play Beat It Solo', 'title_

# SHS100K-Test

## Zero-Shot

In [22]:
import pandas as pd

data = pd.read_parquet("/data/csi_datasets/shs100k2_yt.parquet").query("split == 'TEST'")


In [31]:
from tqdm import tqdm

n=200
data_sample = data.sample(n=n)

json_schema = {
    "type": "object",
    "properties": {
        "artist_performing": {"type": "string"},
        "artist_original": {"type": "string"},
        "title_original": {"type": "string"},
        "title_performing": {"type": "string"},
        "title_english": {"type": "string"},
        }
    }

task_description = f"""
You are a given a video title of a YouTube video which contains a musical performance. 
Parse the song title of the performance, the original song title, the name of the performing artist and the original artist into the 
respective fields.
If the song title is not english, also fill the field title_english with the english translation of the 
title you find. Video title: "
"""

prompt = role_description + task_description
outputs = []

# why can't I set to 0?
temperature = 0.00000000000001

# inference with Llama
for video_title in tqdm(data_sample.video_title): 
    prompt = role_description + task_description + video_title
    output = llama.prompt_to_json(prompt, json_schema, temperature)
    outputs.append(output)

# write to df
data_parsed = pd.DataFrame(outputs)
for col in data_parsed.columns:
    data_sample[col] = data_parsed[col].values


100%|██████████| 200/200 [05:39<00:00,  1.70s/it]


In [62]:
accuracy_title_perf = sum((data_sample.title.str.lower() == data_sample.title_performing.str.lower())) / len(data_sample)
accuracy_title_orig = sum((data_sample.title.str.lower() == data_sample.title_original.str.lower())) / len(data_sample)
accuracy_artist_perf = sum((data_sample.performer.str.lower() == data_sample.artist_performing.str.lower())) / len(data_sample)
accuracy_artist_orig = sum((data_sample.performer.str.lower() == data_sample.artist_original.str.lower())) / len(data_sample)

print(accuracy_title_perf)
print(accuracy_title_orig)
print(accuracy_artist_perf)
print(accuracy_artist_orig)


0.68
0.76
0.65
0.47


In [78]:
title_perf_extracted = data_sample.apply(lambda x: x.title_performing in x.video_title, axis=1).sum() / len(data_sample)
title_orig_extracted = data_sample.apply(lambda x: x.title_original in x.video_title, axis=1).sum() / len(data_sample)
artist_perf_extracted = data_sample.apply(lambda x: x.artist_performing in x.video_title, axis=1).sum() / len(data_sample)
artist_orig_extracted = data_sample.apply(lambda x: x.artist_original in x.video_title, axis=1).sum() / len(data_sample)

print(title_perf_extracted)
print(title_orig_extracted)
print(artist_perf_extracted)
print(artist_orig_extracted)


0.945
0.945
0.925
0.8


## Few-Shot

In [79]:
task_description = """
You are a given a video title of a YouTube video which contains a musical performance. 
Parse the song title, the name of the performing artist in the video and the song title into the 
respective fields.
If the song title is not english, also fill the field title_english with the english translation of the 
title you find. Here are some examples: "
"""

examples = """
Example 1 video title: 'Dream Theater Metropolis Pt. 1 (Live At Luna Park DVD)'; Output attributes: artist_performing: Dream Theater, artist_original: Dream Theater, 
title_performing: Metropolis Pt. 1, title_original: Metropolis Pt. 1
Example 2 video title: '(Måneskin) Beggin' - Fingerstyle Guitar Cover | Josephine Alexandra'; Output attributes: artist_performing: Josephine Alexandra, artist_original: Måneskin, 
title_performing: Beggin' - Fingerstyle Guitar Cover, title_original: Beggin'
Example 3 video title: 'Owane - Rock Is Too Heavy'; Output attributes: artist_performing: Owane, artist_original: Owane, 
title_performing: Rock Is Too Heavy, title_original: Rock Is Too Heavy
Example 4 video title: 'Jungle - Tash Sultana - Tutorial - Guitar Loop Cover - Tabs Available'; Output attributes: artist_performing: null, artist_original: Tash Sultana, 
title_performing: Jungle, title_original: Jungle
Example 5 video title: 'How to Play Beat It Solo - Eddie Van Halen Michael Jackson'; Output attributes: artist_performing: null, artist_original: Micheal Jackson and Eddie Van Halen, 
title_performing: Beat It Solo, title_original: Beat It

Input: 
"""

json_schema = {
    "type": "object",
    "properties": {
        "artist_performing": {"type": "string"},
        "artist_original": {"type": "string"},
        "title_original": {"type": "string"},
        "title_performing": {"type": "string"},
        "title_english": {"type": "string"},
        }
    }


outputs = []

# few shot inference with Llama
for video_title in tqdm(data_sample.video_title): 
    prompt = role_description + task_description + examples + video_title
    output = llama.prompt_to_json(prompt, json_schema, temperature)
    outputs.append(output)

# write to df
data_parsed = pd.DataFrame(outputs)
for col in data_parsed.columns:
    data_sample[col] = data_parsed[col].values



100%|██████████| 200/200 [06:08<00:00,  1.84s/it]


In [80]:
accuracy_title_perf = sum((data_sample.title.str.lower() == data_sample.title_performing.str.lower())) / len(data_sample)
accuracy_title_orig = sum((data_sample.title.str.lower() == data_sample.title_original.str.lower())) / len(data_sample)
accuracy_artist_perf = sum((data_sample.performer.str.lower() == data_sample.artist_performing.str.lower())) / len(data_sample)
accuracy_artist_orig = sum((data_sample.performer.str.lower() == data_sample.artist_original.str.lower())) / len(data_sample)

print(accuracy_title_perf)
print(accuracy_title_orig)
print(accuracy_artist_perf)
print(accuracy_artist_orig)


0.51
0.755
0.625
0.44


In [81]:
title_perf_extracted = data_sample.apply(lambda x: x.title_performing in x.video_title, axis=1).sum() / len(data_sample)
title_orig_extracted = data_sample.apply(lambda x: x.title_original in x.video_title, axis=1).sum() / len(data_sample)
artist_perf_extracted = data_sample.apply(lambda x: x.artist_performing in x.video_title, axis=1).sum() / len(data_sample)
artist_orig_extracted = data_sample.apply(lambda x: x.artist_original in x.video_title, axis=1).sum() / len(data_sample)

print(title_perf_extracted)
print(title_orig_extracted)
print(artist_perf_extracted)
print(artist_orig_extracted)


0.905
0.955
0.92
0.76


In [86]:
data_sample[data_sample.performer != data_sample.artist_performing][["video_title", "performer", "artist_performing"]]



Unnamed: 0,video_title,performer,artist_performing
9804,APRON STRINGS - Cliff Richard,Cliff Richard and The Drifters,Cliff Richard
5122,BB King - Hummingbird,B.B. King,BB King
1750,Mina Mazzini- Stayin' Alive (live version),Mina,Mina Mazzini
3200,Don Ellis Orchestra - Put It Where You Want It...,Don Ellis,Don Ellis Orchestra
7158,Gene Clark - Almost Saturday Night,Gene Clark & Carla Olson,Gene Clark
...,...,...,...
3898,"Lee Morse and her Blue Grass Boys - ""If You Wa...",Lee Morse and Her Blue Grass Boys,Lee Morse and her Blue Grass Boys
5522,Heart Of The Matter- Hyannis Sound 2008,The Hyannis Sound,Hyannis Sound
9755,Two More Bottles of Wine,Emmylou Harris,unknown
8670,"Luciano Benevene, Nilla Pizzi e Duo Fasano - B...",Luciano Benevene - Nilla Pizzi - Duo Fasano - ...,"Luciano Benevene, Nilla Piz"


In [89]:
from rapidfuzz import fuzz

title_perf_ratios = data_sample.apply(lambda x: fuzz.token_ratio(x.title_performing, x.video_title) / 100, axis=1)
title_orig_ratios = data_sample.apply(lambda x: fuzz.token_ratio(x.title_original, x.video_title) / 100, axis=1)
artist_perf_ratios = data_sample.apply(lambda x: fuzz.token_ratio(x.artist_performing, x.video_title)/ 100,  axis=1)
artist_orig_ratios = data_sample.apply(lambda x: fuzz.token_ratio(x.artist_original, x.video_title) / 100, axis=1)


In [91]:
title_perf_ratios.describe()


count    200.000000
mean       0.928107
std        0.161550
min        0.184615
25%        0.997191
50%        1.000000
75%        1.000000
max        1.000000
dtype: float64

In [92]:
title_orig_ratios.describe()


count    200.000000
mean       0.910474
std        0.205917
min        0.135135
25%        1.000000
50%        1.000000
75%        1.000000
max        1.000000
dtype: float64

In [93]:
artist_perf_ratios.describe()


count    200.000000
mean       0.884570
std        0.243777
min        0.000000
25%        1.000000
50%        1.000000
75%        1.000000
max        1.000000
dtype: float64

In [94]:
artist_orig_ratios.describe()



count    200.000000
mean       0.774261
std        0.323432
min        0.000000
25%        0.533333
50%        1.000000
75%        1.000000
max        1.000000
dtype: float64