###Import Libraries###

In [None]:
# Third-party libraries used throughout the notebook.
import spacy 
from spacy import displacy
import pandas as pd

# Bare expression in the middle of a cell: it is evaluated but not displayed,
# because a notebook only echoes the last expression of a cell.
spacy.__version__

import numpy
# Seed NumPy's global random number generator so runs are repeatable.
numpy.random.seed(0)

###Download and Load Spacy Language Model###

In [None]:
# Download spaCy's small English model (shell command run through the `!` escape).
!python -m spacy download en_core_web_sm
# Load the downloaded model; `nlp` is the pipeline object used by the cells below.
nlp = spacy.load("en_core_web_sm")

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')


In [None]:
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tagger', 'parser', 'ner']

###Text NER on pre-defined Spacy Model###

In [None]:
# Run the stock (untrained-for-our-labels) pipeline on one sample utterance and
# tabulate whatever entities it finds.
text = "Give me a bright guitar"

doc = nlp(text)

# Store the entity *text* rather than the spaCy Span object itself: a Span keeps
# a reference to its parent Doc and is not a plain string, which makes the
# resulting table awkward to display, compare or serialise.
entities = [ent.text for ent in doc.ents]
labels = [ent.label_ for ent in doc.ents]
# Character offsets of each entity within `text`.
position_start = [ent.start_char for ent in doc.ents]
position_end = [ent.end_char for ent in doc.ents]

df = pd.DataFrame({'Entities':entities,'Labels':labels,'Position_Start':position_start, 'Position_End':position_end})

df

Unnamed: 0,Entities,Labels,Position_Start,Position_End


In [None]:
### As shown above, the pre-defined spaCy model does not help us directly; we need to train it on our own data with our own labels

##For Instrument Recognition##

###Updating NER###

In [None]:
# Reload the pretrained pipeline and keep a handle on its
# named-entity-recognition component.
nlp = spacy.load("en_core_web_sm")
ner = nlp.get_pipe("ner")

###Prepare Training Data in spacy format###

In [None]:
# Read the labelled utterances from the Google Sheet's CSV export,
# skipping the first two rows of the sheet.
SHEET_CSV_URL = 'https://docs.google.com/spreadsheets/d/1prSkwXc1IXMKLrIgzurMyxQexSEhx1LNTLBMuQfjhzI/export?gid=0&format=csv'
df = pd.read_csv(SHEET_CSV_URL, skiprows=2)
print(df.shape)
df.head()

(240, 15)


Unnamed: 0,#,Text,Adverb,Instrument,Member,Unnamed: 5,acoustic guitar,Clear,buzzing,Rich,like a drum,Taylor,Fender,ahem,Unnamed: 14
0,1.0,Give me a bright guitar,Bright,guitar,Juan Carlos,,bagpipes,Cold,damped,Distortion,guttural,Gibson,PRS,argh,
1,2.0,I'd like a sharp cello,Sharp,cello,Juan Carlos,,banjo,Compact,dark,fast_decay,about to rip the strings off,Guild,G&L,Babble,
2,3.0,give me a dry acoustic guitar,Dry,acoustic guitar,Jacob,,bass guitar,Crisp,distorted,long_release,gnarly distortion,Seagull,Rickenbacker,bam,
3,4.0,give me a metallic harp,Metallic,harp,Jacob,,bongo drums,Dark,dull,Multiphonic,stinging precision,Yamaha,Ibanez,bang,
4,5.0,give me a dirty organ,Dirty,organ,Jacob,,bugle,Deep,harsh,nonlinear_env,piercing clean,Ovation,ESP,Bark,


In [None]:
import numpy

# Vectorised substring search. `numpy.char` is the public alias of the module
# the original code imported from (`numpy.core.defchararray`); the `numpy.core`
# path is private and deprecated in NumPy 2.
find = numpy.char.find

# Keep only rows that carry an instrument label. `.copy()` gives an independent
# frame, so the column assignments below do not write into a slice of the
# original (which triggers SettingWithCopyWarning).
df = df[~df.Instrument.str.len().isna()].copy()

# Lower-case both sides so the substring search is case-insensitive.
df.Text = df.Text.str.lower()
df.Instrument = df.Instrument.str.lower()

Text = df.Text.values.astype(str)
Instrument = df.Instrument.values.astype(str)

# Character span [start_index, end_index) of the instrument inside the text.
df['start_index'] = find(Text, Instrument)
df['end_index'] = df['start_index'] + df.Instrument.str.len().astype(int)

# find() returns -1 when the label does not occur in the text. Such rows would
# produce a bogus span (-1, len - 1) in the training data, so drop them.
label_missing = df['start_index'] < 0
if label_missing.any():
    print("Dropped", int(label_missing.sum()), "rows whose Instrument was not found in Text")
df = df[~label_missing]

print(df.shape)
df.head()

(48, 17)


Unnamed: 0,#,Text,Adverb,Instrument,Member,Unnamed: 5,acoustic guitar,Clear,buzzing,Rich,like a drum,Taylor,Fender,ahem,Unnamed: 14,start_index,end_index
0,1.0,give me a bright guitar,Bright,guitar,Juan Carlos,,bagpipes,Cold,damped,Distortion,guttural,Gibson,PRS,argh,,17,23
1,2.0,i'd like a sharp cello,Sharp,cello,Juan Carlos,,banjo,Compact,dark,fast_decay,about to rip the strings off,Guild,G&L,Babble,,17,22
2,3.0,give me a dry acoustic guitar,Dry,acoustic guitar,Jacob,,bass guitar,Crisp,distorted,long_release,gnarly distortion,Seagull,Rickenbacker,bam,,14,29
3,4.0,give me a metallic harp,Metallic,harp,Jacob,,bongo drums,Dark,dull,Multiphonic,stinging precision,Yamaha,Ibanez,bang,,19,23
4,5.0,give me a dirty organ,Dirty,organ,Jacob,,bugle,Deep,harsh,nonlinear_env,piercing clean,Ovation,ESP,Bark,,16,21


In [None]:
# Hold out five randomly chosen rows for evaluation; every row whose text
# appears in the held-out sample is excluded from the training frame.
test_df = df.sample(n=5)
in_test_set = df.Text.isin(test_df.Text)
train_df = df[~in_test_set]

for frame in (df, test_df, train_df):
    print(frame.shape)

(48, 17)
(5, 17)
(43, 17)


In [None]:
# Each example is a (text, annotations) pair, where the annotations hold a
# single (start, end, label) character span for the instrument.
TRAIN_DATA = [
    (text, {"entities": [(start_idx, end_idx, "INSTRUMENT")]})
    for text, start_idx, end_idx in zip(train_df.Text, train_df.start_index, train_df.end_index)
]
print(len(TRAIN_DATA))

# Same structure for the held-out rows.
TEST_DATA = [
    (text, {"entities": [(start_idx, end_idx, "INSTRUMENT")]})
    for text, start_idx, end_idx in zip(test_df.Text, test_df.start_index, test_df.end_index)
]
print(len(TEST_DATA))


43
5


####Adding labels to the `ner`###


In [None]:
# Collect the labels used in the training spans (first-seen order, without
# duplicates) and register each one with the NER component.
labels_in_order = dict.fromkeys(
    span[2]
    for _, annots in TRAIN_DATA
    for span in annots.get("entities")
)
for label in labels_in_order:
    ner.add_label(label)

###Disable pipeline components that are not changed

In [None]:
# Components that must stay enabled while training; every other component in
# the pipeline is listed in `unaffected_pipes`.
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
unaffected_pipes = [
    name
    for name in nlp.pipe_names
    if name not in pipe_exceptions
]

###Train NER###

In [None]:
# Import requirements
import random
from spacy.util import minibatch, compounding
from pathlib import Path

# Seed Python's own RNG: random.shuffle below does not use NumPy's generator,
# so without this the batch order differs on every run.
random.seed(0)

# TRAINING THE MODEL
# Only the NER component is updated; the other components are disabled for the
# duration of the `with` block.
with nlp.disable_pipes(*unaffected_pipes):
    for iteration in range(40):

        # shuffle the examples before every iteration
        random.shuffle(TRAIN_DATA)
        # `losses` is passed to every nlp.update call of this iteration and
        # accumulates across the batches.
        losses = {}
        # batch up the examples using spaCy's minibatch, with a batch size
        # that grows from 4 towards 32
        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(
                texts,  # batch of texts
                annotations,  # batch of annotations
                drop=0.5,  # dropout - make it harder to memorise data
                losses=losses
            )
        # Report once per iteration: printing after every batch shows a running
        # total that only ever grows within an iteration and floods the output.
        print("Losses", losses)

Losses {'ner': 24.65455436706543}
Losses {'ner': 55.32708215713501}
Losses {'ner': 83.32814335823059}
Losses {'ner': 102.80389976501465}
Losses {'ner': 125.41117358207703}
Losses {'ner': 146.00458002090454}
Losses {'ner': 165.76476728916168}
Losses {'ner': 192.97753298282623}
Losses {'ner': 221.98739778995514}
Losses {'ner': 240.7572045326233}
Losses {'ner': 263.10168144106865}
Losses {'ner': 36.23232316970825}
Losses {'ner': 58.33322525024414}
Losses {'ner': 79.28316736221313}
Losses {'ner': 103.94092035293579}
Losses {'ner': 125.68139350414276}
Losses {'ner': 145.7491043806076}
Losses {'ner': 165.51068305969238}
Losses {'ner': 186.5547866821289}
Losses {'ner': 203.09644269943237}
Losses {'ner': 231.35916757583618}
Losses {'ner': 251.39497590065002}
Losses {'ner': 19.38022756576538}
Losses {'ner': 42.744486570358276}
Losses {'ner': 60.174341440200806}
Losses {'ner': 89.54851603507996}
Losses {'ner': 102.43791973590851}
Losses {'ner': 118.15094888210297}
Losses {'ner': 132.024947285652

In [None]:
### trained up to 40 iterations

In [None]:
# Run the updated pipeline on each held-out sentence and show what it tagged.
for sentence, _ in TEST_DATA:
  print(sentence)
  doc = nlp(sentence)
  found = [(span.text, span.label_) for span in doc.ents]
  print("Entities", found)

give me a metallic harp
Entities []
give me a chord preset
Entities [('preset', 'INSTRUMENT')]
a noisy piano and flute please
Entities []
give me a percussive violin
Entities [('violin', 'INSTRUMENT')]
give me a bright guitar
Entities [('guitar', 'INSTRUMENT')]


In [None]:
# Probe the model with made-up words in the instrument slot.
for sentence in ("Give me a bright AA", "Give me a sharp Cat"):
  doc = nlp(sentence)
  print("Entities", [(ent.text, ent.label_) for ent in doc.ents])


Entities []
Entities []


##For Adverb Recognition##

###Updating NER###

In [None]:
# Getting the pipeline component
nlp = spacy.load("en_core_web_sm")
ner=nlp.get_pipe("ner")

###Prepare Training Data in spacy format###

In [None]:
# Re-read the labelled utterances from the Google Sheet's CSV export,
# skipping the first two rows of the sheet.
SHEET_CSV_URL = 'https://docs.google.com/spreadsheets/d/1prSkwXc1IXMKLrIgzurMyxQexSEhx1LNTLBMuQfjhzI/export?gid=0&format=csv'
df = pd.read_csv(SHEET_CSV_URL, skiprows=2)
print(df.shape)
df.head()

(178, 11)


Unnamed: 0,#,Text,Sound file,Adverb,Instrument,Member,Slack Name,Unnamed: 7,Lucas,acoustic guitar,Hollow
0,1,Give me a bright guitar,https://drive.google.com/file/d/1tLNVvyu99lgco...,Bright,guitar,Juan Carlos,juancopi81,,Gianluca,bagpipes,Clear
1,2,I'd like a sharp cello,https://drive.google.com/file/d/1maTKhwe_7JMnG...,Sharp,cello,Juan Carlos,juancopi81,,Stephen,banjo,Rough
2,3,give me a dry acoustic guitar,,Dry,acoustic guitar,Jacob,,,Tanay,bass guitar,Metallic
3,4,give me a metallic harp,,Metallic,harp,Jacob,,,Juan Carlos,bongo drums,Warm
4,5,give me a dirty organ,,Dirty,organ,Jacob,,,,bugle,Smooth


In [None]:
import numpy

# Vectorised substring search. `numpy.char` is the public alias of the module
# the original code imported from (`numpy.core.defchararray`); the `numpy.core`
# path is private and deprecated in NumPy 2.
find = numpy.char.find

# Keep only rows that carry an adverb label. `.copy()` gives an independent
# frame, so the column assignments below do not write into a slice of the
# original (which triggers SettingWithCopyWarning).
df = df[~df.Adverb.str.len().isna()].copy()

# Lower-case both sides so the substring search is case-insensitive.
df.Text = df.Text.str.lower()
df.Adverb = df.Adverb.str.lower()

Text = df.Text.values.astype(str)
Adverb = df.Adverb.values.astype(str)

# Character span [start_index, end_index) of the adverb inside the text.
df['start_index'] = find(Text, Adverb)
df['end_index'] = df['start_index'] + df.Adverb.str.len().astype(int)

# find() returns -1 when the label does not occur in the text. Such rows would
# produce a bogus span (-1, len - 1) in the training data, so drop them.
label_missing = df['start_index'] < 0
if label_missing.any():
    print("Dropped", int(label_missing.sum()), "rows whose Adverb was not found in Text")
df = df[~label_missing]

df.head()

Unnamed: 0,#,Text,Sound file,Adverb,Instrument,Member,Slack Name,Unnamed: 7,Lucas,acoustic guitar,Hollow,start_index,end_index
0,1,give me a bright guitar,https://drive.google.com/file/d/1tLNVvyu99lgco...,bright,guitar,Juan Carlos,juancopi81,,Gianluca,bagpipes,Clear,10,16
1,2,i'd like a sharp cello,https://drive.google.com/file/d/1maTKhwe_7JMnG...,sharp,cello,Juan Carlos,juancopi81,,Stephen,banjo,Rough,11,16
2,3,give me a dry acoustic guitar,,dry,acoustic guitar,Jacob,,,Tanay,bass guitar,Metallic,10,13
3,4,give me a metallic harp,,metallic,harp,Jacob,,,Juan Carlos,bongo drums,Warm,10,18
4,5,give me a dirty organ,,dirty,organ,Jacob,,,,bugle,Smooth,10,15


In [None]:
# Hold out fifteen randomly chosen rows for evaluation; every row whose text
# appears in the held-out sample is excluded from the training frame.
test_df = df.sample(n=15)
in_test_set = df.Text.isin(test_df.Text)
train_df = df[~in_test_set]

for frame in (df, test_df, train_df):
    print(frame.shape)

(166, 13)
(15, 13)
(151, 13)


In [None]:
# Each example is a (text, annotations) pair, where the annotations hold a
# single (start, end, label) character span for the adverb.
TRAIN_DATA = [
    (text, {"entities": [(start_idx, end_idx, "ADVERB")]})
    for text, start_idx, end_idx in zip(train_df.Text, train_df.start_index, train_df.end_index)
]
print(len(TRAIN_DATA))

# Same structure for the held-out rows.
TEST_DATA = [
    (text, {"entities": [(start_idx, end_idx, "ADVERB")]})
    for text, start_idx, end_idx in zip(test_df.Text, test_df.start_index, test_df.end_index)
]
print(len(TEST_DATA))


151
15


####Adding labels to the `ner`###


In [None]:
# Collect the labels used in the training spans (first-seen order, without
# duplicates) and register each one with the NER component.
labels_in_order = dict.fromkeys(
    span[2]
    for _, annots in TRAIN_DATA
    for span in annots.get("entities")
)
for label in labels_in_order:
    ner.add_label(label)

###Disable pipeline components that are not changed

In [None]:
# Components that must stay enabled while training; every other component in
# the pipeline is listed in `unaffected_pipes`.
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
unaffected_pipes = [
    name
    for name in nlp.pipe_names
    if name not in pipe_exceptions
]

###Train NER###

In [None]:
# Import requirements
import random
from spacy.util import minibatch, compounding
from pathlib import Path

# Seed Python's own RNG: random.shuffle below does not use NumPy's generator,
# so without this the batch order differs on every run.
random.seed(0)

# TRAINING THE MODEL
# Only the NER component is updated; the other components are disabled for the
# duration of the `with` block.
with nlp.disable_pipes(*unaffected_pipes):

    # Training for 20 iterations
    for iteration in range(20):

        # shuffle the examples before every iteration
        random.shuffle(TRAIN_DATA)
        # `losses` is passed to every nlp.update call of this iteration and
        # accumulates across the batches.
        losses = {}
        # batch up the examples using spaCy's minibatch, with a batch size
        # that grows from 4 towards 32
        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(
                texts,  # batch of texts
                annotations,  # batch of annotations
                drop=0.5,  # dropout - make it harder to memorise data
                losses=losses,
            )
        # Report once per iteration: printing after every batch shows a running
        # total that only ever grows within an iteration and floods the output.
        print("Losses", losses)

Losses {'ner': 24.38126802444458}
Losses {'ner': 43.2413900748943}
Losses {'ner': 64.46840700070607}
Losses {'ner': 82.66070255200611}
Losses {'ner': 102.22540953318821}
Losses {'ner': 121.90133270184742}
Losses {'ner': 141.13477071683155}
Losses {'ner': 159.76567229192005}
Losses {'ner': 184.4382232085918}
Losses {'ner': 205.0867008821224}
Losses {'ner': 222.83347364823567}
Losses {'ner': 241.4570370570873}
Losses {'ner': 260.93555315892445}
Losses {'ner': 281.67301877896534}
Losses {'ner': 301.7486284629558}
Losses {'ner': 314.4890752689098}
Losses {'ner': 334.8503464118694}
Losses {'ner': 351.366987134621}
Losses {'ner': 369.80611577193486}
Losses {'ner': 391.35178055922734}
Losses {'ner': 414.3140555874561}
Losses {'ner': 437.7582785145496}
Losses {'ner': 460.41880288283573}
Losses {'ner': 478.2993977324222}
Losses {'ner': 496.14514138858067}
Losses {'ner': 509.8595376269077}
Losses {'ner': 523.2385607020115}
Losses {'ner': 557.0325794474338}
Losses {'ner': 578.7740645662998}
Losse

In [None]:
# Run the updated pipeline on each held-out sentence and print the sentence
# next to the entities it tagged.
for sentence, _ in TEST_DATA:
  doc = nlp(sentence)
  found = [(span.text, span.label_) for span in doc.ents]
  print(sentence, " : Entities", found)

give me a sharply recorder  : Entities [('sharply', 'ADVERB')]
give me a promptly didgeridoo  : Entities [('promptly', 'ADVERB')]
give me a obediently sitar  : Entities [('obediently', 'ADVERB')]
give me a obnoxiously piccolo  : Entities [('obnoxiously', 'ADVERB')]
give me a warm synth  : Entities [('warm', 'ADVERB')]
give me a sharply didgeridoo  : Entities [('sharply', 'ADVERB')]
give me a elegantly bass guitar  : Entities [('elegantly', 'ADVERB')]
give me a wide stereo pad  : Entities [('wide', 'ADVERB')]
give me a exactly cymbals  : Entities [('exactly', 'ADVERB')]
give me a dramatically viola  : Entities [('dramatically', 'ADVERB')]
give me a rapidly lute  : Entities [('rapidly', 'ADVERB')]
give me a powerfully trumpet  : Entities [('powerfully', 'ADVERB')]
give me a round bass  : Entities [('round', 'ADVERB')]
give me a poorly clarinet  : Entities [('poorly', 'ADVERB')]
give me a irritably keyboard  : Entities [('irritably', 'ADVERB')]


####The problem here seems to be the same as for instruments: the model memorises the position of the adverb rather than the word itself####

In [None]:
# Probe the model with sentences whose structure differs from the training
# data, plus one made-up word in the usual adverb slot.
probe_sentences = (
    "How bright are you",
    "The garden is very wide",
    "give me a AA bass",
)
for sentence in probe_sentences:
  doc = nlp(sentence)
  print("Entities", [(ent.text, ent.label_) for ent in doc.ents])


Entities []
Entities []
Entities [('AA', 'ADVERB')]
