In [4]:
#Import important lib.
import nltk
#!pip install gradio
#!pip install transformers
import librosa
import torch
import gradio as gr
from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC
nltk.download("punkt")
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report

import spacy
import random
from spacy.training.example import Example


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [5]:
#Loading the pre-trained model and the tokenizer
model_name = "facebook/wav2vec2-base-960h"
tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'Wav2Vec2CTCTokenizer'. 
The class this function is called from is 'Wav2Vec2Tokenizer'.
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:

#reading the file
speech, sample_rate = librosa.load('/content/sales_call_telephone_marketers.wav')
#make it 1-D
if len(speech.shape) > 1:
    speech = speech[:,0] + speech[:,1]
#Resampling the audio at 16KHz
if sample_rate !=16000:
  speech = librosa.resample(speech,orig_sr=sample_rate, target_sr=16000)

#Tokenize
input_values = tokenizer(speech, return_tensors="pt").input_values
#Take logits
logits = model(input_values).logits
#Take argmax
predicted_ids = torch.argmax(logits, dim=-1)
#Get the words from predicted word ids
transcription = tokenizer.decode(predicted_ids[0])
#Correcting the letter casing
sentences = nltk.sent_tokenize(transcription.lower())
transcription = (' '.join([s.replace(s[0],s[0].capitalize(),1) for s in sentences]))
#create dictionary
d = {}
d.update({"task_1_output": transcription})

print(transcription)

Hello i nancy this is like from eightient incorporation yes how can i help you nancy you have been using our preepa connection for a couple of years now right ye that's right how would you like a postpa connection that allows you to make free unlimited voice calls to three eightent numbers i would love that but what's the catch there's no catch there will be a monthly rental which you will have to pay like any other postpaid connection fantastic sign me up


In [13]:
#Prepare the dataset
data = pd.DataFrame({
    'Text': [
        "My name is Jeff and I am calling from Amazon.",
        "I am calling from Microsoft and my name is Satya.",
        "I am Sundar and this is a call from Google.",
        "I am calling about your Microsoft Azure subscription.",
        "This is a call regarding your Google Cloud Platform account.",
        "I would like to talk about your Amazon Web Services account.",
        "Hello, this is John from IBM.",
        "I'm calling on behalf of Apple.",
        "My name is Emily, and I represent Tesla.",
        "This call is related to your Salesforce subscription.",
        "I'm reaching out about your Oracle database.",
        "Good day, I am Tom calling from Facebook."
    ],
    'Label': ['Intro', 'Intro', 'Intro', 'Purpose', 'Purpose', 'Purpose', 'Intro',  'Intro', 'Intro','Purpose','Purpose','Intro']
})

X = data['Text']
y = data['Label']

#convert to vectors
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X)
#train model
model = SVC(kernel='linear')
model.fit(X_train_vec,y)

txt = "Hello. Hi Nancy. This is Mike from AT&T and Corporation for a couple of years now, right? That's right. I would you like a postpaid connection that allows you to make free unlimited voice calls to 38 e, n t numbers. I would love that, but what's the catch? There's no catch, there will be a monthly rental, which you will have to pay like any other postpaid connection. Fantastic sign me up. There's no catch, there will be a monthly rental, which you will have to pay like any other postpaid connection. Fantastic sign me up."
All_text = txt.split(".")
intent = []
for sent in All_text:
    new_text_vec = vectorizer.transform([sent])
    predicted_category = model.predict(new_text_vec)
    intent.append(predicted_category[0])



In [14]:
#blank english model
nlp = spacy.blank("en")

#Define the entity
entity_labels = ["caller_name", "company", "product"]

#Add the entity recognizer
ner = nlp.add_pipe("ner")

#Add the labels to the entity recognizer
for label in entity_labels:
    ner.add_label(label)

#Prepare the dataset
TRAIN_DATA = [
    ("My name is Jeff and I am calling from Amazon.", {"entities": [(11, 15, "caller_name"), (38, 44, "company")]}),
    ("I am calling from Microsoft and my name is Satya.", {"entities": [(19, 28, "company"), (45, 50, "caller_name")]}),
    ("I am Sundar and this is a call from Google.", {"entities": [(5, 11, "caller_name"), (37, 43, "company")]}),
    ("I am calling about your Microsoft Azure subscription.", {"entities": [(25, 41, "product")]}),
    ("This is a call regarding your Google Cloud Platform account.", {"entities": [(32, 54, "product")]}),
    ("I would like to talk about your Amazon Web Services account.", {"entities": [(33, 55, "product")]}),
    ("My name is Jeff and I am calling from Amazon.", {"entities": [(11, 15, "caller_name"), (38, 44, "company")]}),
    ("I am calling from Microsoft and my name is Satya.", {"entities": [(19, 28, "company"), (45, 50, "caller_name")]}),
    ("I am Sundar and this is a call from Google.", {"entities": [(5, 11, "caller_name"), (37, 43, "company")]}),
    ("I am calling about your Microsoft Azure subscription.", {"entities": [(25, 41, "product")]}),
    ("This is a call regarding your Google Cloud Platform account.", {"entities": [(32, 54, "product")]}),
    ("I would like to talk about your Amazon Web Services account.", {"entities": [(33, 55, "product")]}),
    ("My name is Jeff and I am calling from Amazon.", {"entities": [(11, 15, "caller_name"), (38, 44, "company")]}),
    ("I am calling from Microsoft and my name is Satya.", {"entities": [(19, 28, "company"), (45, 50, "caller_name")]}),
    ("I am Sundar and this is a call from Google.", {"entities": [(5, 11, "caller_name"), (37, 43, "company")]}),
    ("I am calling about your Microsoft Azure subscription.", {"entities": [(25, 41, "product")]}),
    ("This is a call regarding your Google Cloud Platform account.", {"entities": [(32, 54, "product")]}),
    ("I would like to talk about your Amazon Web Services account.", {"entities": [(33, 55, "product")]}),
    ("My name is John from IBM. I want to discuss Azure.", {"entities": [(11, 15, "caller_name"), (20, 23, "company"), (42, 47, "product")]}),
    ("Hello, this is Emily calling about Microsoft Office. I'm from Salesforce.", {"entities": [(20, 25, "caller_name"), (42, 57, "product"), (66, 75, "company")]}),
    ("I'm Alex and I represent Tesla. Let's talk about Google Cloud Platform.", {"entities": [(4, 8, "caller_name"), (23, 28, "company"), (52, 73, "product")]}),
    ("My name is Sarah from Oracle. I want to discuss Amazon Web Services.", {"entities": [(11, 16, "caller_name"), (21, 27, "company"), (47, 67, "product")]}),
    ("Hello, this is Michael calling about IBM Watson. I'm from Facebook.", {"entities": [(20, 27, "caller_name"), (42, 52, "product"), (59, 66, "company")]}),
    ("I'm Olivia and I represent Apple. Let's talk about Microsoft Office.", {"entities": [(4, 10, "caller_name"), (27, 32, "company"), (51, 66, "product")]}),
    ("My name is John from Tesla. I want to discuss Amazon Web Services.", {"entities": [(11, 15, "caller_name"), (20, 25, "company"), (42, 62, "product")]}),
    ("Hello, this is Emily calling about Azure. I'm from Salesforce.", {"entities": [(20, 25, "caller_name"), (42, 47, "product"), (61, 70, "company")]}),
    ("I'm Alex and I represent IBM. Let's talk about Google Cloud Platform.", {"entities": [(4, 8, "caller_name"), (23, 26, "company"), (50, 71, "product")]}),
    ("My name is Sarah from Microsoft Office. I want to discuss Oracle.", {"entities": [(11, 16, "caller_name"), (21, 36, "company"), (50, 56, "product")]}),
    ("Hello, this is Michael calling about Amazon Web Services. I'm from Facebook.", {"entities": [(20, 27, "caller_name"), (42, 62, "product"), (69, 76, "company")]}),
    ("I'm Olivia and I represent Google Cloud Platform. Let's talk about IBM Watson.", {"entities": [(4, 10, "caller_name"), (27, 48, "company"), (63, 74, "product")]}),
    ("My name is John from Amazon Web Services. I want to discuss Salesforce.", {"entities": [(11, 15, "caller_name"), (20, 40, "company"), (49, 58, "product")]}),
    ("Hello, this is Emily calling about Oracle. I'm from Tesla.", {"entities": [(20, 25, "caller_name"), (42, 48, "product"), (59, 64, "company")]})
]

# Disable unnecessary pipeline
disable_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]

# Train the model
with nlp.disable_pipes(*disable_pipes):
    optimizer = nlp.begin_training()
    for epoch in range(10):
        random.shuffle(TRAIN_DATA)
        losses = {}
        for text, annotations in TRAIN_DATA:
            doc = nlp.make_doc(text)
            example = Example.from_dict(doc, annotations)
            nlp.update([example], sgd=optimizer, losses=losses)
        print("Epoch:", epoch, "Losses:", losses)
nlp.to_disk("/content/trained_model")





Epoch: 0 Losses: {'ner': 92.25466343276847}
Epoch: 1 Losses: {'ner': 7.668230379147374}
Epoch: 2 Losses: {'ner': 4.628165412485447}
Epoch: 3 Losses: {'ner': 1.6646014207981776}
Epoch: 4 Losses: {'ner': 1.9652608956701454}
Epoch: 5 Losses: {'ner': 0.9706916448377712}
Epoch: 6 Losses: {'ner': 1.2550992650463648}
Epoch: 7 Losses: {'ner': 1.9750809606560018}
Epoch: 8 Losses: {'ner': 15.781432280974267}
Epoch: 9 Losses: {'ner': 1.092679572403554}


In [9]:
# Process the input text
nlp = spacy.load("/content/trained_model")
d.update({"task_3_output":[]})
i=0
#Find entities
for sent in All_text:
    d["task_3_output"].append({"sentence":sent, "intent":intent[i],"entities": []})
    i = i+1
    doc = nlp(sent)
    for ent in doc.ents:
        d["task_3_output"][i-1]["entities"].append({"entity_name": ent.text,"entity_value": ent.label_})

In [12]:
import json

json_object = json.dumps(d, indent=4)
#Save json
with open("/content/data.json", "w") as f:
    json.dump(json_object,f)