### NER: Named Enitty Recognition: Using token classification to classify entites from natural language

In [1]:
from transformers import AutoTokenizer, AutoModel, AutoModelForTokenClassification, pipeline

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [2]:
# https://huggingface.co/savasy/bert-base-turkish-ner-cased

custom_module = 'savasy/bert-base-turkish-ner-cased'

turkish_ner_tokenizer = AutoTokenizer.from_pretrained(custom_module)
turkish_ner_model = AutoModelForTokenClassification.from_pretrained(custom_module)

In [3]:
sequence = "Merhaba! Benim adım Sinan. San Francisco'dan geliyorum" # Hi! I'm Sinan. I come from San Francisco"

ner=pipeline('ner', model=turkish_ner_model, tokenizer=turkish_ner_tokenizer)
ner(sequence)

[{'entity': 'B-PER',
  'score': 0.72424716,
  'index': 5,
  'word': 'Sinan',
  'start': 20,
  'end': 25},
 {'entity': 'B-LOC',
  'score': 0.99879956,
  'index': 7,
  'word': 'San',
  'start': 27,
  'end': 30},
 {'entity': 'I-LOC',
  'score': 0.9977098,
  'index': 8,
  'word': 'Francisco',
  'start': 31,
  'end': 40}]

### Summarization: Using Bert 2 Bert to extract summaries from text

In [4]:
# https://huggingface.co/mrm8488/bert-small2bert-small-finetuned-cnn_daily_mail-summarization

from transformers import BertTokenizerFast, EncoderDecoderModel
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = BertTokenizerFast.from_pretrained('mrm8488/bert-small2bert-small-finetuned-cnn_daily_mail-summarization')
model = EncoderDecoderModel.from_pretrained('mrm8488/bert-small2bert-small-finetuned-cnn_daily_mail-summarization').to(device)

def generate_summary(text):
    # cut off at BERT max length 512
    inputs = tokenizer([text], padding="max_length", truncation=True, max_length=512, return_tensors="pt")
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)

    output = model.generate(input_ids, attention_mask=attention_mask)

    return tokenizer.decode(output[0], skip_special_tokens=True)

In [5]:
text = "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."

print(f'Length of text: {len(text)}')
print(text)
print('\n\n')

summary = generate_summary(text)

print(f'Length of summary: {len(summary)}')

print(summary)

Length of text: 743
The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.



Length of summary: 263
the eiffel tower is 324 metres ( 1, 063 ft ) tall. its base is square, measuring 125 metres ( 410 ft ) on each side. it was the first structure to reach a height of 300 metres. it is the second tallest free - s

In [6]:
from transformers import AutoModelForSequenceClassification
  
tokenizer = AutoTokenizer.from_pretrained("Alireza1044/albert-base-v2-qnli")

model = AutoModelForSequenceClassification.from_pretrained("Alireza1044/albert-base-v2-qnli")

### NLI: Natural Language Inference: the task of determining whether a “hypothesis” is true (called entailment), false (called contradiction), or undetermined (called neutral) given a “premise”.



In [7]:
# https://huggingface.co/Alireza1044/albert-base-v2-qnli?text=I+like+you.+I+love+you
from torch.nn import Softmax


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def nli(text):
    inputs = tokenizer([text], padding="max_length", truncation=True, max_length=512, return_tensors="pt")
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)

    output = model(input_ids, attention_mask=attention_mask)

    return Softmax()(output.logits)

In [8]:
nli('I like you. I love you.')

  return Softmax()(output.logits)


tensor([[0.0270, 0.9730]], grad_fn=<SoftmaxBackward>)

In [9]:
nli('I like you. I hate you.')

  return Softmax()(output.logits)


tensor([[0.0195, 0.9805]], grad_fn=<SoftmaxBackward>)

In [10]:
import torch
from transformers import Wav2Vec2Processor, HubertForCTC
from datasets import load_dataset
import soundfile as sf

processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft")
model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")

def map_to_array(batch):
    speech, _ = sf.read(batch["file"])
    batch["speech"] = speech
    return batch
    
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
ds = ds.map(map_to_array)

input_values = processor(ds["speech"][0], return_tensors="pt").input_values  # Batch size 1
logits = model(input_values).logits
predicted_ids = torch.argmax(logits, dim=-1)
transcription = processor.decode(predicted_ids[0])

Reusing dataset librispeech_asr (/Users/sinanozdemir/.cache/huggingface/datasets/librispeech_asr/clean/2.1.0/468ec03677f46a8714ac6b5b64dba02d246a228d92cbbad7f3dc190fa039eab1)
Loading cached processed dataset at /Users/sinanozdemir/.cache/huggingface/datasets/librispeech_asr/clean/2.1.0/468ec03677f46a8714ac6b5b64dba02d246a228d92cbbad7f3dc190fa039eab1/cache-013ae9d5f1cf6018.arrow
It is strongly recommended to pass the ``sampling_rate`` argument to this function.Failing to do so can result in silent errors that might be hard to debug.


In [11]:
transcription

'A MAN SAID TO THE UNIVERSE SIR I EXIST'

In [12]:
import librosa    
local_file, sampling_rate = librosa.load('../data/sample.wav', sr=16000) # Downsample to 16kHz as the model was trained in

input_values = processor(local_file, return_tensors="pt", sampling_rate=sampling_rate).input_values
logits = model(input_values).logits
predicted_ids = torch.argmax(logits, dim=-1)
transcription = processor.decode(predicted_ids[0])

transcription

'WHAT A WONDERFUL CLASS'

<img title="a title" alt="Speech Recognintion" src="../images/speech_recognition.png">