<a href="https://colab.research.google.com/github/mohammedterry/NLP_Lab/blob/master/WaveNet_STT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# WaveNet

In [0]:
# TensorFlow is pinned to 1.0.0 (presumably the version the speech-to-text-wavenet
# code cloned below was written against — TODO confirm against that repo's README).
!pip3 install tensorflow==1.0.0

In [0]:
# NOTE(review): sugartensor is installed unpinned, unlike tensorflow==1.0.0 above;
# consider pinning a release known to work with that TensorFlow version.
!pip3 install sugartensor

In [0]:
# Fetch the speech-to-text-wavenet repository; later cells chdir into it and run its recognize.py.
!git clone https://github.com/buriburisuri/speech-to-text-wavenet

In [0]:
import os
os.chdir('speech-to-text-wavenet')
!ls

In [0]:
!mkdir asset
!ls

### Load pretrained weights saved in Google Drive

In [0]:
from google.colab import drive
# Colab-only: mount Google Drive at /content/gdrive so the weights can be copied from it.
drive.mount('/content/gdrive')

In [0]:
# Copy everything under the Drive-side asset/ folder into the local asset/ directory.
# NOTE(review): the source path (My Drive/Codes_Projects/...) is specific to the author's
# Drive layout, and the relative ../gdrive assumes the cwd is one level below the mount's
# parent — adjust both to wherever your copy of the weights lives.
!cp -r ../gdrive/My\ Drive/Codes_Projects/speech-to-text-wavenet/asset/* asset/

# Tests

In [0]:
from IPython.display import Audio
# Relies on `os` imported in an earlier cell. Set before the `!python3 recognize.py` runs
# below so those subprocesses see it.
# NOTE(review): '3' is presumably TensorFlow's most restrictive C++ log filter — confirm against the TF docs.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

## Test 1

In [0]:
# Save the clip locally (curl -O keeps the remote file name) for recognize.py,
# and embed a player that streams the same URL.
!curl -O https://www.ee.columbia.edu/~dpwe/sounds/sents/sm3_cln.wav
Audio(url = "https://www.ee.columbia.edu/~dpwe/sounds/sents/sm3_cln.wav")

In [0]:
# Run the repository's recognize.py on the clip downloaded in the previous cell.
!python3 recognize.py --file sm3_cln.wav

## Test 2

In [0]:
# Download sm1_cln.wav for recognize.py and embed a player for the same URL.
!curl -O https://www.ee.columbia.edu/~dpwe/sounds/sents/sm1_cln.wav
Audio(url = "https://www.ee.columbia.edu/~dpwe/sounds/sents/sm1_cln.wav")

In [0]:
# Run recognize.py on the downloaded sm1_cln.wav.
!python3 recognize.py --file sm1_cln.wav

## Test 3

In [0]:
# Download sf3_cln.wav for recognize.py and embed a player for the same URL.
! curl -O https://www.ee.columbia.edu/~dpwe/sounds/sents/sf3_cln.wav
Audio(url = "https://www.ee.columbia.edu/~dpwe/sounds/sents/sf3_cln.wav")

In [0]:
# Run recognize.py on the downloaded sf3_cln.wav.
!python3 recognize.py --file sf3_cln.wav

## Test 4

In [0]:
# Download sf1_cln.wav for recognize.py and embed a player for the same URL.
!curl -O https://www.ee.columbia.edu/~dpwe/sounds/sents/sf1_cln.wav
Audio(url = "https://www.ee.columbia.edu/~dpwe/sounds/sents/sf1_cln.wav")

In [0]:
# Run recognize.py on the downloaded sf1_cln.wav.
!python3 recognize.py --file sf1_cln.wav

## Test 5

In [0]:
# Download sm2_cln.wav for recognize.py and embed a player for the same URL.
!curl -O https://www.ee.columbia.edu/~dpwe/sounds/sents/sm2_cln.wav
Audio(url = "https://www.ee.columbia.edu/~dpwe/sounds/sents/sm2_cln.wav")

In [0]:
# Run recognize.py on the downloaded sm2_cln.wav.
!python3 recognize.py --file sm2_cln.wav

## Try some more (longer clips, background noise, etc.)

In [0]:
# Download mssp2.wav (from the site's musp/ folder rather than sents/) and embed a player for the same URL.
!curl -O https://www.ee.columbia.edu/~dpwe/sounds/musp/mssp2.wav
Audio(url = "https://www.ee.columbia.edu/~dpwe/sounds/musp/mssp2.wav")

In [0]:
# Run recognize.py on the downloaded mssp2.wav.
!python3 recognize.py --file mssp2.wav

# Phonetic Spellchecking

In [0]:
!pip3 install jellyfish
from jellyfish import metaphone, soundex, nysiis, match_rating_codex, jaro_distance, levenshtein_distance, damerau_levenshtein_distance, jaro_winkler, hamming_distance

In [0]:
!wget https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english.txt
with open('google-10000-english.txt') as f:
  vocab = [word.strip() for word in f.readlines()]
vocab += ["chive","chives"]

In [0]:
def suggest_corrections(typo, top_n = 3, reverse = False, *, encoder = None, metric = None, words = None):
  """Return the `top_n` candidate words whose phonetic code scores best against `typo`.

  Args:
    typo: the word to correct.
    top_n: how many candidates to return.
    reverse: False ranks by ascending score (use with distance metrics);
      True ranks by descending score (use with similarity metrics).
    encoder: callable word -> phonetic code. Defaults to the notebook-level
      `phonetic_encoder`.
    metric: callable (code, code) -> score. Defaults to the notebook-level
      `distance_metric`.
    words: candidate words. Defaults to the notebook-level `vocab`.

  Returns:
    A list of at most `top_n` words, best match first. Ties on score are
    ordered by position in `words` (reversed when `reverse` is True).

  When neither `encoder` nor `words` is given, the precomputed notebook-level
  `vocab_phonetic` is used as the encoded vocabulary, exactly as before; it must
  have been built from the current `vocab` with the current `phonetic_encoder`.
  Passing either override re-encodes the candidates so codes and words cannot
  drift apart.
  """
  use_cached_codes = encoder is None and words is None
  if encoder is None:
    encoder = phonetic_encoder
  if metric is None:
    metric = distance_metric
  if words is None:
    words = vocab
  codes = vocab_phonetic if use_cached_codes else [encoder(word) for word in words]

  typo_code = encoder(typo)
  # (score, index) pairs: the index is both the tie-breaker and the lookup key into `words`.
  ranked = sorted(((metric(typo_code, code), idx) for idx, code in enumerate(codes)), reverse = reverse)
  return [words[idx] for _, idx in ranked[:top_n]]

def spellcheck(sentence, reverse = False):
  """Correct a sentence word by word.

  The sentence is split on whitespace. Words found in `vocab` are kept as-is;
  every other word is replaced by the single best candidate from
  `suggest_corrections`, with `reverse` passed through as the ranking direction.
  The resulting words are joined with single spaces.
  """
  corrected = []
  for token in sentence.split():
    if token in vocab:
      corrected.append(token)
    else:
      corrected.append(suggest_corrections(token, 1, reverse)[0])
  return ' '.join(corrected)

In [0]:
# Encoders to compare; each one is applied to the typo and to every vocabulary word.
phonetic_encoders = [metaphone, soundex, nysiis, match_rating_codex]
# Metrics whose scores the comparison loop ranks in descending order (reverse=True)
# — presumably because a higher Jaro score means a closer match; confirm against the jellyfish docs.
inverse_distance_metrics = [jaro_distance, jaro_winkler]
# Metrics whose scores are ranked in ascending order (the default, reverse=False).
distance_metrics = [levenshtein_distance, damerau_levenshtein_distance, hamming_distance]

In [0]:
typo = 'chivez'
test_sentence = 'cottige cheeze with chivez is delicius'

# Every metric paired with its ranking direction: ascending for the distance
# metrics first, then descending for the inverse (similarity-style) ones.
metric_runs = [(metric, False) for metric in distance_metrics] + [(metric, True) for metric in inverse_distance_metrics]

# suggest_corrections() and spellcheck() read phonetic_encoder, vocab_phonetic and
# distance_metric as globals, so the loop variables must keep exactly these names.
for phonetic_encoder in phonetic_encoders:
  print('\n'+'-'*10 + phonetic_encoder.__name__.upper() + '-'*10)
  # Re-encode the whole vocabulary once per encoder.
  vocab_phonetic = list(map(phonetic_encoder, vocab))
  for distance_metric, descending in metric_runs:
    print(f"{distance_metric.__name__.upper()}\n\t\t\t{suggest_corrections(typo, reverse=descending)}, {spellcheck(test_sentence, reverse=descending)}")

In [0]:
# Configuration for the final run. spellcheck() reaches these through globals,
# so the names phonetic_encoder / vocab_phonetic / distance_metric are load-bearing.
distance_metric = damerau_levenshtein_distance
phonetic_encoder = metaphone
vocab_phonetic = list(map(phonetic_encoder, vocab))

# Garbled variants of one sentence (presumably speech-recogniser style output — TODO confirm source).
tests = [
    "cotage teese with chives s tolicious",
    "cotage chees with chaes is telicious",
    "cottige cheese with chives is delicious",
    "codies chees with chives is delicious",
    "cottis chees with chis is delicious",
]

# Print each input followed by its corrected form, with a blank line between cases.
for test in tests:
  print(f"{test}\n{spellcheck(test)}\n")