# Shruti_Model_Deployment_using_FastAPI.ipynb

In [1]:
#@title Mount Google Drive
from google.colab import drive
drive.mount('drive')

Mounted at drive


In [2]:
#@title Clone the REPOS

import os
from os.path import exists, join, basename, splitext
!pip install gdown
git_repo_url = 'https://github.com/NVIDIA/tacotron2.git'
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
    # clone and install
    !git clone -q --recursive {git_repo_url}
    !git clone -q --recursive https://github.com/SortAnon/hifi-gan
    !pip install -q librosa unidecode
# pbar.update(1) # downloaded TT2 and HiFi-GAN
import sys
sys.path.append('hifi-gan')
sys.path.append(project_name)
import time
import matplotlib
import matplotlib.pylab as plt
import gdown

%matplotlib inline
import IPython.display as ipd
import numpy as np
import torch
import json
from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT
from audio_processing import griffin_lim
from text import text_to_sequence
from env import AttrDict
from meldataset import MAX_WAV_VALUE
from models import Generator

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.9/235.9 KB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25h

  return s in _symbol_to_id and s is not '_' and s is not '~'
  return s in _symbol_to_id and s is not '_' and s is not '~'


In [3]:
#@title Creating Hyperparameters

from text import symbols

class HParams:
    def __init__(self) -> None:
        self.epochs=500
        self.iters_per_checkpoint=1000
        self.seed=1234
        self.dynamic_loss_scaling=True
        self.fp16_run=False
        self.distributed_run=False
        self.dist_backend="nccl"
        self.dist_url="tcp://localhost:54321"
        self.cudnn_enabled=True
        self.cudnn_benchmark=False
        self.ignore_layers=['embedding.weight']

        ################################
        # Data Parameters             #
        ################################
        self.load_mel_from_disk=False
        self.training_files='./filelists/train_list.txt'
        self.validation_files='./filelists/val_list.txt'
        self.text_cleaners=['transliteration_cleaners']

        ################################
        # Audio Parameters             #
        ################################
        self.max_wav_value=32768.0
        self.sampling_rate=22050
        self.filter_length=1024
        self.hop_length=256
        self.win_length=1024
        self.n_mel_channels=80
        self.mel_fmin=0.0
        self.mel_fmax=8000.0

        ################################
        # Model Parameters             #
        ################################
        self.n_symbols=len(symbols)
        self.symbols_embedding_dim=512

        # Encoder parameters
        self.encoder_kernel_size=5
        self.encoder_n_convolutions=3
        self.encoder_embedding_dim=512

        # Decoder parameters
        self.n_frames_per_step=1  # currently only 1 is supported
        self.decoder_rnn_dim=1024
        self.prenet_dim=256
        self.max_decoder_steps=1000
        self.gate_threshold=0.5
        self.p_attention_dropout=0.1
        self.p_decoder_dropout=0.1

        # Attention parameters
        self.attention_rnn_dim=1024
        self.attention_dim=128

        # Location Layer parameters
        self.attention_location_n_filters=32
        self.attention_location_kernel_size=31

        # Mel-post processing network parameters
        self.postnet_embedding_dim=512
        self.postnet_kernel_size=5
        self.postnet_n_convolutions=5

        ################################
        # Optimization Hyperparameters #
        ################################
        self.use_saved_learning_rate=False
        self.learning_rate=1e-3
        self.weight_decay=1e-6
        self.grad_clip_thresh=1.0
        self.batch_size=8
        self.mask_padding=True  # set model's padded outputs to padded values

hparams = HParams()

In [4]:
#@title Load Tacotron2 &  HiFi-GAN

#@markdown Config:

#Universal HiFi-GAN (has some robotic noise): 1qpgI41wNXFcH-iKq1Y42JlBC9j0je8PW
Tacotron2_Model = '/content/drive/MyDrive/colab/outdir/Shruti_22kHz_45epoch'#@param {type:"string"}
TACOTRON2_ID = Tacotron2_Model
HIFIGAN_ID = "1qpgI41wNXFcH-iKq1Y42JlBC9j0je8PW"


graph_width = 900
graph_height = 360
def plot_data(data, figsize=(int(graph_width/100), int(graph_height/100))):
    %matplotlib inline
    fig, axes = plt.subplots(1, len(data), figsize=figsize)
    for i in range(len(data)):
        axes[i].imshow(data[i], aspect='auto', origin='bottom', 
                    interpolation='none', cmap='inferno')
    fig.canvas.draw()
    plt.show()

!gdown --id '1E12g_sREdcH5vuZb44EZYX8JjGWQ9rRp'
thisdict = {}
for line in reversed((open('/content/drive/MyDrive/colab/outdir/merged.dict.txt', "r").read()).splitlines()):
    thisdict[(line.split(" ",1))[0]] = (line.split(" ",1))[1].strip()

def ARPA(text, punctuation=r"!?,।.;", EOS_Token=True):
    out = ''
    for word_ in text.split(" "):
        word=word_; end_chars = ''
        while any(elem in word for elem in punctuation) and len(word) > 1:
            if word[-1] in punctuation: end_chars = word[-1] + end_chars; word = word[:-1]
            else: break
        try:
            word_arpa = thisdict[word.upper()]
            word = "{" + str(word_arpa) + "}"
        except KeyError: pass
        out = (out + " " + word + end_chars).strip()
    if EOS_Token and out[-1] != ";": out += ";"
    return out

def get_hifigan(MODEL_ID):
    # Download HiFi-GAN
    hifigan_pretrained_model = 'hifimodel'
    gdown.download("https://drive.google.com/uc?id="+MODEL_ID, hifigan_pretrained_model, quiet=False)
    if not exists(hifigan_pretrained_model):
        raise Exception("HiFI-GAN model failed to download!")

    # Load HiFi-GAN
    conf = os.path.join("hifi-gan", "config_v1.json")
    with open(conf) as f:
        json_config = json.loads(f.read())
    h = AttrDict(json_config)
    torch.manual_seed(h.seed)
    hifigan = Generator(h).to(torch.device("cuda"))
    state_dict_g = torch.load(hifigan_pretrained_model, map_location=torch.device("cuda"))
    hifigan.load_state_dict(state_dict_g["generator"])
    hifigan.eval()
    hifigan.remove_weight_norm()
    return hifigan, h

def has_MMI(STATE_DICT):
    return any(True for x in STATE_DICT.keys() if "mi." in x)

def get_Tactron2(MODEL_ID):
    tacotron2_pretrained_model = TACOTRON2_ID
    if not exists(tacotron2_pretrained_model):
        raise Exception("Tacotron2 model failed to download!")
    # Load Tacotron2 and Config
    hparams.sampling_rate = 22050
    hparams.max_decoder_steps = 3000 # Max Duration
    hparams.gate_threshold = 0.25 # Model must be 25% sure the clip is over before ending generation
    model = Tacotron2(hparams)
    state_dict = torch.load(tacotron2_pretrained_model)['state_dict']
    if has_MMI(state_dict):
        raise Exception("ERROR: This notebook does not currently support MMI models.")
    model.load_state_dict(state_dict)
    _ = model.cuda().eval().half()
    return model



Access denied with the following error:

 	Cannot retrieve the public link of the file. You may need to change
	the permission to 'Anyone with the link', or have had many accesses. 

You may still be able to access the file from the browser:

	 https://drive.google.com/uc?id=1E12g_sREdcH5vuZb44EZYX8JjGWQ9rRp 



In [5]:
#@title Infer Audio Method
def end_to_end_infer(text, pronounciation_dictionary, show_graphs):
    for i in [x for x in text.split("\n") if len(x)]:
        if not pronounciation_dictionary:
            if i[-1] != ";": i=i+";" 
        else: i = ARPA(i)
        with torch.no_grad(): # save VRAM by not including gradients
            sequence = np.array(text_to_sequence(i, ['transliteration_cleaners']))[None, :]
            sequence = torch.autograd.Variable(torch.from_numpy(sequence)).cuda().long()
            mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
            if show_graphs:
                plot_data((mel_outputs_postnet.float().data.cpu().numpy()[0],
                        alignments.float().data.cpu().numpy()[0].T))
            y_g_hat = hifigan(mel_outputs_postnet.float())
            audio = y_g_hat.squeeze()
            audio = audio * MAX_WAV_VALUE
            print("")
            ipd.display(ipd.Audio(audio.cpu().numpy().astype("int16"), rate=hparams.sampling_rate))

In [6]:
#@title Get Modal & HiFIGAN
hifigan, h = get_hifigan(HIFIGAN_ID)
model = get_Tactron2(TACOTRON2_ID)
previous_tt2_id = TACOTRON2_ID

Downloading...
From: https://drive.google.com/uc?id=1qpgI41wNXFcH-iKq1Y42JlBC9j0je8PW
To: /content/hifimodel
100%|██████████| 55.8M/55.8M [00:02<00:00, 24.9MB/s]


Removing weight norm...


In [7]:
#@title Configuration before Infer

pronounciation_dictionary = False #@param {type:"boolean"}
# disables automatic ARPAbet conversion, useful for inputting your own ARPAbet pronounciations or just for testing
show_graphs = False #@param {type:"boolean"}
max_duration = 25 #this does nothing
model.decoder.max_decoder_steps = 10000 #@param {type:"integer"}
stop_threshold = 0.324 #@param {type:"number"}
model.decoder.gate_threshold = stop_threshold

In [None]:
#@title Synthesize a text
print(f"Current Config:\npronounciation_dictionary: {pronounciation_dictionary}\nshow_graphs: {show_graphs}\nmax_duration (in seconds): {max_duration}\nstop_threshold: {stop_threshold}\n\n")

time.sleep(1)
print("Enter/Paste your text.")
contents = []
while True:
    try:
        print("-"*50)
        line = input()
        if line == "":
            continue
        end_to_end_infer(line, pronounciation_dictionary, show_graphs)
    except EOFError:
        break
    except KeyboardInterrupt:
        print("Stopping...")
        break

Current Config:
pronounciation_dictionary: False
show_graphs: True
max_duration (in seconds): 25
stop_threshold: 0.324


Enter/Paste your text.
--------------------------------------------------
त्यो रात्रि पनि सधैँको जस्तो साधारण रात्रि थियो



--------------------------------------------------
Stopping...


In [17]:
#@title Installing Server Requirements
# Install server requirements
!pip install fastapi==0.68.1
!pip install uvicorn==0.15.0
!pip install timm==0.4.12
!pip install python-multipart==0.0.5
!pip install nest-asyncio
!pip install pyngrok
!pip install aiofiles
!pip install PyPDF2==2.12.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting PyPDF2==2.12.0
  Downloading pypdf2-2.12.0-py3-none-any.whl (222 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m222.2/222.2 KB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: PyPDF2
  Attempting uninstall: PyPDF2
    Found existing installation: PyPDF2 3.0.1
    Uninstalling PyPDF2-3.0.1:
      Successfully uninstalled PyPDF2-3.0.1
Successfully installed PyPDF2-2.12.0


In [9]:
#@title Text Extraction Class
import PyPDF2
import base64
from io import BytesIO

class PDFExtract :
    #*********************** Conversion from Preeti to Unicode ***********************#
    unicodeatoz=["ब","द","अ","म","भ","ा","न","ज","ष्","व","प","ि","फ","ल","य","उ","त्र","च","क","त","ग","ख","ध","ह","थ","श"]
    unicodeAtoZ=["ब्","ध","ऋ","म्","भ्","ँ","न्","ज्","क्ष्","व्","प्","ी","ः","ल्","इ","ए","त्त","च्","क्","त्","ग्","ख्","ध्","ह्","थ्","श्"]
    unicode0to9=["ण्","ज्ञ","द्द","घ","द्ध","छ","ट","ठ","ड","ढ"]
    symbolsDict=\
    {
        "~":"ञ्",
        "`":"ञ",
        "!":"१",
        "@":"२",
        "#":"३",
        "$":"४",
        "%":"५",
        "^":"६",
        "&":"७",
        "*":"८",
        "(":"९",
        ")":"०",
        "-":"(",
        "_":")",
        "+":"ं",
        "[":"ृ",
        "{":"र्",
        "]":"े",
        "}":"ै",
        "\\":"्",
        "|":"्र",
        ";":"स",
        ":":"स्",
        "'":"ु",
        "\"":"ू",
        ",":",",
        "<":"?",
        ".":"।",
        ">":"श्र",
        "/":"र",
        "?":"रु",
        "=":".",
        "ˆ":"फ्",
        "Î":"ङ्ख",
        "Í":"ङ्क",
        "å":"द्व",
        "÷":"/"
    }

    #arranging certain characters, symbols for proper output
    def normalizePreeti(preetitxt):
        normalized=''
        previoussymbol=''
        preetitxt=preetitxt.replace('qm','s|')
        preetitxt=preetitxt.replace('f]','ो')
        preetitxt=preetitxt.replace('km','फ')
        preetitxt=preetitxt.replace('0f','ण')
        preetitxt=preetitxt.replace('If','क्ष')
        preetitxt=preetitxt.replace('if','ष')
        preetitxt=preetitxt.replace('cf','आ')
        #my changes
        preetitxt=preetitxt.replace('O{', '')
        preetitxt=preetitxt.replace('Í', '')
        preetitxt=preetitxt.replace('æ', '') #opening quotation
        preetitxt=preetitxt.replace('Æ', '') #closing quotation
        preetitxt=preetitxt.replace('Ù', '') #;
        preetitxt=preetitxt.replace('«', '|')
        preetitxt=preetitxt.replace('¿', '?') #dirga ru

        index=-1
        while index+1 < len(preetitxt):
            index+=1
            character=preetitxt[index]
            try:
                #for rearranging र् 
                if preetitxt[index+2] == '{':
                    temp=preetitxt[index+1]
                    if temp=='f' or temp=='ो' or temp=='}' or temp=='L':
                        normalized+='{'+character+temp
                        index+=2
                        continue
                if preetitxt[index+1] == '{':
                    if character!='f':
                        normalized+='{'+character
                        index+=1
                        continue
            except IndexError:
                pass
            if character=='l':
                previoussymbol='l'
                continue
            else:
                normalized+=character+previoussymbol
                previoussymbol=''
        return normalized

    def convert(preeti):
        converted=''
        normalizedpreeti=PDFExtract.normalizePreeti(preeti)
        for index, character in enumerate(normalizedpreeti):
            try:
                if ord(character) >= 97 and ord(character) <= 122:
                    converted+=PDFExtract.unicodeatoz[ord(character)-97]

                elif ord(character) >= 65 and ord(character) <= 90:
                    converted+=PDFExtract.unicodeAtoZ[ord(character)-65]

                elif ord(character) >= 48 and ord(character) <= 57:
                    converted+=PDFExtract.unicode0to9[ord(character)-48]

                else:
                    converted+=PDFExtract.symbolsDict[character]

            except KeyError:
                converted+=character
                
        return converted


    #*********************** Extraction of content from PDF ***********************#
    pdfReader = None
    pageNumber = -1

    #Decodes the string to byte format. Returns BytesIO
    def encodedBase64ToObj(encoded_string):
        ioBytes = PDFExtract.decode64(encoded_string)
        PDFExtract.pdfReader = PyPDF2.PdfFileReader(ioBytes)
        PDFExtract.pageNumber = PDFExtract.pdfReader.numPages
    
    def decode64(encoded_string):
        return BytesIO(base64.b64decode(encoded_string))

    def pdfToObj(self, src):
        pdfFileObj = open(src, 'rb')
        print(pdfFileObj)
        PDFExtract.pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
        PDFExtract.pageNumber = PDFExtract.pdfReader.numPages 

    @staticmethod
    def getPageNumber():
        return PDFExtract.pageNumber

    def stringProcessing(sentStr):
        sentStr = sentStr.replace('\u200c', '')
        return sentStr

    def wordList(pageNumber):
        if PDFExtract.pdfReader == None or PDFExtract.pageNumber < 1:
            print("No any PDF source")
            exit(-2)

        if pageNumber<0 or pageNumber>PDFExtract.pageNumber:
            print("Page Number Invalid")
            exit(-1)

        #extracts content of the page   
        pageObj = PDFExtract.pdfReader.getPage(pageNumber) 
        extractedText = pageObj.extractText()

        #checks for ascii of प, ा, र  if in Preeti. Requires conversion if true
        if 'k' in extractedText or 'f' in extractedText  or '/' in extractedText:
            extractedText = PDFExtract.convert(extractedText)
        
        extractedText = PDFExtract.stringProcessing(extractedText)
        
        #separation by word using intermediate space
        wordList = (extractedText).split()
        return(wordList)
    
    def sentenceList(self, pageNumber):
        wordList = PDFExtract.wordList(pageNumber)
        sentence = ''
        sentenceList = []

        #separation by sentence using ।, !, ?
        for i in wordList:
            if i == '।':
                sentenceList.append(sentence + '।')
                sentence = ''
            elif i == '!':
                sentenceList.append(sentence + '!')
                sentence = ''
            elif i == '?':
                sentenceList.append(sentence + '?')
                sentence = ''
            else:
                sentence += i + ' '

        if len(sentence) > 0:
            sentenceList.append(sentence)

        return sentenceList

In [11]:
#@title Setup the FastAPI server

#Creating API
from typing import List
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, StreamingResponse
import aiofiles


app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_credentials=False,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"]
)
@app.post("/uploadfiles/")
async def create_upload_file(files: List[UploadFile] = File(...)):
    for file in files:
        print(file)
        contents = await file.read()
        print(str(contents)[2:-1])
        async with aiofiles.open(file.filename, mode='wb') as f:
            await f.write(contents)
            e1 = PDFExtract()
            e1.pdfToObj(file.filename)
            page = e1.getPageNumber()
            for i in range (page):
                print(e1.sentenceList(i))
                print('\n\n',  i)
                end_to_end_infer(line, pronounciation_dictionary, show_graphs)
    return {'success': 1}


@app.get("/")
async def main():
    """Create a basic home page to upload a file

    :return: HTML for homepage
    :rtype: HTMLResponse
    """

    content = """<body>
          <h4>कथाहरू पठाउनुहोस् र हामी यसलाई आवाज दिनेछौं</h4>
          <form action="/uploadfiles/" enctype="multipart/form-data" method="post">
              <input name="files" type="file" multiple>
              <input type="submit">
          </form>
      </body>
      """
    return HTMLResponse(content=content)


In [14]:
#@title Ngrok Token 
auth_token = "2GKmydjZUq8ropNrnrlchTXeWDF_7JhhvSt3eFHmjp8cbcbrx" #@param {type:"string"}
# Since we can't access Colab notebooks IP directly we'll use
# ngrok to create a public URL for the server via a tunnel

# Authenticate ngrok
# https://dashboard.ngrok.com/signup
# Then go to the "Your Authtoken" tab in the sidebar and copy the API key
import os
os.system(f"ngrok authtoken {auth_token}")

0

In [20]:
from pyngrok import ngrok

# Create tunnel
public_url = ngrok.connect(8000, port='8000', bind_tls=True)

In [None]:
# Check if it exists
!ps aux | grep ngrok

root         739  5.6  0.1 726660 24720 ?        Sl   08:19   0:00 /usr/local/lib/python3.8/dist-packages/pyngrok/bin/ngrok start --none --log=stdout
root         749  0.0  0.0  39208  6584 ?        S    08:19   0:00 /bin/bash -c ps aux | grep ngrok
root         751  0.0  0.0  38580  5020 ?        S    08:19   0:00 grep ngrok


# Run uvicorn server


In [21]:
import nest_asyncio

# Allow for asyncio to work within the Jupyter notebook cell
nest_asyncio.apply()

import uvicorn

# Run the FastAPI app using uvicorn
print(public_url)
uvicorn.run(app)

NgrokTunnel: "https://e332-34-141-204-201.ngrok.io" -> "http://localhost:8000"


INFO:     Started server process [358]
INFO:uvicorn.error:Started server process [358]
INFO:     Waiting for application startup.
INFO:uvicorn.error:Waiting for application startup.
INFO:     Application startup complete.
INFO:uvicorn.error:Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
INFO:uvicorn.error:Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     103.186.197.218:0 - "GET / HTTP/1.1" 200 OK
INFO:     103.186.197.218:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
<starlette.datastructures.UploadFile object at 0x7f85a87a4910>
%PDF-1.7\n%\xe2\xe3\xcf\xd3\n14 0 obj\n<<\n/Filter /FlateDecode\n/Length 11\n>>\nstream\nH\x89j\x00\x080\x00\x00\x81\x00\x81\nendstream\nendobj\n15 0 obj\n<<\n/Filter /FlateDecode\n/Length 3683\n/Length1 5356\n>>\nstream\nH\x89T\x95\x7fl\x12i\x1a\xc7\xdfwf\x80\x81N[~S*\xb4L)\xc5\xb1"\xa5\x08\x08\n\x02\xb5\xac\xe5\xb6\xec\xed\xdc\x86\xdcq\x17\x8c\xa8\xe0\x16-n\xd9\x14\x95\xdbp\t\x9a\x9a\xd4M\xbbi\x93\xb2\xd7\xe6lNr\xc7\x1f\\\xd2?\xd6\xa4kj\xae\xf54\xb1w\xf6\xb2\xfd\xa3\xb9\xf3r\xbd\xa4&\xfe\xa1YMl\xcen\xaf\xea\xdc\xbd\xd0\xda\xbd\xcd\x1b2y\xdegf\x98\xef\xf7\xf9<\xcf\x0b \x00\xa0\x06\xfc\n\xe0\xa0\xe3\x83\x9f\x98;\xdb\xbbn\xfd\x01\x80z\n\xed\x86N\r~\xaa{\xb06?\x88\xe2_\xa0x3~!q\x1e\x93\xdc=\t\x80\xf8#\x00\xe0\xdf\x13\xe7.\xc5\x19\xed\xc44\xca\xcd\x01 j:{\xe6\xe4\xe93\x97\x7f\x16\x04@\xba\x1f\xed\xd9\xc

ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/uvicorn/protocols/http/h11_impl.py", line 373, in run_asgi
    result = await app(self.scope, self.receive, self.send)
  File "/usr/local/lib/python3.8/dist-packages/uvicorn/middleware/proxy_headers.py", line 75, in __call__
    return await self.app(scope, receive, send)
  File "/usr/local/lib/python3.8/dist-packages/fastapi/applications.py", line 208, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.8/dist-packages/starlette/applications.py", line 112, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.8/dist-packages/starlette/middleware/errors.py", line 181, in __call__
    raise exc from None
  File "/usr/local/lib/python3.8/dist-packages/starlette/middleware/errors.py", line 159, in __call__
    await self.app(scope, receive, _send)
  File "/usr/local/lib/python3.8/dist-pa

In [19]:
# Kill tunnel
ngrok.disconnect(public_url=public_url)