Connecting Model to Gradio

1. Image classification (pytorch)

Step 1: Setting up the Image Classification Model

In [None]:
import torch

# Load a pretrained ResNet-18 from PyTorch Hub and switch it to eval (inference) mode.
# The predict cell below references both `torch` and `model`, so this cell must run
# (it was previously commented out, which left those names undefined on a fresh kernel).
model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True).eval()

Step 2: Defining a predict function

In [None]:
import gradio as gr
import requests
from PIL import Image
from torchvision import transforms

# Download human-readable labels for ImageNet.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP failures instead of silently turning an
# error page into the label list.
response = requests.get("https://git.io/JJkYN", timeout=30)
response.raise_for_status()
labels = response.text.split("\n")

def predict(inp):
    """Classify an image and return a confidence score for every class.

    Relies on the notebook-level names `torch`, `model`, `labels` and
    `transforms` being defined by earlier cells.

    Args:
        inp: Image accepted by `transforms.ToTensor()`; the Gradio interface
            in this notebook passes a PIL image.

    Returns:
        dict mapping each label to its softmax probability (a Python float).
    """
    # unsqueeze(0) adds a leading batch dimension before the model call.
    batch = transforms.ToTensor()(inp).unsqueeze(0)
    with torch.no_grad():  # inference only, no gradients needed
        prediction = torch.nn.functional.softmax(model(batch)[0], dim=0)
    # Iterate over the model's actual outputs instead of a hard-coded 1000 so
    # the function keeps working if the model's class count differs.
    return {labels[i]: score for i, score in enumerate(prediction.tolist())}

Step 3: Creating a Gradio Interface 

In [None]:
# Bind the interface to `demo` before launching, as the other sections of this
# notebook do, so there is a handle to the running app afterwards.
# NOTE(review): the example images are relative paths - presumably they must
# exist in the working directory; confirm before running.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=gr.Label(num_top_classes=3),
    examples=["lion.jpg", "cheetah.jpg"],
)

demo.launch()

2. Image segmentation: see it in Colab

3. AnimeGAN image transformation demo: see it in Colab

4. Text to Speech

In [None]:
pip install neon_tts_plugin_coqui

In [None]:
import tempfile
import neon_tts_plugin_coqui
from neon_tts_plugin_coqui import CoquiTTS

# Keys of `CoquiTTS.langs`, used below as the choices of the language selector.
LANGUAGES = [*CoquiTTS.langs.keys()]
# Single CoquiTTS instance, created once and reused by every `tts` call.
coquiTTS = CoquiTTS()

def tts(text: str, language: str):
    """Run `coquiTTS.get_tts` for `text` against a temporary .wav file and return its path.

    The file is created with delete=False, so it is left on disk after the
    function returns; the caller receives the file's name.
    """
    wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with wav_file:
        # The selected language is handed to the plugin via the `speaker` mapping.
        coquiTTS.get_tts(text, wav_file, speaker={"language": language})
    return wav_file.name

# Two inputs: the text (pre-filled with the "sentence" entry of the "en"
# language record) and the language selector.
inputs = [
    gr.Textbox(
        label="Input",
        value=CoquiTTS.langs["en"]["sentence"],
        max_lines=3,
    ),
    gr.Radio(
        label="Language",
        choices=LANGUAGES,
        value="en",
    ),
]
# `tts` returns a file name, which is handed to the Audio output component.
outputs = gr.Audio(label="Output")

demo = gr.Interface(
    fn=tts,
    inputs=inputs,
    outputs=outputs,
)

demo.launch()

5. Speech to text

In [None]:

import os

# Save your HF API token from https://hf.co/settings/tokens in the `auth_token`
# environment variable to avoid rate limiting.
auth_token = os.environ.get("auth_token")

# Load the interface automatically from a Hugging Face model.
# You can drop the hf_token argument if you don't care about rate limiting.
demo = gr.load(
    "huggingface/facebook/wav2vec2-base-960h",
    hf_token=auth_token,
    inputs="mic",
    title="Speech-to-text",
    description="Let me try to guess what you're saying!",
)

demo.launch()


6. Text generation

In [None]:
from transformers import pipeline

# Text-generation pipeline for the `gpt2` model, built once and reused by `generate`.
generator = pipeline(task="text-generation", model="gpt2")

def generate(text):
    """Pass `text` to the notebook-level `generator` and return the generated text.

    Requests a single sequence with max_length=30 and returns the
    "generated_text" field of the first result.
    """
    sequences = generator(text, max_length=30, num_return_sequences=1)
    first_sequence = sequences[0]
    return first_sequence["generated_text"]

# Example prompts; each inner list holds the value for the single text input.
examples = [
    ["The Moon's orbit around Earth has"],
    ["The smooth Borealis basin in the Northern Hemisphere covers 40%"],
]

# Use the top-level gr.Textbox component for both input and output, matching
# the component style used elsewhere in this notebook; the legacy
# gr.inputs / gr.outputs namespaces are deprecated.
demo = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(lines=5, label="Input Text"),
    outputs=gr.Textbox(label="Generated Text"),
    examples=examples,
)

demo.launch()