<a href="https://colab.research.google.com/github/mrkim21/mrkim21.github.io/blob/main/appfolder/appcodes/240202_tts_pitch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🌀 TTS with pitch contour

[App link](https://mrkim21.github.io/appfolder/tts-pitch.html)

In [None]:
!pip install gradio librosa matplotlib gtts

In [None]:
import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from gtts import gTTS
import io
import os

# This function is adapted to work with Gradio
def generate_speech(text, lang='en'):
    """Synthesize *text* with gTTS and save the result to an audio file.

    Args:
        text: The text to speak.
        lang: Language code handed to gTTS (defaults to 'en').

    Returns:
        The path of the written audio file, relative to the working
        directory. The same path is reused on every call, so each call
        overwrites the previous result.
    """
    # gTTS emits MP3 data, so the file carries an .mp3 extension; labelling it
    # .wav misleads any consumer that picks a decoder or MIME type by name.
    output_file = 'output.mp3'
    tts = gTTS(text=text, lang=lang)
    # Stream straight into the file rather than staging a full copy in memory.
    with open(output_file, "wb") as f:
        tts.write_to_fp(f)
    return output_file

def plot_pitch_contour(audio_file_path):
    """Plot the waveform and the pYIN pitch track of an audio file.

    Args:
        audio_file_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        The path 'pitch_contour.png' (relative to the working directory).
        The same path is reused on every call, so each call overwrites the
        previous image.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(audio_file_path, sr=None)
    fmin = librosa.note_to_hz('C2')
    fmax = librosa.note_to_hz('C6')
    pitch, _, _ = librosa.pyin(y, fmin=fmin, fmax=fmax, sr=sr)

    # times_like assumes 22050 Hz unless told otherwise; pass the real rate so
    # the pitch frames line up with the waveform's time axis.
    pitch_times = librosa.times_like(pitch, sr=sr)
    # Unvoiced frames come back as NaN; plot only the frames with a pitch.
    voiced = np.isfinite(pitch)

    # Create time axis in seconds
    wave_times = np.arange(len(y)) / sr

    fig, ax_wave = plt.subplots(figsize=(14, 5))
    try:
        # The waveform (amplitude) and the pitch (Hz) live on very different
        # scales, so each gets its own y-axis; on a shared axis limited to
        # 50-350 the waveform would fall entirely outside the visible range.
        ax_pitch = ax_wave.twinx()
        (wave_line,) = ax_wave.plot(wave_times, y, alpha=0.5, label='Waveform')
        (pitch_points,) = ax_pitch.plot(
            pitch_times[voiced], pitch[voiced], 'ro', label='Pitch', markersize=2
        )

        ax_wave.set_title('Pitch Contour')
        ax_wave.set_xlabel('Time (s)')
        ax_wave.set_ylabel('Amplitude')
        ax_pitch.set_ylabel('Frequency (Hz)')
        ax_pitch.set_ylim(50, 350)  # Adjust the y-axis limits to the range of pitch values
        # One legend covering the artists from both axes.
        ax_pitch.legend(handles=[wave_line, pitch_points])
        fig.savefig('pitch_contour.png')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)
    return 'pitch_contour.png'


def generate_and_plot(text, lang):
    """Synthesize speech for *text* and plot its pitch contour.

    Args:
        text: The text to speak.
        lang: Language code for the synthesis; falls back to 'en' when no
            language has been chosen (``None`` or empty).

    Returns:
        A tuple ``(audio_file, plot_img)`` holding the path of the generated
        audio file and the path of the pitch-contour image.

    Raises:
        gr.Error: If *text* is empty or only whitespace, so the UI shows a
            readable message instead of an opaque synthesis failure.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    # The language selector may deliver None when nothing is selected.
    audio_file = generate_speech(text, lang or 'en')
    plot_img = plot_pitch_contour(audio_file)
    return audio_file, plot_img

# Gradio interface: a text box and a language selector feed generate_and_plot,
# whose two return values fill the audio player and the image panel.
iface = gr.Interface(fn=generate_and_plot,
                     inputs=[gr.Textbox(label="Enter Text"),
                             # Preselect a language so a submit without an
                             # explicit choice does not pass lang=None.
                             gr.Radio(['en', 'ko'], value='en', label="Language")],
                     outputs=[gr.Audio(label="Generated Speech"), gr.Image(label="Pitch Contour")],
                     title="Speech Generation and Pitch Contour Visualization",
                     description="Generates speech from text and visualizes the pitch contour. Select a language and enter text to see the results.")

# share=True requests a public share link in addition to the local server.
iface.launch(share=True)
