# Environment Set-up


In TouchDesigner you can create a conda environment and link the TouchDesigner (TD) Python interpreter to that environment.
The packages we need to install are listed below; for more guidance on creating conda environments, see https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands

In [None]:
#conda create -n audiogen python=3.7.2 
#conda activate audiogen
!pip install -q numpy pandas requests transformers streamz

Downloading the voice synthesis models

In [None]:
# Download the TTS model, the vocoder model, their JSON configs and the
# vocoder scale statistics from Google Drive (one file ID per command).
# The output names given with -O are the file names the audio-generation
# cells load later on.
# NOTE(review): newer gdown releases deprecate the `--id` flag - confirm
# against the installed gdown version.
!gdown --id 1NFsfhH8W8AgcfJ-BsL8CYAwQfZ5k4T-n -O tts_model.pth.tar
!gdown --id 1IAROF3yy9qTK43vG_-R67y3Py9yYbD6t -O config.json
!gdown --id 1Ty5DZdOc0F7OTGj9oJThYbL5iVu_2G0K -O vocoder_model.pth.tar
!gdown --id 1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu -O config_vocoder.json
!gdown --id 11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU -O scale_stats_vocoder.npy

In [None]:
!sudo apt-get install espeak
!git clone https://github.com/coqui-ai/TTS TTS_repo
%cd TTS_repo
!git checkout 4132240
!pip install -r requirements.txt
!pip install numpy==1.19.5
!python setup.py develop
%cd ..

# Weather Data API


In [None]:
from streamz.dataframe import PeriodicDataFrame
import operator as op
import numpy as np
import pandas as pd
import requests
import param

openweathermap_api_key='YOUR_TOKEN'

def weather_data(city, openweathermap_api_key=openweathermap_api_key):
    """
    Get the current weather for a single city using the openweathermap API.

    Any field missing from the API response (including every field of an
    error response, e.g. for an invalid key or an unknown city) falls back
    to 0 instead of raising.

    parameters:
    city(str): Name of city from which current data is fetched
    openweathermap_api_key(str): API key sent as the `appid` query parameter

    returns:
    dict: flat mapping of the weather fields below, plus 'time' (timestamp
    taken when the data was fetched)
    """
    # Let requests build the query string so the city name is URL-encoded,
    # and set a timeout so a stalled connection cannot hang the caller.
    # https keeps the API key out of plain-text traffic.
    res = requests.get(
        'https://api.openweathermap.org/data/2.5/weather',
        params={'q': city, 'appid': openweathermap_api_key, 'units': 'metric'},
        timeout=10,
    )
    weather = res.json()

    coord = weather.get('coord', {})
    main = weather.get('main', {})
    wind = weather.get('wind', {})
    snow = weather.get('snow', {})
    rain = weather.get('rain', {})
    # 'weather' is a list of condition entries; use an empty entry when it is
    # missing or empty so the lookups below still fall back to 0.
    condition = (weather.get('weather') or [{}])[0]

    data = {}
    data['Lat'] = coord.get('lat', 0)
    data['Lon'] = coord.get('lon', 0)
    data['Temperature'] = main.get('temp', 0)  # Temperature, Celsius (units=metric)
    data['Temperature Max'] = main.get('temp_max', 0)  # Maximum currently observed temperature (within large megalopolises and urban areas)
    data['Temperature Min'] = main.get('temp_min', 0)  # Minimum currently observed temperature (within large megalopolises and urban areas)
    data['Feels Like'] = main.get('feels_like', 0)  # Temperature as perceived by humans
    data['Visibility'] = weather.get('visibility', 0)  # Visibility, meter
    data['Humidity'] = main.get('humidity', 0)  # Humidity, %
    data['Pressure'] = main.get('pressure', 0)  # Atmospheric pressure (on the sea level, if there is no sea_level or grnd_level data), hPa
    data['Wind Speed'] = wind.get('speed', 0)  # Wind speed, meter/sec
    data['Wind Gust'] = wind.get('gust', 0)  # Wind gust, meter/sec
    data['Wind Deg'] = wind.get('deg', 0)  # Wind direction, degrees (meteorological)
    data['Clouds'] = weather.get('clouds', {}).get('all', 0)  # Cloudiness, %
    data['Snow 1h'] = snow.get('1h', 0)  # Snow volume for the last 1 hour, mm
    data['Snow 3h'] = snow.get('3h', 0)  # Snow volume for the last 3 hours, mm
    data['Rain 1h'] = rain.get('1h', 0)  # Rain volume for the last 1 hour, mm
    data['Rain 3h'] = rain.get('3h', 0)  # Rain volume for the last 3 hours, mm
    data['weather'] = condition.get('main', 0)  # Group of weather parameters (Rain, Snow, Extreme etc.)
    data['weather_desc'] = condition.get('description', 0)  # Weather condition within the group.
    data['time'] = pd.Timestamp.now()
    return data

def streaming_weather_data(**kwargs):
    """
    Callback function: get the current London weather as a one-row DataFrame.

    Any keyword arguments supplied by the caller are accepted and ignored.

    returns:
    pandas.DataFrame: a single row of weather fields, indexed by the 'time'
    column (timestamp taken when the row was built)
    """
    # weather_data returns a plain dict of scalars, so it has to be wrapped
    # in a one-row DataFrame before a 'time' index can be set on it.
    df = pd.DataFrame(weather_data('London'), index=[0])
    df['time'] = [pd.Timestamp.now()]
    return df.set_index('time')

## Make a Single Call to retrieve weather Data

In [None]:
weather_data('London')

## Generate a stream of weather data based on the time interval you want

In [None]:
# Build a streaming dataframe fed by streaming_weather_data at a 1 second interval.
# NOTE(review): every call of the callback issues an OpenWeatherMap request -
# confirm that a 1s interval stays within the API plan's rate limit.
df = PeriodicDataFrame(streaming_weather_data, interval='1s')

# Set-up Generative Text Model

We are downloading GPT-Neo, EleutherAI's open-source model in the style of OpenAI's GPT-3. This is a 5.31GB model, so it takes a while to download locally, and it also takes quite a while to load into memory.

In [None]:
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
from datetime import datetime as dt

def suffix(d):
    """Return the English ordinal suffix ('st', 'nd', 'rd' or 'th') for integer d.

    Numbers ending in 11, 12 or 13 always take 'th' (11th, 112th), so that
    check is made on d % 100 rather than on d itself; every other number is
    decided by its last digit.
    """
    if 11 <= d % 100 <= 13:
        return 'th'
    return {1: 'st', 2: 'nd', 3: 'rd'}.get(d % 10, 'th')

def custom_strftime(format, t):
    """Format t with strftime, then expand the '{S}' placeholder.

    Every '{S}' in the formatted string is replaced by the day of the month
    followed by the ordinal suffix returned by suffix() for that day.
    """
    formatted = t.strftime(format)
    ordinal_day = f"{t.day}{suffix(t.day)}"
    return formatted.replace('{S}', ordinal_day)

# Load the pretrained GPT-Neo 1.3B causal language model and the tokenizer
# published under the same model name.
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

Interestingly, the generative model takes a temperature argument, which calibrates how conservative or how out-of-the-box its generated sentences can be. We dynamically generate our temperature input by dividing the minimum temperature by the maximum temperature in our input city.

In [None]:
city = 'London'

In [None]:
weatherData = weather_data(city)
temp_min = weatherData.get("Temperature Min")
temp_max = weatherData.get("Temperature Max")
# The sampling temperature is the city's min/max temperature ratio.
# model.generate needs a strictly positive temperature, so fall back to a
# neutral 1.0 when the ratio is undefined (max temperature of 0) or not
# positive (min of 0, or min and max on opposite sides of 0 degrees Celsius).
if temp_max and temp_min / temp_max > 0:
    input_temperature = temp_min / temp_max
else:
    input_temperature = 1.0
print(input_temperature)

On CPU this can take up to 4 minutes to generate

In [None]:
# The prompt is formatted using the input city and the current date (day with
# ordinal suffix, month name, year);
# it is the starting sentence that is used by the generative text model 
prompt = f"Welcome to the generative audio project, we are in {city} and it's the {custom_strftime('{S} %B %Y', dt.now())} and today we're going to talk about"

# Tokenize the prompt and take its token ids as PyTorch tensors ("pt")
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

# Sample a continuation of the prompt, using the weather-derived
# input_temperature as the sampling temperature and max_length=250 as the length cap
gen_tokens = model.generate(input_ids, do_sample=True, temperature=input_temperature, max_length=250,)
# Decode the generated token ids back to text and keep the first sequence
gen_text = tokenizer.batch_decode(gen_tokens)[0]

In [None]:
gen_text

# Audio Generation

In [None]:
def interpolate_vocoder_input(scale_factor, spec):
    """Resize a spectrogram to tolerate the sampling rate difference
    between the tts model and the vocoder.

    spec is converted to a tensor, given two leading dimensions, resized with
    bilinear interpolation by scale_factor, and returned with the first of
    those leading dimensions removed. The shapes before and after are printed.
    """
    print(" > before interpolation :", spec.shape)
    expanded = torch.tensor(spec)[None, None]
    resized = torch.nn.functional.interpolate(
        expanded,
        scale_factor=scale_factor,
        mode='bilinear',
    )
    spec = resized.squeeze(0)
    print(" > after interpolation :", spec.shape)
    return spec


def tts(model, text, CONFIG, use_cuda, ap, use_gl, figures=True):
    """Synthesize speech for text, play it in the notebook and return the results.

    Besides its arguments, this function reads the notebook globals
    speaker_id, VOCODER_CONFIG, ap_vocoder, scale_factor and vocoder_model,
    so the model-loading cells must have been run first.

    parameters:
    model: TTS model passed to synthesis()
    text(str): text to speak
    CONFIG: TTS config; provides audio['sample_rate'] and enable_eos_bos_chars
    use_cuda(bool): passed to synthesis(); when set (and the vocoder is used)
        the waveform is moved to the CPU before conversion to numpy
    ap: audio processor of the TTS model
    use_gl(bool): passed to synthesis(); when true the vocoder step is skipped
        and the waveform returned by synthesis() is used as is
    figures(bool): unused, kept for interface compatibility

    returns:
    tuple: (alignment, denormalized mel_postnet_spec, stop_tokens, waveform)
    """
    t_1 = time.time()
    # run tts
    target_sr = CONFIG.audio['sample_rate']
    waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs =\
     synthesis(model,
               text,
               CONFIG,
               use_cuda,
               ap,
               speaker_id,
               None,
               False,
               CONFIG.enable_eos_bos_chars,
               use_gl)
    # run vocoder
    mel_postnet_spec = ap._denormalize(mel_postnet_spec.T).T
    if not use_gl:
        # the vocoder output is at the vocoder's own sample rate
        target_sr = VOCODER_CONFIG.audio['sample_rate']
        vocoder_input = ap_vocoder._normalize(mel_postnet_spec.T)
        if scale_factor[1] != 1:
            # sample rates differ: resize the spectrogram for the vocoder
            vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input)
        else:
            vocoder_input = torch.tensor(vocoder_input).unsqueeze(0)
        waveform = vocoder_model.inference(vocoder_input)
    # format output
    if use_cuda and not use_gl:
        waveform = waveform.cpu()
    if not use_gl:
        waveform = waveform.numpy()
    waveform = waveform.squeeze()
    # compute run-time performance: measure the elapsed time once, and derive
    # the audio duration from target_sr, the rate the waveform is played at
    # (it equals the vocoder rate whenever the vocoder produced the waveform)
    elapsed = time.time() - t_1
    rtf = elapsed / (len(waveform) / target_sr)
    tps = elapsed / len(waveform)
    print(waveform.shape)
    print(" > Run-time: {}".format(elapsed))
    print(" > Real-time factor: {}".format(rtf))
    print(" > Time per step: {}".format(tps))
    # display audio
    IPython.display.display(IPython.display.Audio(waveform, rate=target_sr))
    return alignment, mel_postnet_spec, stop_tokens, waveform

In [None]:
import sys
import os
import torch
import time
import IPython

# for some reason TTS installation does not work on Colab
sys.path.append('TTS_repo')

from TTS.utils.io import load_config
from TTS.utils.audio import AudioProcessor
from TTS.tts.utils.generic_utils import setup_model
from TTS.tts.utils.text.symbols import symbols, phonemes
from TTS.tts.utils.synthesis import synthesis
from TTS.tts.utils.io import load_checkpoint
from TTS.vocoder.utils.generic_utils import setup_generator

# runtime settings
# run on CPU; set to True to pass use_cuda=True to the model loading and synthesis calls
use_cuda = False

# model paths
# these match the output file names of the download cell
TTS_MODEL = "tts_model.pth.tar"
TTS_CONFIG = "config.json"
VOCODER_MODEL = "vocoder_model.pth.tar"
VOCODER_CONFIG = "config_vocoder.json"

# load configs
# NOTE: TTS_CONFIG and VOCODER_CONFIG are rebound here from file paths to the
# loaded config objects
TTS_CONFIG = load_config(TTS_CONFIG)
VOCODER_CONFIG = load_config(VOCODER_CONFIG)

# point the vocoder config at the downloaded scale statistics file
# TTS_CONFIG.audio['stats_path'] = "./scale_stats.npy"
VOCODER_CONFIG.audio['stats_path'] = "./scale_stats_vocoder.npy"

# load the audio processor
# built from the TTS model's audio settings
ap = AudioProcessor(**TTS_CONFIG.audio)   

In [None]:
# LOAD TTS MODEL
# NOTE: this cell rebinds the global name `model`, which the text-generation
# cells use for GPT-Neo; re-run the GPT-Neo loading cell before generating text again.
# multi speaker 
# no speakers are configured, so speaker_id stays None
speakers = []
speaker_id = None
    
#if 'characters' in TTS_CONFIG.keys():
#    symbols, phonemes = make_symbols(**c.characters)

# load the model
# the input vocabulary size depends on whether the config uses phonemes or raw symbols
num_chars = len(phonemes) if TTS_CONFIG.use_phonemes else len(symbols)
model = setup_model(num_chars, len(speakers), TTS_CONFIG)      

# load model state
# load_checkpoint returns the model and a second value that is discarded here
model, _ =  load_checkpoint(model, TTS_MODEL, use_cuda=use_cuda)
model.eval();
model.store_inverse();

In [None]:
# LOAD VOCODER MODEL
# build the generator from the vocoder config, then load the "model" entry of
# the checkpoint file (mapped to CPU) as its weights
vocoder_model = setup_generator(VOCODER_CONFIG)
vocoder_model.load_state_dict(torch.load(VOCODER_MODEL, map_location="cpu")["model"])
vocoder_model.remove_weight_norm()
vocoder_model.inference_padding = 0

# scale factor for sampling rate difference
# second entry is the vocoder sample rate divided by the TTS sample rate;
# tts() interpolates the spectrogram whenever it is not 1
scale_factor = [1,  VOCODER_CONFIG['audio']['sample_rate'] / ap.sample_rate]
print(f"scale_factor: {scale_factor}")

# separate audio processor built from the vocoder's audio settings
ap_vocoder = AudioProcessor(**VOCODER_CONFIG['audio'])    
if use_cuda:
    vocoder_model.cuda()
vocoder_model.eval();

In [None]:
# `model` is the TTS model loaded above (not GPT-Neo) at this point
model.length_scale = 1.0  # set speed of the speech. 
model.noise_scale = 0.33  # set speech variation

# gen_text =  "Bill got in the habit of asking himself “Is that thought true?” and if he wasn’t absolutely certain it was, he just let it go."
# Speak the generated text; use_gl=False routes the spectrogram through the vocoder model
align, spec, stop_tokedns, wav = tts(model, gen_text, TTS_CONFIG, use_cuda, ap, use_gl=False, figures=True)