In [1]:
# %pip (unlike a "!pip" shell escape) installs into the environment of the
# kernel that is running this notebook.
%pip install librosa
%pip install sounddevice



In [9]:
import librosa
import numpy as np
import sounddevice as sd
import random 

## Module 1: Concatenative speech from per-word clips

In [3]:
def speak_text(text, voice_profile, sample_rate=8000):
    """Speak ``text`` by playing one pre-recorded clip per word, back to back.

    Every whitespace-separated word ``w`` is loaded from
    ``clips/<voice_profile>/<w>.wav`` with ``librosa.load`` at ``sample_rate``.
    The clips are joined without any gap, handed to ``sd.play`` and the
    function then calls ``sd.wait()`` before returning.

    Parameters
    ----------
    text : str
        Sentence to speak; each word needs a matching clip file.
    voice_profile : str
        Name of the sub-directory of ``clips/`` that holds the clips.
    sample_rate : int, optional
        Rate in Hz used for both loading and playback (default 8000).

    Returns
    -------
    None
        The word list is printed and the audio is played as side effects.
        Nothing is loaded or played when ``text`` contains no words.
    """
    # split() with no argument drops leading/trailing/repeated whitespace, so
    # an empty "word" never turns into a lookup of "clips/<profile>/.wav".
    words = text.split()
    print(words)
    if not words:
        return

    clips = []
    for word in words:
        clip_path = "clips/" + voice_profile + "/" + word + ".wav"
        audio, _ = librosa.load(clip_path, sr=sample_rate)
        clips.append(audio)

    # Concatenate once instead of re-copying the growing buffer per word; the
    # leading empty float64 array keeps the output dtype as it was before.
    concatenated_audio = np.concatenate([np.array([])] + clips)

    sd.play(concatenated_audio, sample_rate)
    sd.wait()


def speak_text_multiple(text_array, voice_profile, pause_seconds=0.5, sample_rate=8000):
    """Speak several sentences in a single playback, with a pause before each.

    For every sentence in ``text_array`` a block of silence lasting
    ``pause_seconds`` is emitted, followed by the per-word clips
    ``clips/<voice_profile>/<word>.wav`` joined without gaps. Everything is
    played with one ``sd.play`` call followed by ``sd.wait()``.

    Parameters
    ----------
    text_array : iterable of str
        Sentences to speak, in order.
    voice_profile : str
        Name of the sub-directory of ``clips/`` that holds the clips.
    pause_seconds : float, optional
        Length of the silence inserted before each sentence (default 0.5).
    sample_rate : int, optional
        Rate in Hz used for both loading and playback (default 8000).

    Returns
    -------
    None
        Audio is played as a side effect. Nothing is played when
        ``text_array`` is empty.
    """
    # The pause must be real silence (zeros). A ramp such as
    # np.array(range(n)) is not silence: its sample values reach n - 1.
    pause = np.zeros(int(pause_seconds * sample_rate))

    segments = []
    for text in text_array:
        segments.append(pause)
        # split() with no argument ignores repeated/leading/trailing spaces.
        for word in text.split():
            clip_path = "clips/" + voice_profile + "/" + word + ".wav"
            audio, _ = librosa.load(clip_path, sr=sample_rate)
            segments.append(audio)

    if not segments:
        return

    # Single concatenation; the leading empty float64 array fixes the dtype.
    grand_concatenated_audio = np.concatenate([np.array([])] + segments)

    sd.play(grand_concatenated_audio, sample_rate)
    sd.wait()

In [4]:
# Demo: speak a single sentence using the "pradeep" voice profile.
voice_profile = "pradeep"
text = "ball is not cat and cat is not ball"
speak_text(text=text, voice_profile=voice_profile)

['ball', 'is', 'not', 'cat', 'and', 'cat', 'is', 'not', 'ball']


In [5]:
# Demo: speak six sentences in one playback using the "pradeep" voice profile.
voice_profile = "pradeep"
text_array = [
    "this is a cat",
    "that is a ball",
    "cat is not ball",
    "this cat is not that ball",
    "ball is not cat and cat is not ball",
    "a ball is not a cat and that is",
]
speak_text_multiple(text_array=text_array, voice_profile=voice_profile)

## Module 2: Randomized conjunction times (gaps and overlaps) between words

In [166]:
def _join_with_conjuction(accumulated, clip, conjuction_time, sample_rate):
    """Append ``clip`` to ``accumulated`` using a signed conjunction time (seconds)."""
    shift = int(abs(conjuction_time) * sample_rate)

    if conjuction_time > 0:
        # Positive time: insert that much silence before the clip.
        return np.concatenate([accumulated, np.zeros(shift), clip])

    if conjuction_time < 0:
        # Negative time: mix the start of the clip into the end of the audio
        # so far. The overlap is clamped so both slices always have the same
        # length, even when a clip (or the audio so far) is shorter than the
        # requested overlap.
        overlap = min(shift, len(accumulated), len(clip))
        split_at = len(accumulated) - overlap
        return np.concatenate([
            accumulated[:split_at],
            accumulated[split_at:] + clip[:overlap],
            clip[overlap:],
        ])

    return np.concatenate([accumulated, clip])


def speak_text_with_conjuction_times(text, voice_profile, conjuction_times_with_probabilities, sample_rate=8000):
    """Speak ``text`` with a randomly drawn gap or overlap between words.

    A conjunction time is drawn with ``np.random.choice`` for every word
    boundary. A positive value inserts that many seconds of silence before
    the next word, a negative value overlaps (sums) the next word with the end
    of the audio built so far, and zero joins the clips directly. The first
    word always uses a conjunction time of 0.

    Parameters
    ----------
    text : str
        Sentence to speak; each word is loaded from
        ``clips/<voice_profile>/<word>.wav``.
    voice_profile : str
        Name of the sub-directory of ``clips/`` that holds the clips.
    conjuction_times_with_probabilities : sequence
        ``[times, probabilities]``: element 0 lists the candidate conjunction
        times in seconds, element 1 the probability of each (passed as ``p``
        to ``np.random.choice``).
    sample_rate : int, optional
        Rate in Hz used for loading, for converting seconds to samples and
        for playback (default 8000).

    Returns
    -------
    None
        The words, the drawn times and the interleaved word/time list are
        printed, then the audio is played. Nothing happens when ``text``
        contains no words.
    """
    conjuction_times = conjuction_times_with_probabilities[0]
    randomization = conjuction_times_with_probabilities[1]

    # split() with no argument ignores repeated/leading/trailing spaces.
    text_splits = text.split()
    if not text_splits:
        return

    conjuction_times_list = [0]
    text_splits_with_conjuction_times = ["cnj_0"]

    for split in text_splits:
        text_splits_with_conjuction_times.append(split)
        conjuction_time = float(np.random.choice(conjuction_times, p=randomization))
        conjuction_times_list.append(conjuction_time)
        text_splits_with_conjuction_times.append("cnj_" + str(conjuction_time))

    # The value drawn after the last word has no following word, so it is
    # discarded; it is still drawn so the number of random draws per call
    # stays one per word.
    conjuction_times_list.pop()
    text_splits_with_conjuction_times.pop()

    concatenated_audio = np.array([])
    for word, conjuction_time in zip(text_splits, conjuction_times_list):
        clip_path = "clips/" + voice_profile + "/" + word + ".wav"
        audio, _ = librosa.load(clip_path, sr=sample_rate)
        concatenated_audio = _join_with_conjuction(
            concatenated_audio, audio, float(conjuction_time), sample_rate
        )

    print(text_splits)
    print(conjuction_times_list)
    print(text_splits_with_conjuction_times)

    sd.play(concatenated_audio, sample_rate)
    sd.wait()

In [171]:
text = "this is a cat and that is not ball"
voice_profile = "pradeep"
# [candidate conjunction times in seconds, probability of each candidate]:
# a 0.1 s overlap (-0.1) with p=0.8, a 0.1 s gap (+0.1) with p=0.2.
conjuction_times_with_probabilities = [[-0.1, 0.1], [0.8, 0.2]]

# The conjunction times are drawn with np.random.choice, which uses NumPy's
# global RNG; seeding it makes the five renditions below repeatable.
np.random.seed(0)

for i in range(5):
    speak_text_with_conjuction_times(text, voice_profile, conjuction_times_with_probabilities)

['this', 'is', 'a', 'cat', 'and', 'that', 'is', 'not', 'ball']
[0, -0.1, -0.1, 0.1, -0.1, -0.1, 0.1, -0.1, -0.1]
['cnj_0', 'this', 'cnj_-0.1', 'is', 'cnj_-0.1', 'a', 'cnj_0.1', 'cat', 'cnj_-0.1', 'and', 'cnj_-0.1', 'that', 'cnj_0.1', 'is', 'cnj_-0.1', 'not', 'cnj_-0.1', 'ball']
['this', 'is', 'a', 'cat', 'and', 'that', 'is', 'not', 'ball']
[0, -0.1, 0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1]
['cnj_0', 'this', 'cnj_-0.1', 'is', 'cnj_0.1', 'a', 'cnj_-0.1', 'cat', 'cnj_-0.1', 'and', 'cnj_-0.1', 'that', 'cnj_-0.1', 'is', 'cnj_-0.1', 'not', 'cnj_-0.1', 'ball']
['this', 'is', 'a', 'cat', 'and', 'that', 'is', 'not', 'ball']
[0, -0.1, -0.1, -0.1, -0.1, 0.1, -0.1, -0.1, -0.1]
['cnj_0', 'this', 'cnj_-0.1', 'is', 'cnj_-0.1', 'a', 'cnj_-0.1', 'cat', 'cnj_-0.1', 'and', 'cnj_0.1', 'that', 'cnj_-0.1', 'is', 'cnj_-0.1', 'not', 'cnj_-0.1', 'ball']
['this', 'is', 'a', 'cat', 'and', 'that', 'is', 'not', 'ball']
[0, -0.1, 0.1, -0.1, -0.1, -0.1, -0.1, -0.1, 0.1]
['cnj_0', 'this', 'cnj_-0.1', 'is', 'cnj_0.1',

## Module 3