<a href="https://colab.research.google.com/github/no-clicks/Chat-GPT-Giphy-Connector/blob/main/TorToiSe_moop_build_v07.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **🐢TorToiSe (Moop's Notebook) v0.7**
[TorToiSe](https://github.com/neonbjb/tortoise-tts) by James Betker. Built off of James's [Original Notebook](https://colab.research.google.com/drive/1wVVqUPqwiDBUVeWWOUNglpGhU3hg_cbR?usp=sharing)

Check out the [Github](https://github.com/no-clicks/TorToiSe-Moops-Colabratory) for this notebook.

How to render:

1.   Make sure Runtime type is set to GPU
2.   Run **Setup**
3.   Run **Available Voices** to get a list of voices in the voice directory.
4.   Run **Voice Settings** - To use a *Single Voice*,  leave Voice Blending blank. To use *Voice Blending,* leave Single Voice blank.
5.   Run **Dialogue** - This supports multiple lines. Separate paragraphs with a blank line. Only use periods at the end of a sentence (for example, write out "Doctor" instead of "Dr."); ellipses are not supported.
6.   Run **Run** - A directory called TorToiSe is created in your Google Drive, and each render is saved to a versioned subdirectory named after your project under TorToiSe/Projects. Once completed, you will get a data table listing each rendered sentence with basic information about it.

How to train:
1.   Run **Setup**
2.   Run **Training** - Name the voice before running the Training step. Upload your voice samples (no background noise; 15-20 samples of 5-10 seconds each seem to work best). A new directory will be created in Google Drive under TorToiSe/Voices.
3.   After training, re-run **Available Voices**; the new voice should appear in the list and can now be used.




# Setup

In [None]:
#@title ## Installs
%%capture
# Upgrade SciPy before installing the TorToiSe dependencies.
!pip3 install -U scipy
# Fetch the TorToiSe sources (jnordberg fork) and work from inside the checkout.
!git clone https://github.com/jnordberg/tortoise-tts.git
%cd tortoise-tts
# Pin transformers, then install the requirements one at a time and the
# tortoise package itself.
!pip3 install transformers==4.19.0
!xargs -n 1 pip3 install < requirements.txt
!python3 setup.py install
# Extra packages imported by this notebook's Imports cell.
!pip install pydub
!pip install rake_nltk

In [None]:
#@title ## Imports
%%capture

import torch
import torchaudio
import torch.nn as nn
import torch.nn.functional as F

import IPython

import nltk
import pandas as pd
import itertools
import os
import shutil
import csv

from IPython.display import clear_output
from pydub import AudioSegment
from nltk.tokenize import sent_tokenize
from rake_nltk import Rake
from google.colab import data_table
from vega_datasets import data
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_audio, load_voice, load_voices

# This will download all the models used by Tortoise from the HuggingFace hub.
# The resulting `tts` object is shared with the Run cell, which calls
# tts.tts_with_preset() on it for every sentence.
tts = TextToSpeech()

# Download the punkt tokenizer data; the Run cell splits paragraphs into
# sentences with nltk.sent_tokenize, which presumably relies on it.
nltk.download('punkt')

In [None]:
#@title ## Notebook Directories (Google Drive)

# Mount Google Drive at /content/drive; every Drive path used by this notebook
# lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# NOTE(review): the last assignment rebinds `drive`, which until then refers to
# the google.colab.drive module; afterwards `drive` is the PyDrive client.
# None of the visible cells use that client - confirm whether it is still needed.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Local (Colab runtime) folder holding the voices available to TorToiSe
contVoiceDir = '/content/tortoise-tts/tortoise/voices'

# Persistent folders on the mounted Google Drive
driveParentDir = '/content/drive/MyDrive/TorToiSe'
driveVoiceDir = os.path.join(driveParentDir, 'Voices')
driveProjectDir = os.path.join(driveParentDir, 'Projects')

# Make sure each Drive folder exists (a no-op when it is already there) and
# report its location
for _message, _folder in (
  ('Parent directory created at {}', driveParentDir),
  ('Voice directory created at {}', driveVoiceDir),
  ('Project directory created at {}', driveProjectDir),
):
  os.makedirs(_folder, exist_ok=True)
  print(_message.format(_folder))

# Mirror the voices stored on Drive into the local TorToiSe voices folder.
src_dir = driveVoiceDir
dst_dir = contVoiceDir
# shutil.copytree is already recursive, so a single call copies every voice
# sub-folder. Walking the tree and joining dst_dir with root[len(src_dir):]
# is wrong for sub-folders: that suffix starts with '/', which makes
# os.path.join discard dst_dir and copy the folder to the filesystem root.
if os.path.isdir(src_dir):
  shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
print('Directories and their contents copied from {} to {}'.format(src_dir, dst_dir))

Mounted at /content/drive
Parent directory created at /content/drive/MyDrive/TorToiSe
Voice directory created at /content/drive/MyDrive/TorToiSe/Voices
Project directory created at /content/drive/MyDrive/TorToiSe/Projects
Directories and their contents copied from /content/drive/MyDrive/TorToiSe/Voices to /content/tortoise-tts/tortoise/voices


# Options

In [None]:
#@title ## Available Voices

#@markdown Remove default voices?
rem_def = False #@param {type:"boolean"}

# When the box is ticked, wipe everything currently in the local voices
# folder: sub-folders are deleted recursively, anything else is unlinked.
if rem_def:
  for item in os.scandir(contVoiceDir):
    delete = shutil.rmtree if item.is_dir() else os.unlink
    delete(item.path)

# Re-sync the voices stored on Drive into the local TorToiSe voices folder so
# that voices added to Drive since Setup show up in the listing below.
src_dir = driveVoiceDir
dst_dir = contVoiceDir
# shutil.copytree is already recursive, so a single call copies every voice
# sub-folder. Walking the tree and joining dst_dir with root[len(src_dir):]
# is wrong for sub-folders: that suffix starts with '/', which makes
# os.path.join discard dst_dir and copy the folder to the filesystem root.
if os.path.isdir(src_dir):
  shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
print('Directories and their contents copied from {} to {}'.format(src_dir, dst_dir))

# Get a list of the voices in the directory (one sub-folder per voice),
# sorted so the table is stable between runs
dir_list = sorted(d for d in os.listdir(contVoiceDir) if os.path.isdir(os.path.join(contVoiceDir, d)))

# Lay the voice names out in a grid of six names per row
voices_per_row = 6
dir_groups = [dir_list[i:i + voices_per_row] for i in range(0, len(dir_list), voices_per_row)]

# Pad the last row with None so every row has exactly six cells
table_rows = [group + [None] * (voices_per_row - len(group)) for group in dir_groups]

# Build the dataframe in one go; DataFrame.append was removed in pandas 2.0
# and re-allocated the whole frame on every call
df = pd.DataFrame(table_rows, columns=[str(n) for n in range(1, voices_per_row + 1)])

# Display the dataframe without the index column
data_table.DataTable(df, include_index=False)

In [None]:
#@title ## Voice Settings
# The values below are read by the Run cell.

# project_title is joined onto the Drive Projects directory to name the output folder.
#@markdown Choose a name for the project
project_title = '' #@param {type:"string"}

# When non-empty, `voice` is passed to load_voice() and takes precedence over `voices`.
#@markdown Single voice
voice = '' #@param {type:"string"}

# Passed to load_voices() only when `voice` is empty.
#@markdown Voice blending - Format as ['voice1', 'voice2'] in order for this to work properly.
voices = None #@param {type:"raw"}

# Forwarded unchanged as the `preset` argument of tts.tts_with_preset().
#@markdown Voice render quality
preset = 'high_quality' #@param ["ultra_fast", "fast", "standard", "high_quality"]

In [None]:
#@title ## Dialogue
# Text to synthesize. The Run cell splits it into paragraphs on blank lines
# ('\n\n') and then into sentences, rendering one audio clip per sentence.
text = """
As the team delves deeper into the enigmatic expanse, they stumble upon an unexpected sight: a doorway that leads to a series of office like rooms.
The rooms are filled with sparse, old furniture, strewn about in nonsensical arrangements.
The contrast between these seemingly mundane office spaces and the bizarre, otherworldly environment outside the rooms is jarring, only adding to the overall mystery of their location.

Doctor Thompson, eager to press on and unravel the secrets of the spatial anomaly, is determined to continue their exploration.
However, Oliver, ever the pragmatic leader, argues that the team should take advantage of the relative safety provided by the rooms and set up camp for the night.
He insists that the team needs to rest and recuperate before venturing further into the unknown.

"""

In [None]:
#@title ## Run
# Set the initial project directory path. Fall back to a placeholder name when
# the form field was left blank (its default); an empty title would otherwise
# produce a folder literally named " (1)".
driveProjectFile = os.path.join(driveProjectDir, project_title.strip() or 'Untitled')

# Each run gets its own "<project> (N)" folder so earlier renders are never
# overwritten: start at version 1 and take the first number not yet in use
version = 1
driveProjectFile_versioned = '{} ({})'.format(driveProjectFile, version)
while os.path.exists(driveProjectFile_versioned):
    version += 1
    driveProjectFile_versioned = '{} ({})'.format(driveProjectFile, version)

# Create the versioned project directory
os.makedirs(driveProjectFile_versioned)
print('Project directory created at {}'.format(driveProjectFile_versioned))

# From here on driveProjectFile points at the versioned folder for this run
driveProjectFile = driveProjectFile_versioned

# Pick the conditioning voice(s): a single voice wins when it is set,
# otherwise the blend list is used. Fail early with a readable message when
# both form fields were left blank instead of handing None (or an empty list)
# to load_voices().
if voice:
    # Use the load_voice() function
    voice_samples, conditioning_latents = load_voice(voice)
elif voices:
    # Use the load_voices() function
    voice_samples, conditioning_latents = load_voices(voices)
else:
    raise ValueError(
        "No voice selected: fill in either 'Single voice' or 'Voice blending' "
        "in the Voice Settings cell and run it again."
    )

from IPython.display import clear_output

# Sample rate (Hz) the clips are written with
SAMPLE_RATE = 24000
# Seconds added per clip to the running paragraph length
# NOTE(review): presumably an allowance for the pause between sentences - confirm
CLIP_GAP_SECONDS = 0.6

# Split the text into paragraphs (paragraphs are separated by a blank line)
paragraphs = text.split('\n\n')

# One dict of metadata per rendered clip, in render order.
# NOTE: this rebinds `data`, which the Imports cell bound to vega_datasets.data
# (not used by any visible cell).
data = []

# Running counters used in the file names and in the result table
paragraph_count = 0
sentence_count = 0

# Set the list of stop words
stop_words = ['the', 'and', 'a', 'an', 'of', 'in']

# Create an instance of the Rake class
rake = Rake(stop_words)

# Loop through the paragraphs
for paragraph in paragraphs:
  # Ignore blank chunks (e.g. produced by leading/trailing blank lines) so
  # they do not consume a paragraph number
  if not paragraph.strip():
    continue

  # Extract the key phrases from the paragraph and keep the highest-scoring
  # one. Each entry is a (score, phrase) tuple, so the score is element 0;
  # ranking on element 1 would pick the alphabetically last phrase instead.
  rake.extract_keywords_from_text(paragraph)
  keyphrases_with_scores = rake.get_ranked_phrases_with_scores()
  if keyphrases_with_scores:
    keyphrase = max(keyphrases_with_scores, key=lambda pair: pair[0])[1]
  else:
    keyphrase = ""

  paragraph_count += 1
  # Running audio length of the current paragraph, reset for each paragraph
  total_audio_length = 0

  # Split the paragraph into sentences and render them one by one
  for sentence in nltk.sent_tokenize(paragraph):
    sentence_count += 1

    # Synthesize the sentence using Tortoise TTS
    gen = tts.tts_with_preset(sentence, voice_samples=voice_samples, conditioning_latents=conditioning_latents, preset=preset)

    # Build a readable file name from the first three purely alphanumeric
    # words (words carrying punctuation are skipped)
    words = [word for word in sentence.split() if word.isalnum()]
    file_name_prefix = '_'.join(words[:3])
    file_name = '{:03d}_{:02d}-{}.wav'.format(sentence_count, paragraph_count, file_name_prefix)

    # Set the file path to the output directory
    file_path = os.path.join(driveProjectFile, file_name)

    # Save the wav file
    audio = gen.squeeze(0).cpu()
    torchaudio.save(file_path, audio, SAMPLE_RATE)

    # Clip length in seconds, taken from the tensor that was just written
    # (last dimension = samples) rather than re-reading the file from Drive
    audio_length = round(audio.size(-1) / SAMPLE_RATE, 2)
    total_audio_length += audio_length + CLIP_GAP_SECONDS

    # Add the sentence, audio length, file name, and paragraph count to the
    # data list; the running total is rounded to hide float accumulation noise
    data.append({
      'Clip': sentence_count,
      'Paragraph': paragraph_count,
      'Sentence': sentence,
      'Keyphrase': keyphrase,
      'File Name': file_name,
      'Audio Length (s)': audio_length,
      'Paragraph Audio Length': round(total_audio_length, 2),
    })

    # Clear the (verbose) synthesis output before the next sentence
    clear_output()

# Column order shared by the CSV export and the on-screen table
clip_columns = ['Clip', 'Paragraph', 'Sentence', 'Keyphrase', 'File Name', 'Audio Length (s)', 'Paragraph Audio Length']
df = pd.DataFrame(data, columns=clip_columns)

# Store the table next to the rendered clips in the project folder
csvFilename = 'voice data.csv'
csvPath = os.path.join(driveProjectFile, csvFilename)
df.to_csv(csvPath, index=False)

# Show the table in the notebook, hiding the index column
data_table.DataTable(df, include_index=False)

# Training

In [None]:
# Upload your own voice samples by running this cell. I recommend you upload
# at least 2 audio clips. They must be WAV files, 6-10 seconds long.
CUSTOM_VOICE_NAME = '' #@param {type:"string"}

import os
from google.colab import files

# Refuse a blank name: the target path would then be the shared Voices folder
# itself rather than a folder for one voice.
if not CUSTOM_VOICE_NAME.strip():
  raise ValueError('Enter a CUSTOM_VOICE_NAME before running the Training cell.')

custom_voice_folder = f"/content/drive/MyDrive/TorToiSe/Voices/{CUSTOM_VOICE_NAME}"
# exist_ok lets the cell be re-run (after a cancelled upload, or to add more
# samples to an existing voice); makedirs also creates any missing parents.
os.makedirs(custom_voice_folder, exist_ok=True)

# Store every uploaded file as "<n>.wav", skipping numbers that are already
# taken so samples from an earlier run are never overwritten
sample_index = 0
for file_data in files.upload().values():
  while os.path.exists(os.path.join(custom_voice_folder, f'{sample_index}.wav')):
    sample_index += 1
  with open(os.path.join(custom_voice_folder, f'{sample_index}.wav'), 'wb') as f:
    f.write(file_data)

print('New voice directory created at {}'.format(custom_voice_folder))