In [1]:
import json
import os
from math import ceil

import nltk  # we'll use this to split into sentences
import numpy as np
from IPython.display import Audio
from scipy.io import wavfile

from bark.generation import (
    generate_text_semantic,
    preload_models,
)
from bark.api import semantic_to_waveform
from bark import generate_audio, SAMPLE_RATE


In [2]:
# Run Bark's model preloading once, before any generation call in the cells below.
# NOTE(review): presumably this downloads/caches model weights on first use — confirm against Bark's docs.
preload_models()

In [5]:
# Read the scriptwriter output line by line; the context manager closes the
# file handle as soon as the lines are in memory.
with open("outputs/silvio/scriptwriter_out.txt", "r") as script_file:
    script = script_file.readlines()

# Keep only the scene-direction lines (those starting with '[Scene') and strip
# each down to its description text.
# NOTE(review): the fixed [13:-2] slice assumes a 13-character prefix and a
# two-character suffix on every scene line — confirm against the scriptwriter
# output format (a final line without a trailing newline would lose one extra character).
scene_lines = [line[13:-2] for line in script if line.startswith('[Scene')]
scene_lines

['A simple, somber room with a portrait of Silvio Berlusconi on the wall. The camera slowly zooms in on the portrait.',
 "A montage of clips showcasing Berlusconi's political career and business achievements. Images of newspaper headlines detailing his scandals are interspersed.",
 'Footage of lowered flags across Italy and Europe. A crowd gathered for the funeral at Milan Cathedral.',
 'A series of photos and clips of world leaders offering tributes.',
 "Clips of Berlusconi's center-right party, Forza Italia, and its activities under Giorgia Meloni's leadership. Images of Berlusconi's public appearances before his death."]

In [7]:
# Build the spoken script: every scene-direction line is swapped for a
# "[SCENE]." marker, and lines that are empty or whitespace-only are dropped.
script_lines = []
for raw_line in script:
    entry = "[SCENE]." if raw_line.startswith('[Scene') else raw_line
    if entry.strip():
        script_lines.append(entry)
script_lines

['[SCENE].',
 'NARRATOR: "Silvio Berlusconi, the four-time Prime Minister of Italy, despite his numerous scandals, has died at the age of 86. He passed away at the San Raffaele hospital in Milan. His health had been deteriorating, and he was suffering from a rare form of blood cancer. However, the precise cause of his death remains unconfirmed."\n',
 '[SCENE].',
 'NARRATOR: "Berlusconi was the longest-serving Prime Minister in post-war Italy, from 1994 to 2011. His reign was not without controversy - sex scandals, corruption cases, and more. Yet before his political career, he was a successful businessman, owning television networks, publishing companies, and even saving the legendary football club, AC Milan, from bankruptcy."\n',
 '[SCENE].',
 'NARRATOR: "Berlusconi\'s passing has left a significant void in Italy, and the government declared a national day of mourning. His funeral was held at Milan Cathedral, with flags lowered to half-mast in his honor across Italy and Europe."\n',
 

In [8]:
# Flatten the script into a single string and split it into sentences.
# The flattened text gets its own name instead of rebinding `script`, so the
# list of raw lines stays intact and the earlier cells that iterate over
# `script` can be re-run safely.
script_text = "\n".join(script_lines).replace("\n", " ").strip()
sentences = nltk.sent_tokenize(script_text)
script_text

'[SCENE]. NARRATOR: "Silvio Berlusconi, the four-time Prime Minister of Italy, despite his numerous scandals, has died at the age of 86. He passed away at the San Raffaele hospital in Milan. His health had been deteriorating, and he was suffering from a rare form of blood cancer. However, the precise cause of his death remains unconfirmed."  [SCENE]. NARRATOR: "Berlusconi was the longest-serving Prime Minister in post-war Italy, from 1994 to 2011. His reign was not without controversy - sex scandals, corruption cases, and more. Yet before his political career, he was a successful businessman, owning television networks, publishing companies, and even saving the legendary football club, AC Milan, from bankruptcy."  [SCENE]. NARRATOR: "Berlusconi\'s passing has left a significant void in Italy, and the government declared a national day of mourning. His funeral was held at Milan Cathedral, with flags lowered to half-mast in his honor across Italy and Europe."  [SCENE]. NARRATOR: "Tribute

In [9]:
GEN_TEMP = 0.8447
SPEAKER = "v2/en_speaker_6"
SILENCE_SECONDS = 0.25  # pause inserted after every spoken sentence
silence = np.zeros(int(SILENCE_SECONDS * SAMPLE_RATE))

# pieces: audio clips and silence gaps, in playback order.
# scenes: maps a timecode (whole seconds, rounded up) to the description of the
#         scene that starts there.
pieces = []
scenes = {}
scene_index = 0      # next entry of scene_lines to assign
samples_so_far = 0   # running length of everything appended to pieces
for sentence in sentences:
    if sentence == "[SCENE].":
        # A scene marker produces no audio; it records where the next scene
        # begins. The running sample count avoids re-summing every piece at
        # each marker, and integer arithmetic keeps the division exact.
        timecode = ceil(samples_so_far / SAMPLE_RATE)
        # Index by an explicit counter rather than len(scenes): if two markers
        # land on the same timecode the dict does not grow, and len(scenes)
        # would then hand the same description out twice and shift every
        # later scene. With the counter, the later scene simply replaces the
        # earlier one at that timecode.
        scenes[timecode] = scene_lines[scene_index]
        scene_index += 1
        continue
    # NOTE(review): sentences are passed to the model verbatim, including any
    # 'NARRATOR:' prefix and quote characters — confirm that is intended.
    semantic_tokens = generate_text_semantic(
        sentence,
        history_prompt=SPEAKER,
        temp=GEN_TEMP,
        min_eos_p=0.05,  # this controls how likely the generation is to end
    )

    audio_array = semantic_to_waveform(semantic_tokens, history_prompt=SPEAKER)
    pieces += [audio_array, silence.copy()]
    samples_so_far += len(audio_array) + len(silence)


100%|███████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:08<00:00, 11.33it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:09<00:00,  2.43it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 37.08it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:03<00:00,  2.45it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:14<00:00,  6.86it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:13<00:00,  2.44it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:07<00:00, 14.09it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:07<00

In [10]:
# Join the generated clips and the silence gaps collected in `pieces` into one array.
audio_arr = np.concatenate(pieces)

In [11]:
# Display the assembled voiceover as an inline audio widget, played at SAMPLE_RATE.
Audio(audio_arr, rate=SAMPLE_RATE)

In [12]:
# Convert the float waveform to 16-bit PCM. Samples are clipped to [-1, 1]
# first so that anything outside that range saturates at full scale instead
# of wrapping around in the int16 cast; in-range samples are unaffected.
# NOTE(review): assumes audio_arr holds floats nominally in [-1, 1] — confirm against Bark's output.
int_audio_arr = (np.clip(audio_arr, -1.0, 1.0) * np.iinfo(np.int16).max).astype(np.int16)

# save as wav
wavfile.write("outputs/silvio/voiceover_out.wav", SAMPLE_RATE, int_audio_arr)


In [14]:
# Write scene timecodes (JSON object: timecode -> scene description; the
# integer keys are serialized as strings).
with open("outputs/silvio/voiceover_timecodes.json", "w") as f:
    json.dump(scenes, f)

# Read the file back to eyeball what was written; the context manager closes
# the handle instead of leaving it to the garbage collector.
with open("outputs/silvio/voiceover_timecodes.json", "r") as f:
    timecodes = f.readlines()
timecodes

['{"0": "A simple, somber room with a portrait of Silvio Berlusconi on the wall. The camera slowly zooms in on the portrait.", "34": "A montage of clips showcasing Berlusconi\'s political career and business achievements. Images of newspaper headlines detailing his scandals are interspersed.", "61": "Footage of lowered flags across Italy and Europe. A crowd gathered for the funeral at Milan Cathedral.", "82": "A series of photos and clips of world leaders offering tributes.", "103": "Clips of Berlusconi\'s center-right party, Forza Italia, and its activities under Giorgia Meloni\'s leadership. Images of Berlusconi\'s public appearances before his death."}']