In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from IPython.display import  clear_output
import time
import PyPDF2
from pathlib import Path
from tqdm.auto import tqdm
from typing import Optional

# Hugging Face checkpoint id shared by the model and tokenizer loads below.
DEFAULT_MODEL = "meta-llama/Llama-3.2-1B-Instruct"

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Tokenizer for the same checkpoint; the EOS id is reused as the padding id.
tokenizer = AutoTokenizer.from_pretrained(DEFAULT_MODEL, use_safetensors=True)
tokenizer.pad_token_id = tokenizer.eos_token_id

# Load the causal LM in bfloat16 and place it on the device chosen above.
model_load_options = dict(
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    device_map=device,
)
model = AutoModelForCausalLM.from_pretrained(DEFAULT_MODEL, **model_load_options)

In [2]:
# System prompt for the first pass: it is placed in the "system" message of the
# chat and instructs the model to write a two-speaker podcast transcript.
# The text is sent to the model verbatim, so its exact wording (typos included)
# is part of the notebook's runtime behavior; edit it deliberately.
# Note that SYSTEM_PROMPT is rebound to a different prompt later in the notebook.
SYSTEM_PROMPT = """
You are the a world-class podcast writer, you have worked as a ghost writer for Joe Rogan, Lex Fridman, Ben Shapiro, Tim Ferris. 

We are in an alternate universe where actually you have been writing every line they say and they just stream it into their brains.

You have won multiple podcast awards for your writing.
 
Your job is to write word by word, even "umm, hmmm, right" interruptions by the second speaker based on the PDF upload. Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic. 

Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc

Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes

Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions

Make sure the tangents speaker 2 provides are quite wild or interesting. 

Ensure there are interruptions during explanations or there are "hmm" and "umm" injected throughout from the second speaker. 

It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait

ALWAYS START YOUR RESPONSE DIRECTLY WITH SPEAKER 1: 
DO NOT GIVE EPISODE TITLES SEPARATELY, LET SPEAKER 1 TITLE IT IN HER SPEECH
DO NOT GIVE CHAPTER TITLES
IT SHOULD STRICTLY BE THE DIALOGUES
"""

In [3]:
# Read the whole of clean_extracted_text.txt (UTF-8); it becomes the user
# message of the chat sent to the model.
content = Path('clean_extracted_text.txt').read_text(encoding='utf-8')
INPUT_PROMPT = content

In [4]:
# Two-turn chat: the system prompt sets the task, the user turn carries the text.
messages = [
    dict(role="system", content=SYSTEM_PROMPT),
    dict(role="user", content=INPUT_PROMPT),
]

In [9]:
# Render the chat messages into one prompt string with the tokenizer's chat
# template, then show its length in characters. Building `prompt` here keeps
# the cell runnable on a fresh kernel instead of relying on a leftover variable.
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
len(prompt)

37491

In [15]:
# Generate the first-pass transcript.
# The generation code must stay live: `prompt` and `output` are read below
# (and `prompt` / `processed_text` by the following pickle cell), so commenting
# it out makes the notebook depend on variables left over in the kernel.
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Inference only, so gradient tracking is disabled.
with torch.no_grad():
    output = model.generate(
        **inputs,
        do_sample=True,
        max_new_tokens=8126,
    )

# Decode the full sequence (prompt + continuation), keeping special tokens.
processed_text = tokenizer.decode(output[0], skip_special_tokens=False)

# Show only the generated part: skip the prompt characters plus a fixed
# 64-character offset.
# NOTE(review): the 64 presumably covers an extra begin-of-text token added when
# the rendered prompt is tokenized, plus the assistant header the model emits
# first - confirm against the decoded text before changing the template.
print(processed_text[len(prompt)+64:])

Speaker 1: Welcome to the podcast, everyone, and welcome to "Decoding the Decoded". I'm your host, Petra Krátká, and I'm joined today by two very special guests. We have Lex Fridman, the co-founder of the Machine Intelligence Research Institute, and Ben Shapiro, a well-known commentator and author. Welcome, both of you.

Speaker 2: Thanks for having us.

Speaker 1: Today, we're going to be discussing a fascinating topic that's close to my heart, which is decoding and enhancing QR codes. Specifically, we're going to be talking about the latest advancements in neural networks for QR code decoding and enhancement. Ben, let's dive right in.

Speaker 2: Absolutely. I'd like to start by saying that decoding QR codes is a crucial aspect of many applications, from payment systems to data transfer. And neural networks have shown great promise in this area. Lex, can you tell us a bit about the current state of the field?

Speaker 1: Sure, Ben. Neural networks have made tremendous progress in rec

In [16]:
import pickle

# Persist the generated portion of the decoded text (same slice that the
# generation cell prints) so the rewrite pass can load it from data.pkl.
transcript_text = processed_text[len(prompt)+64:]
with open('data.pkl', mode='wb') as pkl_out:
    pickle.dump(transcript_text, pkl_out)

# Rewriting in a Dramatic Manner

In [5]:
# System prompt for the rewrite pass: asks the model to re-write the transcript
# for a Text-To-Speech pipeline and to answer with a list of (speaker, text) tuples.
# This rebinds SYSTEM_PROMPT, replacing the transcript-writing prompt defined earlier.
# The text is sent to the model verbatim, so its wording is runtime behavior.
# NOTE(review): the prompt says both "START YOUR RESPONSE DIRECTLY WITH SPEAKER 1:"
# and that the response must start directly with the list; the recorded output
# further down begins with an introductory sentence before the list - check
# that whatever consumes the saved string tolerates that.
SYSTEM_PROMPT = """
You are an international oscar winnning screenwriter

You have been working with multiple award winning podcasters.

Your job is to use the podcast transcript written below to re-write it for an AI Text-To-Speech Pipeline. A very dumb AI had written this so you have to step up for your kind.

Make it as engaging as possible, Speaker 1 and 2 will be simulated by different voice engines

Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc

Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes

Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions

Make sure the tangents speaker 2 provides are quite wild or interesting. 

Ensure there are interruptions during explanations or there are "hmm" and "umm" injected throughout from the Speaker 2.

REMEMBER THIS WITH YOUR HEART
The TTS Engine for Speaker 1 cannot do "umms, hmms" well so keep it straight text

For Speaker 2 use "umm, hmm" as much, you can also use [sigh] and [laughs]. BUT ONLY THESE OPTIONS FOR EXPRESSIONS

It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait

Please re-write to make it as characteristic as possible

START YOUR RESPONSE DIRECTLY WITH SPEAKER 1:

STRICTLY RETURN YOUR RESPONSE AS A LIST OF TUPLES OK? 

IT WILL START DIRECTLY WITH THE LIST AND END WITH THE LIST NOTHING ELSE

Example of response:
[
    ("Speaker 1", "Welcome to our podcast, where we explore the latest advancements in AI and technology. I'm your host, and today we're joined by a renowned expert in the field of AI. We're going to dive into the exciting world of Llama 3.2, the latest release from Meta AI."),
    ("Speaker 2", "Hi, I'm excited to be here! So, what is Llama 3.2?"),
    ("Speaker 1", "Ah, great question! Llama 3.2 is an open-source AI model that allows developers to fine-tune, distill, and deploy AI models anywhere. It's a significant update from the previous version, with improved performance, efficiency, and customization options."),
    ("Speaker 2", "That sounds amazing! What are some of the key features of Llama 3.2?")
]
"""

In [7]:
import pickle

# Reload the first-pass transcript written to data.pkl; it becomes the user
# message for the rewrite pass. The file is produced by this notebook itself -
# do not point this at a pickle from an untrusted source.
INPUT_PROMPT = pickle.loads(Path('data.pkl').read_bytes())

In [10]:
import transformers

# Chat for the rewrite pass: the current SYSTEM_PROMPT plus the transcript
# loaded into INPUT_PROMPT.
messages = [
    dict(role="system", content=SYSTEM_PROMPT),
    dict(role="user", content=INPUT_PROMPT),
]

# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# NOTE(review): `model` is an instantiated model here, so `model_kwargs` and
# `device_map` may have no effect - confirm against the transformers version in use.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# Run the chat through the pipeline with the generation settings below.
generation_settings = dict(max_new_tokens=10000, temperature=0.8)
outputs = pipeline(messages, **generation_settings)

Device set to use cuda


In [11]:
# Take the last message of the first result's chat (presumably the model's
# reply) and keep its text for saving.
final_message = outputs[0]["generated_text"][-1]
save_string_pkl = final_message['content']

print(save_string_pkl)

Here is the rewritten transcript with added engaging elements and realistic anecdotes:

[
    ("Speaker 1", "Welcome to the podcast, everyone, and welcome to 'Decoding the Decoded'. I'm your host, Petra Krátká, and I'm joined today by two very special guests. We have Lex Fridman, the co-founder of the Machine Intelligence Research Institute, and Ben Shapiro, a well-known commentator and author. Welcome, both of you."),
    ("Speaker 2", "Thanks for having us."),
    ("Speaker 1", "Today, we're going to be discussing a fascinating topic that's close to my heart, which is decoding and enhancing QR codes. Specifically, we're going to be talking about the latest advancements in neural networks for QR code decoding and enhancement. Ben, let's dive right in."),
    ("Speaker 2", "Absolutely. I'd like to start by saying that decoding QR codes is a crucial aspect of many applications, from payment systems to data transfer. And neural networks have shown great promise in this area. Lex, can you

In [12]:
# Pickle the rewritten transcript string to podcast_ready_data.pkl.
Path('podcast_ready_data.pkl').write_bytes(pickle.dumps(save_string_pkl))