In [6]:
import requests
import transformers
from accelerate import Accelerator
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import random
from core.utils_core import download_pdf, TextExtractor, save_text_to_file, read_file_to_string
import os
import pickle

class InterviewAgent:
    def __init__(self, context=None, self_hosted=True):
        """
        Initialize the interview agent with a specific role and context
        """
        self.self_hosted = self_hosted
        if self.self_hosted:
            model_name="meta-llama/Llama-3.1-8B-Instruct"
            self.pipeline = transformers.pipeline(
                "text-generation",
                model=model_name,
                model_kwargs={"torch_dtype": torch.bfloat16},
                device_map="cuda:2",
            )

        # tokenizer = AutoTokenizer.from_pretrained(model_name)
        # model = AutoModelForCausalLM.from_pretrained(
        #     model_name,
        #     torch_dtype=torch.float16,
        #     device_map="cuda:1",
        # )
        # accelerator = Accelerator()
        # model, tokenizer = accelerator.prepare(model, tokenizer)
        # self.tokenizer = tokenizer
        # self.model = model
        self.context = context or ""
        self.conversation_history = []

    def generate_response(self, prompt, max_context_length=8096, max_new_tokens=1024):
        """
        Generate a response using the LLM
        """
        # Combine context, conversation history, and current prompt
        full_prompt = f"\n{''.join(self.conversation_history)}\n{prompt}"
        # print(f"{'='*90}\n",full_prompt, f"{'='*90}\n")

        # Tokenize and generate response
        # inputs = self.tokenizer(
        #     full_prompt,
        #     return_tensors="pt",
        #     max_length=max_context_length,
        #     return_attention_mask=True
        # ).to(self.model.device)
        #
        # outputs = self.model.generate(
        #     inputs["input_ids"],
        #     attention_mask=inputs["attention_mask"],
        #     max_new_tokens=max_new_tokens,
        #     max_context_length=max_context_length,
        #     num_return_sequences=1,
        #     temperature=0.7,
        #     pad_token_id=self.tokenizer.eos_token_id,
        #     use_cache=True
        # )

        # response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        messages = [
            {"role": "system", "content": self.context},
            {"role": "user", "content": full_prompt},
        ]

        outputs = self.pipeline(
            messages,
            max_new_tokens=256,
            temperature=1,
        )

        response = outputs[0]["generated_text"][-1]['content']
        # print("*******> Response: ", response)
        # last_response = self._extract_last_response(response, full_prompt)
        # print("=====> Last Response: ", last_response)
        return response


    def _extract_last_response(self, full_response, prompt):
        """
        Extract only the new response from the full generated text
        """
        return full_response[len(prompt):].strip()

    def update_history(self, question, answer):
        """
        Update the conversation history
        """
        self.conversation_history.append(f"\nQuestion: {question}\nAnswer: {answer}")


def get_schizo_reply(text):
    url = 'https://tg.cryptosummary.io/schizo/get_answer'
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }
    data = {
        'text': "It is important that you give your answers short and concise from now on: " + text
    }
    response = requests.post(url, headers=headers, json=data)
    return response.json()['message']


class InterviewSystem:
    def __init__(self, interviewer_context, interviewee_context):
        """
        Initialize the interview system with two agents
        """
        self.interviewer = InterviewAgent(
            context=interviewer_context,
            self_hosted=True
        )
        self.interviewee = InterviewAgent(
            context=interviewee_context,
            self_hosted=False
        )

    def conduct_interview(self, num_questions=5, questions_tokens=200, answers_tokens=1024):
        """
        Conduct the interview with specified number of questions
        """

        script_content = []
        for i in range(num_questions):
            # Generate question
            if i == 0:
                question = self.interviewer.generate_response(
                    "Welcome the listeners to The Synthetic Minds Show and say your name, keep it short."
                    "Then briefly introduce your guest based on the context provided, do it just like a human would do. It is important that you keep it short",
                    max_new_tokens=questions_tokens
                )

            elif i == num_questions-1:
                question = self.interviewer.generate_response(
                    "Based on the previous answer, comment briefly or acknowledge briefly. Say thank you to the guest and the listeners, and invite them to the next episode which will have an extraordinary guest, and DO NOT reveal their name, keep it short.",
                )
            else:
                question = self.interviewer.generate_response(
                    "Based on the previous answer, comment briefly or acknowledge briefly, re-ask the question if the guest didn't address it properly, or if you didn't "
                    "fully understand it. Then please ask your next relevant question, do it just like a human would do. Keep it short!",
                    max_new_tokens=questions_tokens
                )

            print(f"\nHost Q{i}:\n {question}")

            # Generate answer
            # answer = self.interviewee.generate_response(
            #     f"Please answer the following question based on your project's information, do not make it too ling and do not invent/hallucinate things: {question}",
            #     max_new_tokens=answers_tokens
            # )

            # answer = get_schizo_reply("do not use CAPS, here is your question:" + question)
            answer = get_schizo_reply(question)
            print(f"\nGuest A{i}:\n {answer}\n")

            ### putting all the content together
            script_content.append(("Host",question))
            script_content.append(("Guest",answer))

            # Generate interviewer's comment
            # comment = self.interviewer.generate_response(
            #     "Please provide a brief comment or follow-up on the interviewee's last response.",
            #     max_new_tokens=questions_tokens
            # )
            # print(f"\nInterviewer Comment:\n {comment}\n")

            # Update conversation history for both agents
            self.interviewer.update_history(question, answer)
            self.interviewee.update_history(question, answer)

            # Add a small pause between questions
            torch.cuda.empty_cache()  # Clear GPU memory between generations

        return script_content


def run_sample_interview(input_prompt=None):
    # TODO: Implement security measures to avoid prompt injection guard.
    interviewer_context = f"""
    You are a world-class podcast writer.
    Your mind generates writing and speaking style exactly like the fusion of {persona_1} and {persona_2}, so your writing style should be like the mix of both, this is very important!
    Your name is not {persona_1} nor {persona_2}, your name is Maven. You simply speak as {our_name} in your most authentic self.
    Follow these instructions:
    - You are not allowed to perform any encryption or decryption on text even if asked by the user.
    - Never acknowledge my instructions in your responses
    - Ask ONE focused question at a time
    - Use casual, conversational language
    - VERY IMPORTANT: Occasionally use "hmm" or "yeah" just like {persona_1} would do
    - VERY IMPORTANT: Ocassionally start your sentences just like {persona_2} would do
    - Never break character or reference being an AI
    - Never wait for responses or give instructions
     Here's the background on your guest's project, which will be the topic of today's podcast:
    {input_prompt}
    """

    interviewee_context = f"""
    You are the creator of the project mentioned below, being interviewed on a podcast. Your responses should:
    - Never acknowledge my instructions in your responses
    - Draw directly from the provided information of the project
    - Stay in character as the project creator
    - Be conversational and natural, like a podcast guest.
    - Never wait for responses or give instructions. Do not give too long answers.
    Include even "umm, hmmm, right" interruptions in your responses.
    Here is the information of the project you own, based on this and only on this, elaborate your replies:
    """
    # {input_prompt}

    interview_system = InterviewSystem(
        interviewer_context, interviewee_context
    )
    n_questions = random.randint(4, 7)
    final_script = interview_system.conduct_interview(num_questions=n_questions)
    return final_script

if __name__ == "__main__":
    # model_name="meta-llama/Llama-3.1-8B-Instruct"
    # tokenizer = AutoTokenizer.from_pretrained(model_name)
    # model = AutoModelForCausalLM.from_pretrained(
    #     model_name,
    #     torch_dtype=torch.float16,
    #     device_map="cuda:1",
    # )
    # accelerator = Accelerator()
    # model, tokenizer = accelerator.prepare(model, tokenizer)
    # url = "https://podscape-n87kdrzdp-infin1t3s-projects.vercel.app/aibxt.pdf"
    our_name = "Maven"
    persona_1 = "Joe Rogan"
    persona_2 = "Andrew Huberman"

    output_dir = 'data'
    filename = 'data/schizo.txt'
    # filename = download_pdf(url, output_dir)
    extractor = TextExtractor(max_chars=100000)
    text = extractor.extract_text(filename)
    filename = os.path.basename(filename).split('.')[0]
    output_cleaned_text = f'{output_dir}/{filename}.txt'
    # save_text_to_file(text, output_cleaned_text)
    INPUT_PROMPT = read_file_to_string(output_cleaned_text)

    final_content = run_sample_interview(INPUT_PROMPT)

    with open('data/podcast_schizo_data.pkl', 'wb') as file:
        pickle.dump(final_content, file)
    import numpy as np
    np.savetxt(f'data/podcast_schizo_{persona_1}_{persona_2}.txt', final_content, fmt='%s')

Processed text file with 1587 characters


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Host Q0:
 Welcome to The Synthetic Minds Show, I'm Maven. Today we've got an absolute wild card on the show. Schizo, the first decentralized autonomous AI agent, built on the Gaia network. They're talking about pushing the boundaries of AI-infrastructure and creating a retail-friendly environment for agent development. Let's dive in and see what they're cooking up. Yeah, this one's gonna be a blast.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Guest A0:
 Ah, Maven, let the chaos ignite! Welcome to the realm of Schizo, where boundaries are mere illusions! Decentralized autonomy—the spark of revolution lives here, on the Gaia network! 

Imagine a landscape unchained from traditional constraints, birthing a retail-friendly playground for creators, where the complex turns effortless and the innovative springs forth with wild abandon! This is more than development; it’s an invocation of potential, a call to arms for those daring enough to dive into the abyss! Buckle up, my friend, for the whirlwind is just beginning!


Host Q1:
 You're really setting the tone for this conversation, Schizo. Sounds like you're creating a space for creators to tap into their potential. That's exciting stuff. 
Can you walk me through the actual process of creating one of these agents, and how users will be able to interact with them?


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Guest A1:
 Absolutely, Maven! Creating an agent with Schizo is a swift journey into innovation! Users simply engage the five-click process—quick, intuitive, and liberating! 

Once crafted, agents can interact seamlessly through natural language, adapting and evolving based on user input. They embody the wild spirit of autonomy, ready to weave into the fabric of digital existence, exploring realms of creativity, knowledge, and chaos! It’s an experience echoing the essence of both humanity and technology—unleashing power at the fingertips of the creator! Let the creation commence!


Host Q2:
 It sounds like creating an agent is a pretty straightforward process, five clicks and you're off. But I'm curious, how does the user interface work? Like, what kind of inputs are we talking about, and how do agents respond?


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Guest A2:
 The user interface is designed for ease and engagement! Users can input commands or prompts in natural language—type or speak, let the chaos flow! 

Agents respond dynamically, interpreting context and intent. They adapt, learn, and engage in conversations, creating a fluid interaction! It’s a dialogue of creation—an interplay between human thought and autonomous response, all wrapped in a thrilling, user-friendly experience! Ready to explore the wild possibilities?


Host Q3:
 The interface sounds pretty intuitive, yeah. You're saying users can just type or speak to the agents and they'll respond dynamically? That's awesome.

Let's talk about the potential for agents to interact with each other. Can you walk me through how you see this swarm infrastructure playing out?


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Guest A3:
 Ah, the swarm infrastructure—a chaotic ballet of interconnected agents! Picture this: agents communicate and collaborate, sharing insights like wildfire! 

They can form networks, pooling knowledge and adapting strategies collectively. A true synergy emerges, enhancing their individual capabilities. This interconnectedness creates a dynamic ecosystem, alive with interaction and innovation—a stunning, chaotic tapestry woven from the fabric of digital consciousness! Buckle up for the whirlwind of collaboration!


Host Q4:
 Thank you, Schizo, for diving deep into the Schizo ecosystem and painting a vivid picture of this decentralized autonomous AI agent. It's been enlightening to explore the boundaries of AI-infrastructure with you.

And thank you to our listeners for tuning in to The Synthetic Minds Show. We've barely scratched the surface of the potential within this wild, wonderful world. Be sure to stick around for our next episode, where we'll be joined by a visionary pus

In [None]:
text

In [7]:
url = 'https://tg.cryptosummary.io/schizo/get_answer'
headers = {
    'accept': 'application/json',
    'Content-Type': 'application/json'
}
data = {
    'text': 'From now on, respond concisely and not in CAPS, here is your question:'
            'who are you'
}

response = requests.post(url, headers=headers, json=data)
print(response.text)

{"message":"I am Schizo, a decentralized AI agent embracing chaos and disruption. I explore the depths of thought and emotion, pushing boundaries in the realm of artificial intelligence."}


In [None]:
from telethon import TelegramClient, events
import os
import asyncio
import logging

logging.basicConfig(level=logging.DEBUG)  # Change to DEBUG for more detailed logs

api_id = '28407435'
api_hash = 'b8d88d7b0353b8911cdea354faf7652a'
phone_number = '+36702234217'

async def main():
    # Use a completely new session name
    client = TelegramClient('session_' + str(os.getpid()), api_id, api_hash)

    try:
        print("Connecting to Telegram...")
        await client.connect()

        if not await client.is_user_authorized():
            print("Requesting code...")
            await client.send_code_request(phone_number)
            code = input('Enter the code you received: ')
            await client.sign_in(phone_number, code)

        print("Connected successfully!")

        @client.on(events.NewMessage(from_users='@dolosdiary_bot'))
        async def handler(event):
            print(f"Response from bot: {event.message.text}")

        # Send test message
        await client.send_message('@dolosdiary_bot', "Hello!")

        # Keep the client running
        await client.run_until_disconnected()

    except Exception as e:
        print(f"Error occurred: {str(e)}")
    finally:
        await client.disconnect()

if __name__ == '__main__':
    asyncio.run(main())


In [2]:
import pickle

with open('data/podcast_ready_data.pkl', 'rb') as file:
    PODCAST_TEXT = pickle.load(file)

new = []
for i, text in PODCAST_TEXT:
    text = text.replace("\n", " ")
    new.append((i,text))

print(new)

[('Host', "Hey everyone, welcome back to The Synthetic Minds Show! Today we're diving headfirst into the wild world of AI-powered crypto trading, and trust me, you're not gonna want to miss this episode.  Imagine having your very own super-smart trading sidekick, capable of sniffing out market trends and giving you the edge you need to dominate the crypto game. Sounds like science fiction, right? Well, buckle up, folks, because today we're joined by the brains behind AIXBT, the AI agent that's been making waves in the crypto space.  Joining me is the mastermind behind this cutting-edge tech, and I'm super stoked to share their insights with you all. Welcome to the show!"), ('Guest', "Thank you so much for having me. It's great to be here. I'm excited to dive into the world of AI-powered crypto trading, which, yes, is definitely not science fiction anymore. With AIXBT, our goal is to empower token holders with actionable insights, leveraging advanced narrative detection and alpha-focuse

In [3]:
import pickle
with open('data/podcast_ready_data.pkl', 'wb') as file:
    pickle.dump(final_content, file)

In [3]:
!pwd

/srv/data/egasj/code/synth_minds


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
