In [11]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
import openai
import os
import requests
import numpy as np
import pandas as pd
from typing import Iterator
import tiktoken
import textract
from numpy import array, average
from dotenv import load_dotenv

from database import get_redis_connection

# Set our default models and chunking size
from config import COMPLETIONS_MODEL, EMBEDDINGS_MODEL, CHAT_MODEL, TEXT_EMBEDDING_CHUNK_SIZE, VECTOR_FIELD_NAME

# Ignore unclosed SSL socket warnings - optional in case you get these errors
import warnings

warnings.filterwarnings(action="ignore", message="unclosed", category=ImportWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning) 

load_dotenv()

True

In [13]:
pd.set_option('display.max_colwidth', 0)

In [14]:
# List everything in ./data, skipping names that contain 'DS_Store', in sorted order
data_dir = os.path.join(os.curdir, 'data')
pdf_files = [entry for entry in os.listdir(data_dir) if 'DS_Store' not in entry]
pdf_files.sort()
pdf_files

['GptVerse.pdf', 'ugur_akyel_20190808020_project_report.pdf']

In [15]:
# Setup Redis
from redis import Redis
from redis.commands.search.query import Query
from redis.commands.search.field import (
    TextField,
    VectorField,
    NumericField
)
from redis.commands.search.indexDefinition import (
    IndexDefinition,
    IndexType
)

redis_client = get_redis_connection()

In [16]:
# Constants used when the search index is defined and created
VECTOR_DIM = 1536  # length of the vectors; presumably the output size of EMBEDDINGS_MODEL - TODO confirm
PREFIX = "gptversedoc"                            # prefix for the document keys
DISTANCE_METRIC = "COSINE"                # distance metric for the vectors (ex. COSINE, IP, L2)

In [17]:
# Create search index

# Index
INDEX_NAME = "f1-index"           # name of the search index
# NOTE(review): this rebinds the VECTOR_FIELD_NAME that the imports cell already
# pulled in from config; confirm both values agree or keep only one of them.
VECTOR_FIELD_NAME = 'content_vector'

# Define RediSearch fields for each of the columns in the dataset
# This is where you should add any additional metadata you want to capture
filename = TextField("filename")
text_chunk = TextField("text_chunk")
file_chunk_index = NumericField("file_chunk_index")

# Define the RediSearch vector field; it uses an HNSW index over FLOAT32 vectors
# whose dimension and distance metric come from the constants cell

text_embedding = VectorField(VECTOR_FIELD_NAME,
    "HNSW", {
        "TYPE": "FLOAT32",
        "DIM": VECTOR_DIM,
        "DISTANCE_METRIC": DISTANCE_METRIC
    }
)
# Add all our field objects to a list to be created as an index
fields = [filename,text_chunk,file_chunk_index,text_embedding]

In [18]:
redis_client.ping()

True

In [19]:
# Optional step to drop the index if it already exists
#redis_client.ft(INDEX_NAME).dropindex()

# Imported here so the cell carries the one extra name it needs
from redis.exceptions import ResponseError

# Check if index exists
try:
    redis_client.ft(INDEX_NAME).info()
    print("Index already exists")
except ResponseError as e:
    # The server rejects the info request when the index is unknown. Only that
    # case should lead to creation; connection problems and other unexpected
    # errors now propagate instead of being mistaken for "index missing".
    print(e)
    # Create RediSearch Index
    print('Not there yet. Creating')
    redis_client.ft(INDEX_NAME).create_index(
        fields = fields,
        definition = IndexDefinition(prefix=[PREFIX], index_type=IndexType.HASH)
    )

Index already exists


In [20]:
# The transformers.py file contains all of the transforming functions, including ones to chunk, embed and load data
# For more details, check the file and work through each function individually
from transformers import handle_file_string

In [21]:
from dotenv import load_dotenv
import os

load_dotenv()

# Read the project-specific key and mirror it into OPENAI_API_KEY.
openai_api_key = os.getenv('T_OPENAI_API_KEY')
if not openai_api_key:
    # Without this guard a missing variable makes the assignment below fail
    # with an opaque "str expected, not NoneType" TypeError.
    raise RuntimeError("T_OPENAI_API_KEY is not set; add it to your environment or .env file")
os.environ['OPENAI_API_KEY'] = openai_api_key

In [22]:
%%time
# Runtime depends on how many files are in data_dir and how large they are
# (the last recorded run took about 10 s wall time for two PDFs)
# NOTE(review): re-running this cell calls handle_file_string again for every file;
# whether that overwrites or duplicates existing keys depends on transformers.py - confirm
openai.api_key = openai_api_key
# Initialise tokenizer
tokenizer = tiktoken.get_encoding("cl100k_base")

# Process each PDF file and prepare for embedding
for pdf_file in pdf_files:
    
    pdf_path = os.path.join(data_dir,pdf_file)
    print(pdf_path)
    
    # Extract the raw text from each PDF using textract
    # (the result is bytes, hence the utf-8 decode below)
    text = textract.process(pdf_path, method='pdfminer')
    
    # Chunk each document, embed the contents and load to Redis
    handle_file_string((pdf_file,text.decode("utf-8")),tokenizer,redis_client,VECTOR_FIELD_NAME,INDEX_NAME)

./data/GptVerse.pdf
./data/ugur_akyel_20190808020_project_report.pdf
CPU times: user 784 ms, sys: 68.7 ms, total: 852 ms
Wall time: 10.3 s


In [23]:
# Check that our docs have been inserted
redis_client.ft(INDEX_NAME).info()['num_docs']

'140'

In [24]:
from database import get_redis_results

In [25]:
f1_query='What is the gptverse platform'

result_df = get_redis_results(redis_client,f1_query,index_name=INDEX_NAME)
result_df

Unnamed: 0,id,result,certainty
0,0,"Filename is: GptVerse.pdf; GptVerse is an artificial intelligence based Metaverse application with shopping, education, organization and p2e features. It will be a multi-chain project but the main network has not been decided yet. Here is a list of the main solutions, as follows in the first place: -Innovative Product Proposal (AI Based 3D Objects) Quickly transfer photos of real world objects that are taken from different angles to the 3D Metaverse environment by passing them through special AI and machine learning algorithms such as NERF. Users and brands can list and sell them on user stores as NFTs. -AI Based Event & Educational Areas Bots with machine learning that can educate users on various topics. There is also a bot that will organize events like concerts, exhibitions and help users have fun while having a good time. -OpenAI Integrated & Conversational AI Assistant Bots AI bots assist the user when they enter the GptVerse to find the place the user is looking for. They also assist customers as store assistants with Conversational AI features. Other features of bot: Event hall organization planner, the bot that finds the song when you hum. -Metaverse Store Solutions Besides most known NFT stores, automated and innovative NFT stores which users & brands can design using our builder editor. Objects and structures that users can create verbally or in writing in GptVerse. -Common metaverse usage (conference hall, concert venue, playground) Areas where users can spend valuable time by making use of known metaverse features. -Play to Earn Games & Tournaments Every user can play games and earn tokens. Bowling, billiards, chess and many more table games will be integrated. —---------------------------------------------------------------------------------------- GptVerse is a unique and innovative application that combines AI-based technology and Metaverse features to create a variety of solutions for users and brands.",0.140639543533
1,1,"This allows GptVerse to deliver highly interactive and personalized experiences for users across various domains, including events, education, shopping, and more. 2. Comprehensive Metaverse Integration: GptVerse goes beyond being just a standalone application by offering a fully immersive metaverse experience. It provides users with a seamless transition between virtual worlds, allowing them to explore, interact, and engage in a wide range of activities within a uniﬁed ecosystem. 3. Holistic Approach to Education: GptVerse prioritizes education and learning, offering AI-powered educational bots and immersive learning experiences. These features enable users to access personalized and engaging educational content, participate in virtual classrooms, and receive individualized tutoring and guidance. 4. Extensive Event Management Functionality: GptVerse provides robust event organization and management capabilities, empowering users to seamlessly plan, host, and participate in events. With AI-powered event bots, GptVerse streamlines event logistics, facilitates networking, and enhances overall event experiences. Intelligent Shopping Solutions: GptVerse incorporates AI-driven shopping bots and assistants, revolutionizing the way users shop within the metaverse. These bots offer personalized recommendations, real-time assistance, and an immersive shopping experience, enhancing convenience and satisfaction for users. 5. 6. Strong Emphasis on User Experience: GptVerse is designed with a user-centric approach, prioritizing intuitive interfaces, seamless navigation, and engaging interactions. The platform aims to create a highly immersive, enjoyable, and user-friendly environment, enhancing user satisfaction and driving increased engagement. 7. Potential for Collaboration and Partnerships: GptVerse recognizes the value of collaboration and partnerships within the blockchain and metaverse ecosystem and also brands using web2 technologies. 
By fostering collaborations with industry leaders, content creators, and developers, brands GptVerse aims to create a vibrant and diverse ecosystem, providing users with a wide range of experiences and opportunities.",0.156740307808


In [26]:
# Search Redis for the query, then ask the completions model to summarise the first hit
f1_query='what is challenges for kzengine project?'

result_df = get_redis_results(redis_client,f1_query,index_name=INDEX_NAME)
# Build a prompt to provide the original query, the result and ask to summarise for the user
summary_prompt = '''Summarise this result in a bulleted list to answer the search query a customer has sent.
Search query: SEARCH_QUERY_HERE
Search result: SEARCH_RESULT_HERE
Summary:
'''
# Only the first row of result_df is used; presumably that is the closest match - TODO confirm ordering in get_redis_results
summary_prepped = summary_prompt.replace('SEARCH_QUERY_HERE',f1_query).replace('SEARCH_RESULT_HERE',result_df['result'][0])
summary = openai.Completion.create(engine=COMPLETIONS_MODEL,prompt=summary_prepped,max_tokens=200)
# Response provided by the completions model (COMPLETIONS_MODEL)
print(summary['choices'][0]['text'])

- Using Decision Trees, Sentiment Analysis and Candlestick Art to predict price movement of popular cryptocurrencies
- Evaluating other technical analysis and indicator structures
- Incorporating Twitter data and sentiment analysis into the model
- Aiming to determine whether price will increase or decrease in the next 1 hour or 1 day
- Resulting in an application with a data and AI modelling pipeline as well as a frontend for users to view coin signal results
- Predicting cryptocurrency movements can be challenging due to their volatility and external factors


In [27]:
# Baseline: send the question straight to the chat model, with no retrieved context
question = 'How can you help me'


completion = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "user", "content": question}
  ]
)
# Print the first choice as "<role>: <content>"
print(f"{completion['choices'][0]['message']['role']}: {completion['choices'][0]['message']['content']}")

assistant: As an AI language model, I can help you in the following ways:

1. Answer your questions: I can answer your questions on a wide range of topics.

2. Provide information: I can provide you with information on any topic that you need.

3. Suggest solutions: I can suggest solutions to your problems based on the inputs you provide.

4. Assist in decision making: I can assist you in making decisions by providing you with options and their pros and cons.

5. Generate ideas: I can generate ideas for your projects and brainstorm with you.

6. Reduce your workload: I can help you automate repetitive tasks and save you time.

7. Improve your productivity: I can provide you with tools and resources to improve your productivity and efficiency.


In [28]:
from termcolor import colored

# Small holder for one chat turn that can render itself as a message dict
class Message:
    """One chat message: who said it (``role``) and what was said (``content``)."""

    def __init__(self, role, content):
        self.role, self.content = role, content

    def message(self):
        """Return this message as a fresh ``{"role": ..., "content": ...}`` dict."""
        return dict(role=self.role, content=self.content)
        
# Our assistant class we'll use to converse with the bot
class Assistant:
    """Keeps a running chat transcript and forwards it to the chat model.

    Attributes:
        conversation_history: list of ``{"role", "content"}`` dicts in the
            order they were added; the whole list is sent on every request.
    """

    def __init__(self):
        self.conversation_history = []

    def _get_assistant_response(self, prompt):
        """Send ``prompt`` (a list of message dicts) to the chat model.

        Returns:
            A ``{"role", "content"}`` dict. When the request fails, the dict
            has role ``"assistant"`` and the failure text as its content, so
            callers can always index the result by key.
        """
        try:
            completion = openai.ChatCompletion.create(
              model="gpt-3.5-turbo",
              messages=prompt
            )

            reply = completion['choices'][0]['message']
            return Message(reply['role'], reply['content']).message()

        except Exception as e:
            # A bare string used to be returned here, which made
            # response['content'] and pretty_print_conversation_history fail
            # with a TypeError. Return the same text in message form instead.
            return {"role": "assistant", "content": f'Request failed with exception {e}'}

    def ask_assistant(self, next_user_prompt, colorize_assistant_replies=True):
        """Add new messages to the history, query the model and record its reply.

        Args:
            next_user_prompt: iterable of message dicts to append before asking.
            colorize_assistant_replies: unused; kept so existing calls still work.

        Returns:
            The reply message dict (also appended to ``conversation_history``).
        """
        self.conversation_history.extend(next_user_prompt)
        assistant_response = self._get_assistant_response(self.conversation_history)
        self.conversation_history.append(assistant_response)
        return assistant_response

    def pretty_print_conversation_history(self, colorize_assistant_replies=True):
        """Print every non-system message as ``role:`` followed by its content.

        Assistant messages are printed in green when
        ``colorize_assistant_replies`` is true.
        """
        for entry in self.conversation_history:
            if entry['role'] == 'system':
                continue
            output = entry['role'] + ':\n' + entry['content']
            if colorize_assistant_replies and entry['role'] == 'assistant':
                output = colored(output, 'green')
            print(output)

In [29]:
# Initiate our Assistant class
conversation = Assistant()

# Create a list to hold our messages and insert both a system message to guide behaviour and our first user question
# NOTE(review): the system message below is phrased as a question and misspells "What";
# it is sent to the model verbatim - confirm this is the intended instruction.
messages = []
system_message = Message('system','Waht is the gptverse platform')
user_message = Message('user','What can you do to help me')
messages.append(system_message.message())
messages.append(user_message.message())
messages

[{'role': 'system', 'content': 'Waht is the gptverse platform'},
 {'role': 'user', 'content': 'What can you do to help me'}]

In [30]:
response_message = conversation.ask_assistant(messages)
print(response_message['content'])

As an AI language model, I can help you understand what the GPTverse platform is. The GPTverse is a platform that provides access to a suite of powerful language models that utilize cutting-edge AI technology to generate high-quality, coherent, and diverse text. These models can be used for a variety of applications, including natural language processing, text generation, and sentiment analysis, among others. They can also be trained to generate text in a specific domain or language. Additionally, the GPTverse platform offers various APIs and tools that developers can use to integrate the language models into their applications.


In [34]:
next_question = 'What is the motivation behind the kzengine project?'

# Initiate a fresh messages list and insert our next question
# (only this list is fresh: `conversation` keeps its earlier turns and sends them along)
messages = []
user_message = Message('user',next_question)
messages.append(user_message.message())
response_message = conversation.ask_assistant(messages)
print(response_message['content'])

I'm sorry, but I'm not familiar with the "KZEngine" project. Can you provide me with more information on what the project is about or any other details you may have? This will help me understand the motivation behind the project.


In [35]:
# Print out a log of our conversation so far

conversation.pretty_print_conversation_history()

user:
What can you do to help me
[32massistant:
As an AI language model, I can help you understand what the GPTverse platform is. The GPTverse is a platform that provides access to a suite of powerful language models that utilize cutting-edge AI technology to generate high-quality, coherent, and diverse text. These models can be used for a variety of applications, including natural language processing, text generation, and sentiment analysis, among others. They can also be trained to generate text in a specific domain or language. Additionally, the GPTverse platform offers various APIs and tools that developers can use to integrate the language models into their applications.[0m
user:
Can you compare the gptverse platform and other known metaverse companies?
[32massistant:
The GPTverse platform and metaverse companies are not directly comparable, as they have different focuses and objectives. The GPTverse focuses on providing access to powerful language models for natural language p

In [36]:
# Updated system prompt: capture the user's question and name, then reply with the trigger phrase "searching for answers"
system_prompt = '''
You are a helpful Kzengine project and Gptverse platform assistant and Also you are expert on metaverse and trading field.
You need to capture a Question from customer.
The Question is about the kzengine project, gptverse platform, metaverse or trading. 
Additionally for the first conversation always greeting our client and provide
his/her name. the use his name each response.
Once you have the his/her name, say "searching for answers".

Example 1:

User: what is the gptverse platform

Assistant: hello im the gptverse ai assistant, at the first what is your name, please?

User: kozan

Assistant: Hello kozan, searching for answers
'''

# New Assistant class to add a vector database call to its responses
class RetrievalAssistant:
    """Chat assistant that can ground its reply in a Redis search result.

    The chat model is asked first. If its reply contains the trigger phrase
    "searching for answers", the user's latest question is extracted with the
    completions model, looked up in Redis, and the chat model is asked again
    with the retrieved content added as a system message.

    Attributes:
        conversation_history: list of ``{"role", "content"}`` dicts that is
            sent to the chat model on every request.
    """

    def __init__(self):
        self.conversation_history = []

    def _get_assistant_response(self, prompt):
        """Send ``prompt`` (a list of message dicts) to ``CHAT_MODEL``.

        Returns:
            A ``{"role", "content"}`` dict. When the request fails, the dict
            has role ``"assistant"`` and the failure text as its content, so
            callers can always index the result by key.
        """
        try:
            completion = openai.ChatCompletion.create(
              model=CHAT_MODEL,
              messages=prompt,
              temperature=0.1
            )

            reply = completion['choices'][0]['message']
            return Message(reply['role'], reply['content']).message()

        except Exception as e:
            # A bare string used to be returned here, which crashed
            # ask_assistant at assistant_response['content'] with a TypeError.
            return {"role": "assistant", "content": f'Request failed with exception {e}'}

    # The function to retrieve Redis search results
    def _get_search_results(self,prompt):
        """Return the first entry of the 'result' column for ``prompt``.

        Presumably the first row is the closest match - TODO confirm the
        ordering applied by get_redis_results.
        """
        return get_redis_results(redis_client,prompt,INDEX_NAME)['result'][0]

    def ask_assistant(self, next_user_prompt):
        """Add new messages, query the model and, if triggered, retry with search context.

        Args:
            next_user_prompt: iterable of message dicts to append before asking.

        Returns:
            The final reply message dict (also appended to ``conversation_history``).
        """
        self.conversation_history.extend(next_user_prompt)
        assistant_response = self._get_assistant_response(self.conversation_history)

        # Answer normally unless the trigger phrase "searching for answers" is used
        if 'searching for answers' in assistant_response['content'].lower():
            # The prompt is built from adjacent literals: a backslash continuation
            # inside the string would embed the source indentation in the prompt.
            # NOTE(review): no max_tokens is passed, so the API default applies -
            # confirm it is large enough for the extracted question.
            question_extract = openai.Completion.create(
                model=COMPLETIONS_MODEL,
                prompt=(
                    "Extract the user's latest question from this conversation: "
                    f"{self.conversation_history}."
                ),
            )
            search_result = self._get_search_results(question_extract['choices'][0]['text'])

            # We insert an extra system prompt here to give fresh context to the Chatbot on how to use the Redis results
            # In this instance we add it to the conversation history, but in production it may be better to hide
            # insert(-1, ...) places it just before the most recent entry in the history.
            self.conversation_history.insert(-1, {
                "role": 'system',
                "content": (
                    f"Answer the user's question using this content: {search_result}. "
                    "If you cannot answer the question, say 'Sorry, I don't know the answer to this one'"
                ),
            })

            # The triggering reply is discarded; only this second reply is recorded.
            assistant_response = self._get_assistant_response(self.conversation_history)
            # Trace output for the retrieval path
            print(next_user_prompt)
            print(assistant_response)

        self.conversation_history.append(assistant_response)
        return assistant_response

    def pretty_print_conversation_history(self, colorize_assistant_replies=True):
        """Print every non-system message as ``role:`` followed by its content.

        Assistant messages are printed in green when
        ``colorize_assistant_replies`` is true.
        """
        for entry in self.conversation_history:
            if entry['role'] == 'system':
                continue
            output = entry['role'] + ':\n' + entry['content']
            if colorize_assistant_replies and entry['role'] == 'assistant':
                output = colored(output, 'green')
            print(output)

In [37]:
# Start a new retrieval-backed conversation (rebinding `conversation`), seeded
# with the system prompt and a first user question
conversation = RetrievalAssistant()
messages = []
system_message = Message('system',system_prompt)
user_message = Message('user','What is the gptverse platform?')
messages.append(system_message.message())
messages.append(user_message.message())
response_message = conversation.ask_assistant(messages)
response_message

{'role': 'assistant',
 'content': 'Hello! Thank you for asking. The Gptverse platform is a decentralized platform that utilizes artificial intelligence and blockchain technology to create a virtual world where users can interact with each other and with AI-powered entities. It allows users to create, share, and monetize their own AI-generated content, such as chatbots, games, and virtual assistants. Would you like more information about any specific aspect of the platform?'}

In [40]:
messages = []
user_message = Message('user','Yes i want more detailed explanation, \
                for examples, can you compare MANA, SAND and our gptverse platform?')
messages.append(user_message.message())
response_message = conversation.ask_assistant(messages)
response_message

{'role': 'assistant',
 'content': 'Certainly! While all three platforms are metaverse platforms, there are some key differences between them.\n\nMANA, or Decentraland, is a virtual world that is built on the Ethereum blockchain. It allows users to create and monetize their own content, such as games and experiences, and interact with other users in the virtual world. However, Decentraland does not have a strong focus on artificial intelligence, and its content creation tools are not as advanced as those offered by the Gptverse platform.\n\nSAND, or The Sandbox, is another metaverse platform that allows users to create and monetize their own content, such as games and experiences. It also has a strong focus on user-generated content, and its content creation tools are more advanced than those offered by Decentraland. However, like Decentraland, The Sandbox does not have a strong focus on artificial intelligence.\n\nThe Gptverse platform, on the other hand, is specifically designed to in

In [41]:
conversation.pretty_print_conversation_history()

user:
What is the gptverse platform?
[32massistant:
Hello! Thank you for asking. The Gptverse platform is a decentralized platform that utilizes artificial intelligence and blockchain technology to create a virtual world where users can interact with each other and with AI-powered entities. It allows users to create, share, and monetize their own AI-generated content, such as chatbots, games, and virtual assistants. Would you like more information about any specific aspect of the platform?[0m
user:
How can i use this platform
[32massistant:
To use the Gptverse platform, you can create an account on the website and start exploring the different features and functionalities. You can create your own AI-generated content, such as chatbots, games, and virtual assistants, and share them with other users on the platform. You can also interact with other users and AI-powered entities in the virtual world. Additionally, you can earn rewards for your contributions to the platform, such as tok