In [None]:
require 'httparty'
require 'numo/narray'
require 'openai'
require 'faiss'
require 'dotenv'

In [None]:
# Load environment variables through dotenv so that ENV['OPENAI_API_KEY'],
# read when the OpenAI client is built, can be supplied from a local file
# (dotenv's default is `.env`) instead of being hard-coded in this script.
Dotenv.load

In [None]:
# Download the essay text that is chunked and embedded further down.
essay_url = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt'
response = HTTParty.get(essay_url)

# Stop on a non-2xx reply; otherwise an error page would be saved, chunked
# and embedded as though it were the essay.
raise "Failed to download essay: HTTP #{response.code}" unless response.success?

text = response.body

# Keep a copy of the downloaded text on disk.
File.write('essay.txt', text)

text.length

In [None]:
# Split the text into consecutive pieces of at most `chunk_size` characters;
# only the final piece can be shorter.
chunk_size = 2048
chunk_starts = (0...text.length).step(chunk_size)
chunks = chunk_starts.map { |start| text[start, chunk_size] }
chunks.length

In [None]:
# Build the OpenAI client. The API key is read from the environment and is
# supplied twice: as `access_token` and as an explicit
# `Authorization: Bearer ...` header under `request_options`.
api_key = ENV['OPENAI_API_KEY']
auth_headers = { Authorization: "Bearer #{api_key}" }

client = OpenAI::Client.new(
  access_token: api_key,
  request_options: { headers: auth_headers }
)

# Requests an embedding for +input+ through +client.embeddings+ and returns
# the vector found at `data[0].embedding` in the reply.
#
# @param client [#embeddings] object responding to `embeddings(parameters:)`
# @param input [Object] value forwarded unchanged as the `input` parameter
# @param model [String] embedding model name sent with the request
# @return [Object] the `embedding` entry of the first `data` item
# @raise [RuntimeError] if the reply contains no embedding
def get_text_embedding(client, input, model: 'text-embedding-3-small')
  response = client.embeddings(parameters: { model: model, input: input })

  embedding = response.dig('data', 0, 'embedding')
  return embedding if embedding

  # Without this check a failed request would hand `nil` back to the caller
  # and only fail later, far away from the actual cause.
  error = response['error']
  detail = error.is_a?(Hash) ? error['message'] : error
  raise "Embedding request returned no embedding: #{detail || response.inspect}"
end

# Embed every chunk with its own request, then pack the resulting vectors
# into a single Numo::DFloat array.
embedding_rows = chunks.map do |chunk|
  get_text_embedding(client, chunk)
end
text_embeddings = Numo::DFloat[*embedding_rows]
text_embeddings.shape


In [None]:
# Create a flat L2 Faiss index sized by the second entry of the embedding
# array's shape, then add all chunk embeddings to it.
_row_count, d = text_embeddings.shape
index = Faiss::IndexFlatL2.new(d)
index.add(text_embeddings)

In [None]:
# Embed the question with the same helper that embeds the chunks. Both
# variable names refer to the same embedding object.
question = 'What were the two main things the author worked on before college?'
question_embeddings = question_embedding = get_text_embedding(client, question)

In [None]:
# Look up the chunks whose embeddings are nearest to the question embedding.
neighbor_count = 2
distances, indices = index.search([question_embeddings], neighbor_count)

# A single query was submitted, so only the first row of results is used.
index_array = indices.to_a.first

# Faiss fills the result with -1 when it finds fewer neighbours than
# requested; skip those so `chunks[-1]` does not silently pull in the last
# chunk as if it were a match.
retrieved_chunks = index_array.reject(&:negative?).map { |i| chunks[i] }

In [None]:
# Assemble the prompt: the retrieved chunks, separated and framed by a rule
# of dashes, followed by the instruction and the question.
separator = '---------------------'
context = retrieved_chunks.join("\n#{separator}\n")

prompt = <<~PROMPT
  Context information is below.
  #{separator}
  #{context}
  #{separator}
  Given the context information and not prior knowledge, answer the query.
  Query: #{question}
  Answer:
PROMPT

# Sends +user_message+ as a single user-role chat message through
# +client.chat+ and returns the text of the first choice.
#
# @param client [#chat] object responding to `chat(parameters:)`
# @param user_message [String] content of the user message
# @param model [String] chat model name sent with the request
# @return [String, nil] `choices[0].message.content` from the reply, or nil
#   when the reply has no such entry
# @raise [RuntimeError] if the reply carries an `error` entry
def run_completion(client, user_message, model: 'gpt-3.5-turbo')
  response = client.chat(
    parameters: {
      model: model,
      messages: [{ role: 'user', content: user_message }],
      temperature: 0.0
    }
  )

  # Surface a reported API failure instead of letting it collapse into a
  # `nil` answer that would just print as a blank line.
  error = response['error']
  if error
    detail = error.is_a?(Hash) ? error['message'] : error
    raise "Chat completion request failed: #{detail}"
  end

  response.dig('choices', 0, 'message', 'content')
end

# Send the assembled prompt to the chat model and print its reply.
answer = run_completion(client, prompt)
puts answer