In [17]:
gem 'httparty'
gem 'numo-narray'
gem 'ruby-openai'
gem 'faiss'

false

In [18]:
require 'httparty'
require 'numo/narray'
require 'openai'
require 'faiss'
require 'matrix'
require 'io/console'
require 'dotenv'

false

In [24]:
# Load variables from a .env file into ENV; the OpenAI client built in a later
# cell reads ENV['OPENAI_API_KEY'].
Dotenv.load
nil

In [20]:
# Download the essay text that serves as the retrieval corpus.
response = HTTParty.get('https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt')
# Fail fast on a non-2xx reply so an error page is never saved, chunked and embedded.
raise "Failed to download essay: HTTP #{response.code}" unless response.success?

text = response.body

# Keep a local copy of the downloaded text.
File.write('essay.txt', text)

# Cell result: number of characters in the essay.
text.length

75014

In [21]:
# Cut the essay into consecutive pieces of at most `chunk_size` characters;
# only the final piece can be shorter.
chunk_size = 2048
chunk_starts = (0...text.length).step(chunk_size)
chunks = chunk_starts.map { |start| text[start, chunk_size] }
# Cell result: how many chunks were produced.
chunks.length

37

In [25]:
# Read the API key once and fail fast (KeyError) when it is not set, instead of
# building a client with a nil token and a bare "Bearer " header.
api_key = ENV.fetch('OPENAI_API_KEY')

# NOTE(review): the explicit Authorization header is kept exactly as before;
# confirm whether the client needs it in addition to access_token.
client = OpenAI::Client.new(
  access_token: api_key,
  request_options: {
    headers: {
      'Authorization': "Bearer #{api_key}"
    }
  }
)

# Requests an embedding for +input+ and returns the first vector in the reply.
#
# @param client [#embeddings] object responding to +embeddings(parameters:)+
# @param input [String] text to embed
# @param model [String] embedding model name; the default is the model that
#   was previously hard-coded, so existing two-argument calls are unchanged
# @return [Object, nil] the value stored at +data[0].embedding+ in the response,
#   or nil when the response has no such entry
def get_text_embedding(client, input, model: 'text-embedding-3-small')
  response = client.embeddings(parameters: { model: model, input: input })
  response.dig('data', 0, 'embedding')
end

# Embed every chunk (one get_text_embedding call per chunk), then stack the
# resulting vectors into a single Numo::DFloat array.
embedding_rows = chunks.map do |chunk|
  get_text_embedding(client, chunk)
end
text_embeddings = Numo::DFloat[*embedding_rows]
# Cell result: shape of the stacked embeddings.
text_embeddings.shape

[37, 1536]

In [26]:
# The index dimension is the second entry of the embedding array's shape.
_chunk_count, d = text_embeddings.shape
# Build a Faiss::IndexFlatL2 of that dimension and add the chunk embeddings.
index = Faiss::IndexFlatL2.new(d)
index.add(text_embeddings)

In [27]:
# The question to answer from the essay.
question = "What were the two main things the author worked on before college?"
# Embed the question with the same helper used for the chunks.
question_embedding = get_text_embedding(client, question)
# Wrap the single vector in an outer array so the result is a one-row matrix,
# which is what gets passed to index.search in the next cell.
question_embeddings = Numo::DFloat[question_embedding]
# Cell result: shape of the question matrix.
question_embeddings.shape

[1, 1536]

In [28]:
# Ask the index for the 2 stored embeddings closest to the question embedding.
distances, indices = index.search(question_embeddings, 2)
# A single query row was searched, so the ids of interest are in the first row.
index_array = indices.to_a[0]
# Faiss reports a missing neighbour as -1 (fewer stored vectors than requested).
# Ruby would read chunks[-1] as "last chunk", so drop negative ids before lookup.
retrieved_chunks = index_array.reject(&:negative?).map { |i| chunks[i] }

["\n\nWhat I Worked On\n\nFebruary 2021\n\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn't write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.\n\nThe first programs I tried writing were on the IBM 1401 that our school district used for what was then called \"data processing.\" This was in 9th grade, so I was 13 or 14. The school district's 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a mini Bond villain's lair down there, with all these alien-looking machines — CPU, disk drives, printer, card reader — sitting up on a raised floor under bright fluorescent lights.\n\nThe language we used was an early version of Fortran. You had to type programs on punch cards, then s

In [29]:
# Assemble the prompt: the retrieved chunks, separated by dashed rules, followed
# by the instruction and the question. The squiggly heredoc strips the body's
# leading indentation, so the resulting string has no leading spaces.
prompt = <<~PROMPT
  Context information is below.
  ---------------------
  #{retrieved_chunks.join("\n---------------------\n")}
  ---------------------
  Given the context information and not prior knowledge, answer the query.
  Query: #{question}
  Answer:
PROMPT

# Sends +user_message+ as a single user-role chat message and returns the text
# of the first choice in the reply.
#
# @param client [#chat] object responding to +chat(parameters:)+
# @param user_message [String] content of the user message
# @param model [String] chat model name
# @param temperature [Numeric] sampling temperature forwarded to the API; the
#   default is the value that was previously hard-coded
# @return [Object, nil] the value stored at +choices[0].message.content+, or nil
#   when the response has no such entry
def run_completion(client, user_message, model: 'gpt-3.5-turbo', temperature: 0.7)
  messages = [{ role: 'user', content: user_message }]
  response = client.chat(
    parameters: { model: model, messages: messages, temperature: temperature }
  )
  response.dig('choices', 0, 'message', 'content')
end

# Send the assembled prompt with the default model and print the returned answer.
puts run_completion(client, prompt)

The two main things the author worked on before college were writing and programming.
