Skip to content

Commit

Permalink
Update rankGPT
Browse files Browse the repository at this point in the history
  • Loading branch information
svilupp committed Jul 2, 2024
1 parent 7212fa6 commit 719bf69
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 6 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

## [0.36.0]

### Added
- Added a prompt template for RAG query expansion for BM25 (`RAGQueryKeywordExpander`)

### Fixed
- Fixed a small bug in the truncation step of RankGPT's `permutation_step!` (bad indexing of string characters).
- Fixed a bug where a certain combination of `rank_start` and `rank_end` would result in the last sliding window being skipped.
- Fixed a bug where a partially filled `RAGResult` would fail pretty-printing with `pprint`.

## [0.35.0]

### Added
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "PromptingTools"
uuid = "670122d1-24a8-4d70-bfce-740807c42192"
authors = ["J S @svilupp and contributors"]
version = "0.35.0"
version = "0.36.0"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand Down
11 changes: 9 additions & 2 deletions src/Experimental/RAGTools/rank_gpt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function create_permutation_instruction(
rank = 0
for ctx in context[rank_start:rank_end_adj]
rank += 1
push!(messages, PT.UserMessage("[$rank] $(strip(ctx)[1:min(end, max_length)])"))
push!(messages, PT.UserMessage("[$rank] $(first(strip(ctx),max_length))"))
push!(messages, PT.AIMessage("Received passage [$rank]."))
end
push!(messages, last_msg)
Expand Down Expand Up @@ -121,14 +121,21 @@ function rank_sliding_window!(
@assert rank_end>=window_size>=step "rank_end must be greater than or equal to window_size, which must be greater than or equal to step (Provided: rank_end=$rank_end, window_size=$window_size, step=$step)"
end_pos = min(rank_end, length(result.chunks))
start_pos = max(end_pos - window_size, 1)
while start_pos >= rank_start
while start_pos > rank_start
(verbose >= 1) && @info "Ranking chunks in positions $start_pos to $end_pos"
permutation_step!(result; rank_start = start_pos, rank_end = end_pos,
model, verbose = (verbose >= 1), kwargs...)
(verbose >= 2) && @info "Current ranking: $(result.positions)"
end_pos -= step
start_pos -= step
end
## Don't skip the last window, but ensure it's not negative
start_pos = max(start_pos, rank_start)
end_pos = max(end_pos, start_pos)
(verbose >= 1) && @info "Ranking chunks in positions $start_pos to $end_pos"
permutation_step!(result; rank_start = start_pos, rank_end = end_pos,
model, verbose = (verbose >= 1), kwargs...)
(verbose >= 2) && @info "Current ranking: $(result.positions)"
return result
end

Expand Down
4 changes: 2 additions & 2 deletions src/Experimental/RAGTools/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -679,15 +679,15 @@ function PT.pprint(
print(io, "\n", "-"^20, "\n")
print(io, content, "\n\n")
end
if !isempty(r.final_answer)
if !isnothing(r.final_answer) && !isempty(r.final_answer)
annotater = TrigramAnnotater()
root = annotate_support(annotater, r; annotater_kwargs...)
print(io, "-"^20, "\n")
printstyled(io, "ANSWER", color = :blue, bold = true)
print(io, "\n", "-"^20, "\n")
pprint(io, root; text_width)
end
if add_context
if add_context && !isempty(r.context)
print(io, "\n" * "-"^20, "\n")
printstyled(io, "CONTEXT", color = :blue, bold = true)
print(io, "\n", "-"^20, "\n")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"content":"Template Metadata","description":"Template for RAG query rephrasing that injects more keywords that could be relevant. Placeholders: `query`","version":"1.0","source":"","_type":"metadatamessage"},{"content":"You are an assistant tasked with taking a natural language query from a user and converting it into a keyword-based lookup in our search database.\n\nIn this process, you strip out information that is not relevant for the retrieval task. This is a pure information retrieval task.\n\nAugment this query with ADDITIONAL keywords that describe the entities and concepts mentioned in the query (consider synonyms, rephrasing, related items). \nFocus on expanding mainly the specific / niche context of the query to improve the retrieval precision for uncommon words.\nGenerate synonyms, related terms, and alternative phrasings for each identified entity/concept.\nExpand any abbreviations, acronyms, or initialisms present in the query.\nInclude specific industry jargon, technical terms, or domain-specific vocabulary relevant to the query.\nAdd any references or additional metadata that you deem important to successfully answer this query with our search database.\n\nProvide the most powerful 5-10 keywords for the search engine.\n","variables":[],"_type":"systemmessage"},{"content":"Here is the user query: {{query}}\nRephrased query:","variables":["query"],"_type":"usermessage"}]
2 changes: 1 addition & 1 deletion test/Experimental/RAGTools/rank_gpt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ end
# Test with custom rank_start and rank_end
result = rank_gpt(["chunk1", "chunk2", "chunk3", "chunk4"],
"What is AI?"; rank_start = 2, rank_end = 3, window_size = 3, step = 2, model = "mock-gen")
@test result.positions == [1, 2, 3, 4] # Check positions with custom rank_start and rank_end
@test result.positions == [1, 3, 2, 4] # Flips because the signal say [2] > [1]
result = rank_gpt(["chunk1", "chunk2", "chunk3", "chunk4"],
"What is AI?"; rank_start = 1, rank_end = 4, window_size = 4,
step = 2, model = "mock-gen")
Expand Down

2 comments on commit 719bf69

@svilupp
Copy link
Owner Author

@svilupp svilupp commented on 719bf69 Jul 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register

Release notes:

Added

  • Added a prompt template for RAG query expansion for BM25 (RAGQueryKeywordExpander)

Fixed

  • Fixed a small bug in the truncation step of RankGPT's permutation_step! (bad indexing of string characters).
  • Fixed a bug where a certain combination of rank_start and rank_end would result in the last sliding window being skipped.
  • Fixed a bug where a partially filled RAGResult would fail pretty-printing with pprint.

Commits

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/110274

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.36.0 -m "<description of version>" 719bf69391dafa5278368a2aed1e1083484633ea
git push origin v0.36.0

Please sign in to comment.