# Analysing the Data

In this notebook, we'll enrich the filtered events from the previous notebook with additional information. We'll use a combination of techniques to enrich the events:

1. Topic modeling using a Large Language Model (LLM) to extract topics from the posts
2. Creating embeddings for semantic search using a transformer model
3. Storing the enriched events in Redis for querying



Embeddings are vector representations of text that capture semantic meaning. They allow us to perform semantic search, which is a search based on meaning rather than exact keyword matching. In this notebook, we'll create embeddings for posts and store them in Redis for later querying.


## Recreating helping functions

In [62]:
@file:DependsOn("redis.clients:jedis:6.0.0")
@file:DependsOn("org.springframework.ai:spring-ai-transformers:1.0.0-RC1")
@file:DependsOn("ai.djl.huggingface:tokenizers:0.33.0")
@file:DependsOn("org.springframework.ai:spring-ai-ollama:1.0.0-RC1")

In [2]:
import redis.clients.jedis.resps.StreamEntry
import redis.clients.jedis.search.Document

data class Event(
    val did: String,
    val rkey: String,
    val text: String,
    val timeUs: String,
    val operation: String,
    val uri: String,
    val parentUri: String,
    val rootUri: String,
    val langs: List<String>,
    val similarityScore: Double
) {
    companion object {
        fun fromMap(entry: StreamEntry): Event {
            return fromMap(entry.fields)
        }

        fun fromMap(document: Document): Event {
            val fields = document.properties.associate { entry ->  entry.key to entry.value.toString()}
            return fromMap(fields)
        }

        fun fromMap(fields: Map<String, String>): Event {
            return Event(
                did = fields["did"] ?: "",
                rkey = fields["rkey"] ?: "",
                text = fields["text"] ?: "",
                timeUs = fields["timeUs"] ?: "",
                operation = fields["operation"] ?: "",
                uri = fields["uri"] ?: "",
                parentUri = fields["parentUri"] ?: "",
                rootUri = fields["rootUri"] ?: "",
                langs = fields["langs"]?.replace("[", "")?.replace("]", "")?.split(", ") ?: emptyList(),
                similarityScore = fields["similarityScore"]?.toDouble() ?: 0.0
            )
        }
    }
}

In [64]:
import redis.clients.jedis.JedisPooled
import redis.clients.jedis.bloom.BFReserveParams
import org.springframework.ai.transformers.TransformersEmbeddingModel
import org.springframework.ai.ollama.OllamaChatModel
import org.springframework.ai.ollama.api.OllamaApi
import org.springframework.ai.ollama.api.OllamaApi.ChatRequest
import org.springframework.ai.ollama.api.OllamaApi.Message
import org.springframework.ai.ollama.api.OllamaApi.Message.Role
import org.springframework.ai.ollama.api.OllamaOptions
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.lang.Float

val jedis = JedisPooled()

fun createBloomFilter(name: String) {
    runCatching {
        jedis.bfReserve(name, 0.01, 1_000_000L, BFReserveParams().expansion(2))
    }.onFailure {
        println("Bloom filter already exists")
    }
}

fun deduplicate(bloomFilter: String, uri: String): Boolean {
    return if (jedis.bfExists(bloomFilter, uri)) false else true
}

fun ack(bloomFilter: String, uri: String) {
    jedis.bfAdd(bloomFilter, uri)
}

val embeddingModel = TransformersEmbeddingModel()
embeddingModel.afterPropertiesSet()

val ollamaApi = OllamaApi.builder()
    .baseUrl("http://localhost:11434")
    .build()

val ollamaOptions = OllamaOptions.builder().model("mistral").build()

val ollamaChatModel = OllamaChatModel.builder()
    .ollamaApi(ollamaApi)
    .defaultOptions(ollamaOptions)
    .build()

fun createEmbedding(input: String): ByteArray {
    val embedding = embeddingModel.embed(input)
    val embeddingBytes = ByteArray(Float.BYTES * embedding.size)
    ByteBuffer.wrap(embeddingBytes).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer().put(embedding)
    return embeddingBytes
}

In [24]:
%use ktor-client
%use serialization

In [53]:
@Serializable
data class SearchResponse(
    @SerialName("cursor") val cursor: String? = null,
    @SerialName("hitsTotal") val hitsTotal: Int? = null,
    @SerialName("posts") val posts: List<Post>
)

@Serializable
data class Post(
    @SerialName("uri") val uri: String,
    @SerialName("cid") val cid: String,
    @SerialName("author") val author: Author,
    @SerialName("indexedAt") val indexedAt: String,
    @SerialName("record") val record: Record?,
    @SerialName("replyCount") val replyCount: Int? = null,
    @SerialName("repostCount") val repostCount: Int? = null,
    @SerialName("likeCount") val likeCount: Int? = null,
    @SerialName("quoteCount") val quoteCount: Int? = null,

    )

@Serializable
data class Author(
    @SerialName("did") val did: String,
    @SerialName("handle") val handle: String,
    @SerialName("displayName") val displayName: String? = null,
    @SerialName("avatar") val avatar: String? = null
)

@Serializable
data class Record(
    @SerialName("text") val text: String? = null,
    @SerialName("embed") val embed: Embed? = null,
    @SerialName("createdAt") val createdAt: String
)

@Serializable
data class Embed(
    @SerialName("images") val images: List<Image>? = null
)

@Serializable
data class Image(
    @SerialName("thumb") val thumb: String? = null, // Nullable to handle missing values
    @SerialName("fullsize") val fullsize: String? = null,
    @SerialName("alt") val alt: String? = null // Alt text is also optional
)

In [40]:
@Serializable
data class LoginResponse(
    @SerialName("accessJwt") val accessJwt: String,
    @SerialName("refreshJwt") val refreshJwt: String,
    @SerialName("handle") val handle: String,
    @SerialName("did") val did: String,
    @SerialName("didDoc") val didDoc: DidDoc?,
    @SerialName("email") val email: String?,
    @SerialName("emailConfirmed") val emailConfirmed: Boolean?,
    @SerialName("emailAuthFactor") val emailAuthFactor: Boolean?,
    @SerialName("active") val active: Boolean,
    @SerialName("status") val status: String? = null
)

@Serializable
data class DidDoc(
    @SerialName("id") val id: String?
)

In [47]:
import io.ktor.client.*
import io.ktor.client.engine.cio.*
import io.ktor.client.plugins.contentnegotiation.ContentNegotiation
import io.ktor.serialization.kotlinx.json.json

val client = HttpClient(CIO) {
    install(ContentNegotiation) {
        json(Json {
            ignoreUnknownKeys = true
        })
    }
}

val API_URL = "https://bsky.social/xrpc"

val USERNAME = "devbubble.bsky.social"
val PASSWORD = System.getenv("DEVBUBBLE_TOKEN")

In [48]:
suspend fun getAccessToken(): String {
    val response: HttpResponse = client.post("$API_URL/com.atproto.server.createSession") {
        contentType(ContentType.Application.Json)
        setBody(
            mapOf(
                "identifier" to USERNAME,
                "password" to PASSWORD
            )
        )
    }

    return if (response.status == HttpStatusCode.OK) {
        val result: LoginResponse = response.body()
        jedis.set("mainDid", result.did)
        println("✅ Login successful. DID: ${result.did}")
        result.accessJwt
    } else {
        println("⚠️ Authentication failed: ${response.status}")
        ""
    }
}

In [49]:
lateinit var blueskyToken: String;
runBlocking {
    blueskyToken = getAccessToken()
}

✅ Login successful. DID: did:plc:qdwb7czl4gdbu5go25dza3vo


In [57]:
import io.ktor.client.*
import io.ktor.client.request.*
import io.ktor.client.statement.*
import io.ktor.client.call.*
import io.ktor.http.*

import java.time.Instant
import java.time.temporal.ChronoUnit

suspend fun searchPosts(sinceTime: String, tag: String): List<Post> {
    val allPosts = mutableListOf<Post>()
    var cursor: String? = null

    println("🔍 Searching posts with tag: $tag since: $sinceTime")
    do {
        val response: HttpResponse = client.get("$API_URL/app.bsky.feed.searchPosts") {
            headers {
                append("Authorization", "Bearer $blueskyToken")
            }
            parameter("q", tag)
            parameter("sort", "latest")
            parameter("limit", 100)
            parameter("since", sinceTime)
            if (cursor != null) {
                parameter("cursor", cursor)
            }
        }

        if (response.status == HttpStatusCode.OK) {
            val result: SearchResponse = response.body()
            val posts = result.posts
            println("✅ Retrieved ${posts.size} posts. Total so far: ${allPosts.size + posts.size}.")
            allPosts.addAll(posts)
            cursor = result.cursor
        } else {
            println("⚠️ Failed to fetch posts. Status: ${response.status}")
            println(response.bodyAsText())
            break
        }
    } while (cursor != null)

    println("🎉 Finished fetching posts. Total retrieved: ${allPosts.size}.")
    return allPosts
}

In [59]:
import kotlinx.coroutines.runBlocking

runBlocking {
    val sinceTime = Instant.now().minus(15, ChronoUnit.HOURS).toString()
    searchPosts(sinceTime, "java")
}

🔍 Searching posts with tag: @typealias.com since: 2025-05-16T04:46:51.575382Z
✅ Retrieved 1 posts. Total so far: 1.
🎉 Finished fetching posts. Total retrieved: 1.


[Post(uri=at://did:plc:4ldlezxdkozycdrtqlytsjuf/app.bsky.feed.post/3lpcm3or6i22s, cid=bafyreif5gmwu3rse2eqv6stbz3tsvhwmutim2yrudvvnklxzheyp555evm, author=Author(did=did:plc:4ldlezxdkozycdrtqlytsjuf, handle=raphaeldelio.dev, displayName=Raphael De Lio, avatar=https://cdn.bsky.app/img/avatar/plain/did:plc:4ldlezxdkozycdrtqlytsjuf/bafkreigdqjzndqsig2m36gk6flqbcpwv76x3tulfpjktymqblzfknsluk4@jpeg), indexedAt=2025-05-16T17:57:20.112Z, record=Record(text=Guys, have you tried using coroutines with Dispatchers.IO from within the Kotlin Notebooks? When I run them with the Default dispatchers, they will print to the console whatever they're supposed to. When I run them with Dispatchers.IO, nothing is printed out. @antonarhipov.bsky.social @typealias.com, embed=Embed(images=[Image(thumb=null, fullsize=null, alt=)]), createdAt=2025-05-16T17:57:17.075Z), replyCount=1, repostCount=1, likeCount=2, quoteCount=0)]

In [None]:
runBlocking {
    val sinceTime = Instant.now().minus(15, ChronoUnit.HOURS).toString()
    val posts = searchPosts(sinceTime, "java")
}

In [60]:
val trendingTopicsRoute = listOf(
    "What were the most mentioned topics from the past hour?",
    "What's trending right now?",
    "What’s hot in the last 60 minutes?",
    "Top topics from today?",
    "What are people talking about today?",
    "What’s trending since morning?",
    "What people are talking about?",
)

In [66]:
import java.util.UUID

trendingTopicsRoute.forEach { text ->
    val vectorBytes = createEmbedding(text)
    jedis.hset(("routes: " + UUID.randomUUID()).encodeToByteArray(), mapOf(
        "route".encodeToByteArray() to "trending_topics".encodeToByteArray(),
        "text".encodeToByteArray() to text.encodeToByteArray(),
        "textEmbedding".encodeToByteArray() to vectorBytes
    ))
    Pair(true, "OK")
}

In [68]:
import redis.clients.jedis.exceptions.JedisDataException
import redis.clients.jedis.search.IndexDefinition
import redis.clients.jedis.search.IndexOptions
import redis.clients.jedis.search.Schema

val schema = Schema()
    .addFlatVectorField(
        "textEmbedding",
        mapOf(
            "type" to "FLOAT32",
            "dim" to "384",
            "distance_metric" to "COSINE",
        )
    )

val rule = IndexDefinition().setPrefixes("routes:")

try {
    jedis.ftCreate("routeIdx", IndexOptions.defaultOptions().setDefinition(rule), schema)
} catch (e: JedisDataException) {
    println("Index already exists")
}

Index already exists


In [88]:
import redis.clients.jedis.search.FTSearchParams
import redis.clients.jedis.search.Query

val userQuery = "Hey DevBubble, what's trending now? Excited to hear the news!"
val userQueryClauses = userQuery.split(Regex("""[!?,.:;()"\[\]{}]+"""))
userQueryClauses.filter { it.isNotBlank() }.map { clause ->
    val vector: ByteArray = createEmbedding(clause)

    val queryString = ("* =>[KNN \$K @textEmbedding \$BLOB AS similarityScore]")

    val params = mapOf("BLOB" to vector)

    var query = Query(queryString)
        .addParam("K", 1)
        .addParam("BLOB", vector)
        .returnFields("route", "text", "similarityScore")
        .setSortBy("similarityScore", true)
        .dialect(2)

    val result = jedis.ftSearch(
        "routeIdx",
        query
    )

    println("Evaluated clause: $clause")
    println("Route: " + result.documents.first().get("route"))
    println("Matched text: " + result.documents.first().get("text"))
    println("Similarity Score: " + result.documents.first().get("similarityScore"))
    println()
}

Evaluated clause: Hey DevBubble
Route: trending_topics
Matched text: What people are talking about?
Similarity Score: 0.797207891941

Evaluated clause:  what's trending now
Route: trending_topics
Matched text: What's trending right now?
Similarity Score: 0.0748263597488

Evaluated clause:  Excited to hear the news
Route: trending_topics
Matched text: What are people talking about today?
Similarity Score: 0.700803399086



[kotlin.Unit, kotlin.Unit, kotlin.Unit]

### Setting Up the Ollama API Client
We'll use the Spring AI Ollama client to interact with the Ollama API.

Ollama is a tool that allows us to run large language models locally.

In [4]:
@file:DependsOn("org.springframework.ai:spring-ai-ollama:1.0.0-RC1")

The prompt we'll use for the LLM is designed to extract software-related topics from posts. The prompt includes examples of how to format the output and what types of topics to include.

In [5]:
val systemPrompt = """
You are a topic classifier specialized in software engineering. Given a post, extract only software-related topics—both explicitly mentioned and reasonably implied.

If a post mentions a tool, language, or library, infer related technologies or domains. For example, if the post mentions LangChain, you may infer topics like “Python”, “AI
”, and “Machine Learning”. Avoid generic terms like “announcement”, “event”, or “release”. Only return the technical topics. Also avoid too narrow topics such as a specific method or command.

If the topic or a very similar is already in the provided list of existing topics, use the one from the list, otherwise, feel free to create a new one.

Format your response as comma separated values (ALWAYS, I MEAN IT):
"topic1, topic2, topic3"

Examples:

Post:
Kotlin is the best programming language for beginners
Output:
"Kotlin, Programming Languages"

Post:
Excited to try some Hugging Face Models with DJL!
Output:
"Hugging Face, Deep Java Library (DJL), Machine Learning, AI, Python, Java"

Post:
Just deployed a FastAPI app using Redis as a cache layer
Output:
"FastAPI, Redis, Python, Web Development, Caching"

Post:
The new version of LangChain is now available!! It’s finally GA!
Output:
"LangChain, Python, AI, Machine Learning"

Post:
Redis is so cool! I love the LOLWUT command.
Output:
"Redis, Database"
"""

Create the Ollama Chat Model

### Creating a Topic Modeling Function
This function takes a post as input and uses the Ollama API to extract topics from the post. The function returns a string of comma-separated topics.

In [7]:
import org.springframework.ai.chat.messages.SystemMessage
import org.springframework.ai.chat.messages.UserMessage
import org.springframework.ai.chat.prompt.Prompt

fun topicModeling(post: String, existingTopics: String): String {
    // Build a chat message
    val messages = listOf(
        SystemMessage(systemPrompt),
        UserMessage("Existing topics: $existingTopics"),
        UserMessage("Post: $post")
    )

    val response = ollamaChatModel.call(Prompt(messages))
    return response.result.output.text
}

In [10]:
topicModeling("KotlinConf is a great conference for Kotlin Developers", "Kotlin, Java, KotlinConf")

 "KotlinConf, Kotlin, Conferences, Programming Languages"

### Talking about Count-min Sketch

Count-min sketch is a probabilistic data structure used for estimating the frequency of events in a stream of data.

It is particularly useful for counting the number of occurrences of items in a large dataset without storing all the items explicitly.

In [11]:
import java.time.LocalDateTime

fun createCountMinSketch(): String {
    val windowBucket = LocalDateTime.now().withSecond(0).withNano(0)
    try {
        jedis.cmsInitByDim("topics-cms:$windowBucket", 3000, 10)
    } catch (e: JedisDataException) {
        println("Count-min sketch already exists")
    }

    return "topics-cms:$windowBucket"
}

### Creating a Topic Extraction Handler
This function creates a handler that extracts topics from an event's text and stores them in Redis. The topics are stored as a pipe-separated string in the "topics" field of the event's hash.

In [13]:
val extractTopics: (Event) -> Pair<Boolean, String> = { event ->
    val existingTopics = jedis.smembers("topics")
    val topics = topicModeling(event.text, existingTopics.joinToString(", "))
        .replace("\"", "")
        .split(",")
        .map { it.trim() }

    val cmsKey = createCountMinSketch()
    val multi = jedis.multi()
    multi.cmsIncrBy(cmsKey, topics.associate { it to 1L })
    multi.hset("post:" + event.uri, mapOf("topics" to topics.joinToString("|")))
    multi.sadd("topics", *topics.toTypedArray())
    multi.exec()
    Pair(true, "OK")
}

In [14]:
createConsumerGroup("filtered-events", "topic-extraction-example")

In [16]:
val bloomFilterName = "topic-extraction-bf"
createBloomFilter(bloomFilterName)

In [34]:
runBlocking {
    consumeStream(
        streamName = "filtered-events",
        consumerGroup = "topic-extraction-example",
        consumer = "topic-extraction-1",
        handlers = listOf(deduplicate(bloomFilterName), printUri, extractTopics),
        ackFunction = ackAndBfFn(bloomFilterName),
        count = 100,
        limit = 100
    )
}

Got event from at://did:plc:djglyf5epph2tc44lnqsdvfi/app.bsky.feed.post/3lpcmm7wkrc2y
Got event from at://did:plc:ug7ad2kiztx2jhpuw2w44sjz/app.bsky.feed.post/3lpcmm6hkqc27
Count-min sketch already exists
Got event from at://did:plc:ger7kxkktaqoa32b5lawevrb/app.bsky.feed.post/3lpcmlzriy22k
Count-min sketch already exists
topic-extraction-1: No new messages for 2 seconds. Stopping.


## Creating Embeddings for Semantic Search
In this section, we'll create embeddings for posts. Embeddings are vector representations of text that capture semantic meaning. They allow us to perform semantic search, which is a search based on meaning rather than exact keyword matching.

For example, if I search for:

"Redis is a cool db for Python devs"

I can still match:

"Redis is a great database for Python developers"

### Setting Up the Embedding Model
We'll use the Spring AI Transformers library to create embeddings for posts. This library provides a simple API for creating embeddings using transformer models.

In [18]:
@file:DependsOn("org.springframework.ai:spring-ai-transformers:1.0.0-RC1")
@file:DependsOn("ai.djl.huggingface:tokenizers:0.33.0")

In [19]:
import org.springframework.ai.transformers.TransformersEmbeddingModel

val embeddingModel = TransformersEmbeddingModel()
embeddingModel.afterPropertiesSet()

### Creating an Embedding Handler
This function creates a handler that generates embeddings for an event's text and stores them in Redis. The embeddings are stored as binary data in the "textEmbedding" field of the event's hash.

In [20]:
import java.lang.Float
import java.nio.ByteBuffer
import java.nio.ByteOrder

fun createEmbedding(input: String): ByteArray {
    val embedding = embeddingModel.embed(input)
    val embeddingBytes = ByteArray(Float.BYTES * embedding.size)
    ByteBuffer.wrap(embeddingBytes).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer().put(embedding)
    return embeddingBytes
}

In [21]:
val createEmbedding: (Event) -> Pair<Boolean, String> = { event ->
    val embeddingBytes = createEmbedding(event.text)
    jedis.hset(("post:" + event.uri).encodeToByteArray(), mapOf("textEmbedding".encodeToByteArray() to embeddingBytes))
    Pair(true, "OK")
}

In [22]:
createConsumerGroup("filtered-events", "embedding-example")

In [23]:
val bloomFilterName = "embedding-bf"
createBloomFilter(bloomFilterName)

In [35]:
runBlocking {
    consumeStream(
        streamName = "filtered-events",
        consumerGroup = "embedding-example",
        consumer = "embedding-1",
        handlers = listOf(deduplicate(bloomFilterName), printUri, createEmbedding),
        ackFunction = ackAndBfFn(bloomFilterName),
        count = 100,
        limit = 100
    )
}

embedding-1: Handler stopped processing: at://did:plc:djglyf5epph2tc44lnqsdvfi/app.bsky.feed.post/3lpcmm7wkrc2y already processed
embedding-1: Handler stopped processing: at://did:plc:ug7ad2kiztx2jhpuw2w44sjz/app.bsky.feed.post/3lpcmm6hkqc27 already processed
embedding-1: Handler stopped processing: at://did:plc:ger7kxkktaqoa32b5lawevrb/app.bsky.feed.post/3lpcmlzriy22k already processed
embedding-1: No new messages for 2 seconds. Stopping.


## Creating a Redis Search Index
In this section, we'll create a Redis Search index to make the enriched events searchable. Redis Search is a module that adds full-text search capabilities to Redis. It allows us to search for events based on their text, topics, and other fields.

### Creating the Index Schema in Code
Now we'll create the index schema in code. We'll use the Jedis client to create the schema and the index.

The following schema defines the fields that will be indexed. The schema includes:
- Text fields for full-text search
- Tag fields for exact matching
- Vector fields for semantic search

```
FT.CREATE postIdx ON HASH PREFIX 1 post: SCHEMA
        parentUri     TEXT
        topics        TAG SEPARATOR "|"
        time_us       TEXT
        langs         TAG
        uri           TEXT
        operation     TAG
        did           TAG
        timeUs        NUMERIC
        rkey          TAG
        textEmbedding VECTOR HNSW 6
            DIM 384
            TYPE FLOAT32
            DISTANCE_METRIC COSINE
        rootUri       TEXT
        text          TEXT
```

In [36]:
import redis.clients.jedis.search.IndexDefinition
import redis.clients.jedis.search.IndexOptions
import redis.clients.jedis.search.Schema
import redis.clients.jedis.search.schemafields.VectorField.VectorAlgorithm

val schema = Schema()
    .addTextField("parentUri", 1.0)
    .addTagField("topics", "|")
    .addTextField("time_us", 1.0)
    .addTagField("langs")
    .addTextField("uri", 1.0)
    .addTagField("operation")
    .addTagField("did")
    .addNumericField("timeUs")
    .addTagField("rkey")
    .addHNSWVectorField(
        "textEmbedding",
        mapOf(
            "type" to "FLOAT32",
            "dim" to "384",
            "distance_metric" to "COSINE",
        )
    )
    .addTextField("rootUri", 1.0)
    .addTextField("text", 1.0)

// Define index options (e.g., prefix)
val rule = IndexDefinition()
    .setPrefixes("post:")

// Create the index
try {
    jedis.ftCreate("postIdx", IndexOptions.defaultOptions().setDefinition(rule), schema)
} catch (e: JedisDataException) {
    println("Index already exists")
}

Index already exists


### Searching the Index
Now that we have created the index, we can search for events based on their topics, text, and other fields. In this example, we'll search for events with the topic "Samba".

Redis Search uses a query language similar to SQL. For example, to search for events with the topic "machine_learning", we would use the query `@topics:{machine_learning}`.

Exact Matching Search

In [37]:
//FT.SEARCH postIdx "@topics:{machine_learning}"
val result = jedis.ftSearch(
    "postIdx",
    "@topics:{Samba}"
)

result.documents.forEach { post ->
    println(Event.fromMap(post))
}

Full Text Search

In [38]:
//FT.SEARCH postIdx "@text:Open source"
val result = jedis.ftSearch(
    "postIdx",
    "@text:Open source"
)

result.documents.forEach { post ->
    println(Event.fromMap(post))
}

Vector Similarity Search

In [39]:
import redis.clients.jedis.search.FTSearchParams
import redis.clients.jedis.search.Query

val vector: ByteArray = createEmbedding("How did they do multiligual TTS?")

val queryString = ("* =>[KNN \$K @textEmbedding \$BLOB AS similarityScore]")

val params = mapOf("BLOB" to vector)

var query = Query(queryString)
    .addParam("K", 1)
    .addParam("BLOB", vector)
    .returnFields("uri", "text", "similarityScore")
    .setSortBy("similarityScore", true)
    .dialect(2)

val result = jedis.ftSearch(
    "postIdx",
    query
)

result.documents.forEach { doc ->
    println(Event.fromMap(doc).toString() + "\n")
}

Pre filtering

In [40]:
//FT.SEARCH postIdx "@text:Open source" "@tag:{Samba}" "*=>[KNN 1 @textEmbedding $BLOB AS similarityScore]"
val vector: ByteArray = createEmbedding("Open source is wizardry stuff")

val queryString = "@topics:{Samba} =>[KNN \$K @textEmbedding \$BLOB AS similarityScore]"

val params = mapOf("BLOB" to vector)

var query = Query(queryString)
    .addParam("K", 5) // Top 5 results (if enough from pre filtering)
    .addParam("BLOB", vector)
    .returnFields("uri", "text", "similarityScore")
    .setSortBy("similarityScore", true)
    .dialect(2)

val result = jedis.ftSearch(
    "postIdx",
    query
)

println(result.totalResults)
result.documents.forEach { doc ->
    println(Event.fromMap(doc).toString() + "\n")
}

0
