# Analyzing the Data

In this notebook, we'll analyze the enriched events from the previous notebook. We'll use a combination of techniques to analyze the data:

1. Querying Redis for trending topics
2. Semantic search using embeddings
3. Summarization of posts using a Large Language Model (LLM)
4. Creating a simple query router to handle different types of user queries

This notebook demonstrates how to build a simple question-answering system on top of the enriched data and expose it through a Bluesky account that replies to posts mentioning it.

In [1]:
import dev.raphaeldelio.*

In [2]:
%use coroutines

In [3]:
import io.ktor.client.HttpClient
import io.ktor.client.engine.cio.CIO
import io.ktor.client.plugins.contentnegotiation.ContentNegotiation
import io.ktor.serialization.kotlinx.json.json
import kotlinx.serialization.json.Json

// Shared Ktor HTTP client (CIO engine) used by every request in this notebook.
// JSON bodies are (de)serialized through kotlinx.serialization; keys that the
// data classes do not declare are ignored while decoding.
val client = HttpClient(CIO) {
    install(ContentNegotiation) {
        json(Json { ignoreUnknownKeys = true })
    }
}

In [4]:
// Prefix for every XRPC endpoint called in this notebook.
val API_URL: String = "https://bsky.social/xrpc"

// Sent as the `identifier` of the login request.
val USERNAME: String = "devbubble.bsky.social"

// Sent as the `password` of the login request; read from the environment so the
// secret never lands in the notebook. `System.getenv` yields null when the
// variable is not set.
val PASSWORD = System.getenv("DEVBUBBLE_TOKEN")

In [5]:
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

/**
 * Body returned by `com.atproto.server.createSession`, as decoded in `getAccessToken`.
 *
 * Every nullable field defaults to `null`. kotlinx.serialization treats a property
 * without a default as required, even when its type is nullable, so without the
 * defaults a response that simply omits one of these keys would fail to decode.
 *
 * @property accessJwt bearer token sent in the `Authorization` header of later calls.
 * @property refreshJwt refresh token returned alongside the access token.
 * @property handle handle of the logged-in account.
 * @property did DID of the logged-in account.
 * @property active NOTE(review): still required here; confirm the server always sends it.
 */
@Serializable
data class LoginResponse(
    @SerialName("accessJwt") val accessJwt: String,
    @SerialName("refreshJwt") val refreshJwt: String,
    @SerialName("handle") val handle: String,
    @SerialName("did") val did: String,
    @SerialName("didDoc") val didDoc: DidDoc? = null,
    @SerialName("email") val email: String? = null,
    @SerialName("emailConfirmed") val emailConfirmed: Boolean? = null,
    @SerialName("emailAuthFactor") val emailAuthFactor: Boolean? = null,
    @SerialName("active") val active: Boolean,
    @SerialName("status") val status: String? = null
)

/**
 * Minimal view of the DID document embedded in [LoginResponse]; only `id` is kept.
 *
 * `id` defaults to `null` so that a document without the key still decodes
 * (a nullable property without a default is otherwise treated as required).
 */
@Serializable
data class DidDoc(
    @SerialName("id") val id: String? = null
)

In [6]:
import io.ktor.client.call.body
import io.ktor.client.request.HttpRequestBuilder
import io.ktor.client.request.headers
import io.ktor.client.request.post
import io.ktor.client.request.setBody
import io.ktor.http.ContentType
import io.ktor.http.HeadersBuilder
import io.ktor.http.HttpStatusCode
import io.ktor.http.contentType

/**
 * Logs in with [USERNAME] / [PASSWORD] via `com.atproto.server.createSession`.
 *
 * On HTTP 200 the account DID is stored through `jedisPooled` under the key
 * `mainDid` and the access JWT is returned. Any other status is reported on
 * stdout and an empty string is returned instead.
 *
 * @return the access JWT, or `""` when authentication did not succeed.
 */
suspend fun getAccessToken(): String {
    val credentials = mapOf(
        "identifier" to USERNAME,
        "password" to PASSWORD
    )

    val loginResponse = client.post("$API_URL/com.atproto.server.createSession") {
        contentType(ContentType.Application.Json)
        setBody(credentials)
    }

    // Guard clause: anything but 200 OK is treated as a failed login.
    if (loginResponse.status != HttpStatusCode.OK) {
        println("⚠️ Authentication failed: ${loginResponse.status}")
        return ""
    }

    val session: LoginResponse = loginResponse.body()
    jedisPooled.set("mainDid", session.did)
    println("✅ Login successful. DID: ${session.did}")
    return session.accessJwt
}

In [7]:
// Access JWT used as the bearer token by the calls below. Obtained once, blocking
// the cell until the login round-trip has finished; empty when the login failed.
var blueskyToken: String = runBlocking { getAccessToken() }

✅ Login successful. DID: did:plc:qdwb7czl4gdbu5go25dza3vo


In [8]:
/**
 * One page of results from `app.bsky.feed.searchPosts`, as decoded in `searchPosts`.
 *
 * @property cursor token passed back as the `cursor` query parameter to fetch the
 *   next page; `searchPosts` stops paging once it is `null`.
 * @property hitsTotal optional total reported by the server (not read in this notebook).
 * @property posts the posts contained in this page.
 */
@Serializable
data class SearchResponse(
    @SerialName("cursor") val cursor: String? = null,
    @SerialName("hitsTotal") val hitsTotal: Int? = null,
    @SerialName("posts") val posts: List<Post>
)

/**
 * A single post as returned inside [SearchResponse].
 *
 * @property uri identifier of the post; used together with [cid] when replying to it.
 * @property cid content id of the post.
 * @property author account that wrote the post.
 * @property indexedAt indexing timestamp, kept as the raw string sent by the server.
 * @property record the post record (text, embeds, creation time). Defaults to `null`
 *   like the other nullable fields, so a post without the key still decodes instead
 *   of failing as a missing required field.
 * @property replyCount optional engagement counter.
 * @property repostCount optional engagement counter.
 * @property likeCount optional engagement counter.
 * @property quoteCount optional engagement counter.
 */
@Serializable
data class Post(
    @SerialName("uri") val uri: String,
    @SerialName("cid") val cid: String,
    @SerialName("author") val author: Author,
    @SerialName("indexedAt") val indexedAt: String,
    @SerialName("record") val record: Record? = null,
    @SerialName("replyCount") val replyCount: Int? = null,
    @SerialName("repostCount") val repostCount: Int? = null,
    @SerialName("likeCount") val likeCount: Int? = null,
    @SerialName("quoteCount") val quoteCount: Int? = null,
)

/**
 * Account that authored a [Post].
 *
 * @property did DID of the account.
 * @property handle handle of the account; the reply cell prefixes answers with `@handle`.
 * @property displayName optional display name.
 * @property avatar optional avatar, kept as the raw string sent by the server.
 */
@Serializable
data class Author(
    @SerialName("did") val did: String,
    @SerialName("handle") val handle: String,
    @SerialName("displayName") val displayName: String? = null,
    @SerialName("avatar") val avatar: String? = null
)

/**
 * Record payload of a [Post].
 *
 * @property text post text; the driver cells strip the bot mention from it and use
 *   the remainder as the user's question.
 * @property embed optional embedded media.
 * @property createdAt creation timestamp as sent by the server; required (no default).
 */
@Serializable
data class Record(
    @SerialName("text") val text: String? = null,
    @SerialName("embed") val embed: Embed? = null,
    @SerialName("createdAt") val createdAt: String
)

/**
 * Embedded media of a [Record]; only the optional image list is modelled here.
 */
@Serializable
data class Embed(
    @SerialName("images") val images: List<Image>? = null
)

/**
 * One image inside an [Embed]. Every field is optional and defaults to `null`.
 */
@Serializable
data class Image(
    @SerialName("thumb") val thumb: String? = null, // Thumbnail reference; null when absent
    @SerialName("fullsize") val fullsize: String? = null, // Full-size reference; null when absent
    @SerialName("alt") val alt: String? = null // Alt text; null when absent
)

In [9]:
import io.ktor.client.*
import io.ktor.client.request.*
import io.ktor.client.statement.*
import io.ktor.client.call.*
import io.ktor.http.*

import java.time.Instant
import java.time.temporal.ChronoUnit

/**
 * Fetches every post matching [term] that `app.bsky.feed.searchPosts` returns for the
 * window starting at [sinceTime], following the server's cursor page by page.
 *
 * Requests are authenticated with `blueskyToken`, sorted by `latest` and ask for 100
 * posts per page. Paging ends when the server stops returning a cursor or when a
 * request does not answer with HTTP 200; in the latter case the status and body are
 * printed and the posts gathered so far are returned.
 *
 * @param sinceTime value forwarded as the `since` query parameter.
 * @param term value forwarded as the `q` query parameter.
 * @return all posts collected across the fetched pages.
 */
suspend fun searchPosts(sinceTime: String, term: String): List<Post> {
    val collected = mutableListOf<Post>()
    var nextCursor: String? = null

    println("🔍 Searching posts with tag: $term since: $sinceTime")
    while (true) {
        val pageResponse: HttpResponse = client.get("$API_URL/app.bsky.feed.searchPosts") {
            headers {
                append("Authorization", "Bearer $blueskyToken")
            }
            parameter("q", term)
            parameter("sort", "latest")
            parameter("limit", 100)
            parameter("since", sinceTime)
            // Only present from the second page onwards.
            nextCursor?.let { parameter("cursor", it) }
        }

        if (pageResponse.status != HttpStatusCode.OK) {
            println("⚠️ Failed to fetch posts. Status: ${pageResponse.status}")
            println(pageResponse.bodyAsText())
            break
        }

        val page: SearchResponse = pageResponse.body()
        collected += page.posts
        println("✅ Retrieved ${page.posts.size} posts. Total so far: ${collected.size}.")

        // No cursor means the last page has been reached.
        nextCursor = page.cursor ?: break
    }

    println("🎉 Finished fetching posts. Total retrieved: ${collected.size}.")
    return collected
}

In [10]:
// Dry run: fetch the mentions of the last 15 hours and print the answer the
// query router produces for each one, without posting anything.
val sinceTime = Instant.now().minus(15, ChronoUnit.HOURS).toString()
runBlocking {
    val posts = searchPosts(sinceTime, "@devbubble.bsky.social")
    for (post in posts) {
        // A post may come without a record or without text; skip it instead of
        // aborting the whole run with a NullPointerException.
        val cleanedPost = post.record?.text
            ?.replace("@devbubble.bsky.social", "")
            ?.trim()
            ?: continue

        println(processUserRequest(cleanedPost, multiHandler))
    }
}

🔍 Searching posts with tag: @devbubble.bsky.social since: 2025-05-19T02:29:39.640411Z
✅ Retrieved 1 posts. Total so far: 1.
🎉 Finished fetching posts. Total retrieved: 1.
What's being said about Trump and Angela Merkel
summarization
0.8182173371315002
0.8

[summarization]
[ Donald Trump, Angela Merkel, US Politics, German Politics]
 People are discussing how Trump is compared to Merkel's leadership. Some highlight her calm demeanor and long-term vision, suggesting a positive comparison in terms of effective governance amidst political instability.


In [11]:
/**
 * Reply reference attached to a [PostRecord] when the new post answers another one.
 *
 * @property root reference stored as the `root` of the reply.
 * @property parent reference stored as the `parent` of the reply.
 */
@Serializable
data class ReplyRef(
    val root: PostRef,
    val parent: PostRef
)

/**
 * Pointer to an existing post, made of its `cid` and `uri` (see `Post.cid` / `Post.uri`).
 */
@Serializable
data class PostRef(
    val cid: String,
    val uri: String
)

/**
 * Record of the post to create; sent as `record` inside [PostRequest].
 *
 * NOTE(review): `$type` and `reply` rely on default values. With kotlinx.serialization's
 * default `encodeDefaults = false` (the shared client only sets `ignoreUnknownKeys`),
 * properties equal to their default are presumably left out of the JSON, so `$type`
 * may never be sent — confirm this is intended.
 *
 * @property text body of the post.
 * @property createdAt creation timestamp; `createPost` passes `Instant.now().toString()`.
 * @property reply reply reference, or `null` for a top-level post.
 */
@Serializable
data class PostRecord(
    val `$type`: String = "app.bsky.feed.post",
    val text: String,
    val createdAt: String,
    val reply: ReplyRef? = null
)

/**
 * Request body posted to `com.atproto.repo.createRecord` by `createPost`.
 *
 * @property repo repository (account) the record is written to.
 * @property collection collection name; `createPost` always sends `app.bsky.feed.post`.
 * @property record the post record to store.
 */
@Serializable
data class PostRequest(
    val repo: String,
    val collection: String,
    val record: PostRecord
)

/**
 * Publishes a post through `com.atproto.repo.createRecord`, optionally as a reply.
 *
 * The post becomes a reply only when both [replyToUri] and [replyToCid] are given;
 * if either is missing, a top-level post is created. By default the reply target is
 * also used as the thread root, which is correct when answering a top-level post.
 * When answering a post that is itself a reply, pass the thread's first post through
 * [rootUri] / [rootCid] so that `root` and `parent` differ as the reply model expects.
 *
 * @param text body of the post.
 * @param replyToUri `uri` of the post being answered (the reply's parent).
 * @param replyToCid `cid` of the post being answered.
 * @param rootUri `uri` of the thread root; falls back to [replyToUri] when this or
 *   [rootCid] is `null`.
 * @param rootCid `cid` of the thread root; falls back to [replyToCid] when this or
 *   [rootUri] is `null`.
 * @param repo repository (account) to write to. Defaults to the DID that used to be
 *   hard-coded here; pass the DID of the logged-in account when using another login.
 * @return `true` when the server answered 200 OK or 202 Accepted, `false` otherwise
 *   (status and response body are printed in that case).
 */
suspend fun createPost(
    text: String,
    replyToUri: String? = null,
    replyToCid: String? = null,
    rootUri: String? = null,
    rootCid: String? = null,
    repo: String = "did:plc:qdwb7czl4gdbu5go25dza3vo"
): Boolean {
    val replyRef = if (replyToUri != null && replyToCid != null) {
        val parent = PostRef(uri = replyToUri, cid = replyToCid)
        // Without an explicit root the target itself is the start of the thread.
        val root = if (rootUri != null && rootCid != null) {
            PostRef(uri = rootUri, cid = rootCid)
        } else {
            parent
        }
        ReplyRef(root = root, parent = parent)
    } else {
        null
    }

    val record = PostRecord(
        text = text,
        createdAt = Instant.now().toString(),
        reply = replyRef
    )

    val response: HttpResponse = client.post("$API_URL/com.atproto.repo.createRecord") {
        headers {
            append("Authorization", "Bearer $blueskyToken")
        }
        // Request-level setting; it was previously nested inside `headers { }`.
        contentType(ContentType.Application.Json)
        setBody(
            PostRequest(
                repo = repo,
                collection = "app.bsky.feed.post",
                record = record
            )
        )
    }

    return if (response.status == HttpStatusCode.OK || response.status == HttpStatusCode.Accepted) {
        println("✅ Post created${if (replyRef != null) " (as reply)" else ""}!")
        true
    } else {
        println("❌ Failed to create post: ${response.status}")
        println(response.bodyAsText())
        false
    }
}

In [12]:
// Smoke test: publishes a top-level post with the text "test"; the cell result is
// the Boolean returned by createPost.
runBlocking { createPost(text = "test") }

✅ Post created!


true

In [13]:
/**
 * Splits [text] into whitespace-normalized chunks of at most [maxLength] characters,
 * breaking between words wherever possible.
 *
 * Words are separated by any run of whitespace and re-joined with single spaces.
 * Guarantees:
 * - every returned chunk is non-empty and no longer than [maxLength] (`String.length`);
 * - blank input yields an empty list;
 * - a single word longer than [maxLength] is cut into pieces of [maxLength] characters
 *   rather than emitted oversized (surrogate pairs are kept together when
 *   [maxLength] is greater than 1).
 *
 * @param text the text to split.
 * @param maxLength maximum length of one chunk; must be positive.
 * @return the chunks in reading order.
 * @throws IllegalArgumentException if [maxLength] is not positive.
 */
fun splitIntoChunks(text: String, maxLength: Int = 300): List<String> {
    require(maxLength > 0) { "maxLength must be positive, was $maxLength" }

    val chunks = mutableListOf<String>()
    val current = StringBuilder()

    // Moves the chunk under construction into the result; never emits an empty chunk.
    fun flush() {
        if (current.isNotEmpty()) {
            chunks.add(current.toString())
            current.clear()
        }
    }

    // Leading or trailing whitespace makes split() produce empty tokens; drop them.
    val words = text.split(Regex("\\s+")).filter { it.isNotEmpty() }

    for (word in words) {
        if (word.length > maxLength) {
            // The word cannot fit in any chunk: hard-split it.
            flush()
            var start = 0
            while (word.length - start > maxLength) {
                var end = start + maxLength
                // Do not cut between the two halves of a surrogate pair.
                if (end - start > 1 && word[end - 1].isHighSurrogate()) end--
                chunks.add(word.substring(start, end))
                start = end
            }
            // The remainder starts a new chunk that following words may still join.
            current.append(word, start, word.length)
            continue
        }

        // Length of the chunk if this word were appended (one separator space if needed).
        val lengthWithWord =
            if (current.isEmpty()) word.length else current.length + 1 + word.length
        if (lengthWithWord > maxLength) flush()

        if (current.isNotEmpty()) current.append(' ')
        current.append(word)
    }

    flush()
    return chunks
}

In [14]:
// Answer every mention of the last 15 hours: run the question through the query
// router, prefix the answer with the author's handle, split it into post-sized
// chunks and publish each chunk as a reply to the mentioning post.
val sinceTime = Instant.now().minus(15, ChronoUnit.HOURS).toString()
runBlocking {
    val posts = searchPosts(sinceTime, "@devbubble.bsky.social")
    for (post in posts) {
        // A post may come without a record or without text; skip it instead of
        // aborting the whole run with a NullPointerException.
        val cleanedPost = post.record?.text
            ?.replace("@devbubble.bsky.social", "")
            ?.trim()
            ?: continue

        val handle = post.author.handle
        val response = "@$handle ${processUserRequest(cleanedPost, multiHandler)}"

        // Every chunk replies directly to the mentioning post. createPost only
        // reports success as a Boolean, so the uri/cid of a freshly created chunk
        // is not available here and the chunks cannot be chained to one another.
        for (chunk in splitIntoChunks(response)) {
            val posted = createPost(chunk, post.uri, post.cid)
            // Do not publish later parts of an answer whose earlier part failed.
            if (!posted) break
        }
    }
}

🔍 Searching posts with tag: @devbubble.bsky.social since: 2025-05-19T02:29:53.025798Z
✅ Retrieved 1 posts. Total so far: 1.
🎉 Finished fetching posts. Total retrieved: 1.
What's being said about Trump and Angela Merkel
summarization
0.8182173371315002
0.8

[summarization]
[ Donald Trump, Angela Merkel, US Foreign Policy, German Politics]
✅ Post created (as reply)!
✅ Post created (as reply)!
