# 🤖 .NET Interactive Notebook: Hybrid RAG with OpenAI Integration

**Language**: C# (.NET 7+ with .NET Interactive kernel)

### Features:
- Document loading and preprocessing
- Tokenization and chunking
- Semantic embedding via OpenAI API
- BM25 sparse search with Lucene.NET
- Hybrid scoring + query handling
- Final RAG response using OpenAI LLM

In [None]:
// Load required packages
#r "nuget:Lucene.Net, 4.8.0-beta00016"
#r "nuget:Lucene.Net.Analysis.Common, 4.8.0-beta00016"
#r "nuget:Lucene.Net.QueryParser, 4.8.0-beta00016"
#r "nuget:Newtonsoft.Json"
#r "nuget:Microsoft.ML"
#r "nuget:System.Net.Http.Json"

## 📄 Load and Chunk Text Document

In [None]:
using System.Text;
using System.Net.Http;
using Newtonsoft.Json;

/// <summary>Reads the whole file at <paramref name="path"/> and returns its contents as one string.</summary>
/// <param name="path">Path of the text file to read.</param>
/// <returns>The full text of the file.</returns>
string LoadTextFile(string path) => File.ReadAllText(path);

/// <summary>
/// Splits <paramref name="content"/> into sentence-aligned chunks whose size is roughly
/// <paramref name="maxTokens"/> tokens each.
/// </summary>
/// <param name="content">Text to split. Sentences are delimited by '.' or a newline.</param>
/// <param name="maxTokens">
/// Approximate token budget per chunk. A chunk is closed as soon as its character length reaches
/// four times this value, so chunks can overshoot the budget by up to one sentence.
/// A value of zero or less yields one chunk per sentence.
/// </param>
/// <returns>The chunks in document order; empty when the text contains no sentences.</returns>
/// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
List<string> ChunkText(string content, int maxTokens = 200)
{
    ArgumentNullException.ThrowIfNull(content);

    // Rough heuristic: one token is about four characters of English text.
    const int ApproxCharsPerToken = 4;

    // Widen to long so a very large maxTokens cannot overflow into a negative budget.
    long maxChars = (long)maxTokens * ApproxCharsPerToken;

    // TrimEntries + RemoveEmptyEntries also drops whitespace-only fragments (e.g. the " " between
    // a full stop and the following newline), which would otherwise be emitted as a stray ". ".
    var sentences = content.Split(
        new[] { ".", "\n" },
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

    var chunks = new List<string>();
    var sb = new StringBuilder();
    foreach (var sentence in sentences)
    {
        // The delimiter was consumed by Split, so re-terminate every sentence with a full stop.
        sb.Append(sentence).Append(". ");
        if (sb.Length >= maxChars)
        {
            chunks.Add(sb.ToString().Trim());
            sb.Clear();
        }
    }

    // Flush whatever is left below the budget as the final chunk.
    if (sb.Length > 0)
    {
        chunks.Add(sb.ToString().Trim());
    }

    return chunks;
}

// Read the sample document (path is relative) and split it into chunks.
// `chunks` is a notebook-level variable reused by later cells for embedding and indexing.
var content = LoadTextFile("data/sample.txt");
var chunks = ChunkText(content);

## 🧠 Generate Embeddings using OpenAI API

In [None]:
using System.Net.Http.Json;

// Wire-format DTOs for the OpenAI embeddings endpoint. Member names are lower-case on purpose:
// they mirror the JSON property names ("model", "input", "data", "index", "embedding").
record OpenAIEmbeddingRequest(string model, List<string> input);
record OpenAIEmbeddingResponse(List<EmbeddingData> data);
record EmbeddingData(int index, List<float> embedding);

// One client for all embedding calls: creating an HttpClient per request opens a fresh
// connection (and TLS handshake) every time and can exhaust sockets under load.
var embeddingsHttpClient = new HttpClient();

/// <summary>
/// Requests one embedding vector per input text from the OpenAI embeddings endpoint
/// using the "text-embedding-ada-002" model.
/// </summary>
/// <param name="texts">Texts to embed. An empty list returns an empty result without any request.</param>
/// <param name="apiKey">OpenAI API key, sent as a Bearer token.</param>
/// <returns>Embedding vectors, where element i belongs to <c>texts[i]</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="texts"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="apiKey"/> is null or blank.</exception>
/// <exception cref="HttpRequestException">The API answered with a non-success status code.</exception>
/// <exception cref="InvalidOperationException">The response did not contain one embedding per input.</exception>
async Task<List<List<float>>> GetEmbeddings(List<string> texts, string apiKey)
{
    ArgumentNullException.ThrowIfNull(texts);
    if (string.IsNullOrWhiteSpace(apiKey))
    {
        throw new ArgumentException("An OpenAI API key is required.", nameof(apiKey));
    }

    // Nothing to embed: avoid a pointless (and rejected) request.
    if (texts.Count == 0)
    {
        return new List<List<float>>();
    }

    using var message = new HttpRequestMessage(HttpMethod.Post, "https://api.openai.com/v1/embeddings")
    {
        Content = JsonContent.Create(new OpenAIEmbeddingRequest("text-embedding-ada-002", texts)),
    };
    // Per-request header so the shared client never carries a stale key.
    message.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", apiKey);

    using var response = await embeddingsHttpClient.SendAsync(message);
    if (!response.IsSuccessStatusCode)
    {
        // Surface the API's own error body instead of failing later on a null payload.
        var errorBody = await response.Content.ReadAsStringAsync();
        throw new HttpRequestException(
            $"OpenAI embeddings request failed with status {(int)response.StatusCode} ({response.ReasonPhrase}): {errorBody}");
    }

    var result = await response.Content.ReadFromJsonAsync<OpenAIEmbeddingResponse>();
    if (result?.data is null || result.data.Count != texts.Count)
    {
        throw new InvalidOperationException(
            $"Expected {texts.Count} embeddings but the response contained {result?.data?.Count ?? 0}.");
    }

    // Order by the reported index so position i always corresponds to texts[i].
    return result.data
        .OrderBy(d => d.index)
        .Select(d => d.embedding)
        .ToList();
}

// The key comes from the environment so it never has to be written into the notebook.
var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY");
if (string.IsNullOrWhiteSpace(apiKey))
{
    // Fail here with an actionable message rather than sending an empty Bearer token.
    throw new InvalidOperationException("The OPENAI_API_KEY environment variable is not set.");
}

// One embedding per chunk; `embeddings[i]` is looked up by chunk id during search.
var embeddings = await GetEmbeddings(chunks, apiKey);

## 🔍 Setup Lucene BM25 Index

In [None]:
using Lucene.Net.Store;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Search;
using Lucene.Net.QueryParsers.Classic;

// In-memory Lucene index over the chunks. `dir` and `analyzer` are reused at query time.
var dir = new RAMDirectory();
var analyzer = new StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48);
var config = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, analyzer);
var writer = new IndexWriter(dir, config);

// One Lucene document per chunk: the analyzed text plus the chunk's position as a stored id,
// so a search hit can be mapped back to its entry in `chunks`.
foreach (var (chunkText, position) in chunks.Select((text, index) => (text, index)))
{
    var entry = new Document();
    entry.Add(new TextField("content", chunkText, Field.Store.YES));
    entry.Add(new StringField("id", position.ToString(), Field.Store.YES));
    writer.AddDocument(entry);
}

// Make the added documents visible to readers opened afterwards.
writer.Commit();

## 🔀 Hybrid Search: BM25 + Vector Similarity

In [None]:
/// <summary>Computes the cosine similarity between two vectors of equal dimension.</summary>
/// <param name="v1">First vector.</param>
/// <param name="v2">Second vector; must have the same number of components as <paramref name="v1"/>.</param>
/// <returns>
/// The dot product divided by the product of the magnitudes. A small epsilon is added to the
/// denominator, so an all-zero vector yields 0 rather than NaN.
/// </returns>
/// <exception cref="ArgumentNullException">Either vector is null.</exception>
/// <exception cref="ArgumentException">The vectors have different dimensions.</exception>
double CosineSim(List<float> v1, List<float> v2)
{
    ArgumentNullException.ThrowIfNull(v1);
    ArgumentNullException.ThrowIfNull(v2);
    if (v1.Count != v2.Count)
    {
        // Comparing vectors of different dimension is meaningless; do not silently ignore the tail.
        throw new ArgumentException(
            $"Vectors must have the same dimension ({v1.Count} vs {v2.Count}).", nameof(v2));
    }

    double dot = 0, mag1 = 0, mag2 = 0;
    for (int i = 0; i < v1.Count; i++)
    {
        // Widen before multiplying: float * float can overflow to Infinity and loses precision.
        double a = v1[i];
        double b = v2[i];
        dot += a * b;
        mag1 += a * a;
        mag2 += b * b;
    }

    // Epsilon guards against division by zero for zero-length vectors.
    return dot / (Math.Sqrt(mag1) * Math.Sqrt(mag2) + 1e-8);
}

/// <summary>
/// Retrieves candidate chunks with a Lucene keyword search and re-ranks them by cosine similarity
/// between the query embedding and each chunk's embedding.
/// </summary>
/// <remarks>
/// The keyword score only decides which chunks become candidates; the final order is determined
/// by embedding similarity alone. Reads the notebook-level variables <c>dir</c>, <c>analyzer</c>,
/// <c>apiKey</c>, <c>chunks</c> and <c>embeddings</c>.
/// </remarks>
/// <param name="query">Free-text query. It is escaped, so query-syntax characters are treated literally.</param>
/// <param name="k">Maximum number of chunks to return.</param>
/// <returns>Up to <paramref name="k"/> chunks, most similar first; empty for a blank query, non-positive k, or no keyword hits.</returns>
async Task<List<string>> HybridSearch(string query, int k = 5)
{
    // Nothing to search for: skip both the parser (which rejects empty input) and the API call.
    if (string.IsNullOrWhiteSpace(query) || k <= 0)
    {
        return new List<string>();
    }

    // Always fetch at least this many keyword candidates, and more when the caller asks for more,
    // so k > 10 is not silently capped.
    const int MinCandidates = 10;

    // Dispose the reader when done; each call opens a fresh one.
    using var reader = DirectoryReader.Open(dir);
    var searcher = new IndexSearcher(reader)
    {
        // Lucene 4.8 scores with classic TF-IDF unless told otherwise; select BM25 explicitly.
        // NOTE(review): the index is written with the default similarity; both encode length
        // norms the same way in 4.8 as far as I can tell - confirm if scores look off.
        Similarity = new Lucene.Net.Search.Similarities.BM25Similarity(),
    };
    var parser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, "content", analyzer);
    var luceneQuery = parser.Parse(QueryParser.Escape(query));
    var hits = searcher.Search(luceneQuery, Math.Max(k, MinCandidates)).ScoreDocs;

    // No keyword candidates means nothing to re-rank, so avoid paying for an embedding request.
    if (hits.Length == 0)
    {
        return new List<string>();
    }

    var qEmbed = (await GetEmbeddings(new List<string> { query }, apiKey))[0];

    // Materialized with ToList() before the reader is disposed at method exit.
    return hits
        .Select(h =>
        {
            // The stored "id" field is the chunk's position in `chunks` / `embeddings`.
            var idx = int.Parse(searcher.Doc(h.Doc).Get("id"));
            return (chunk: chunks[idx], score: CosineSim(qEmbed, embeddings[idx]));
        })
        .OrderByDescending(x => x.score)
        .Take(k)
        .Select(x => x.chunk)
        .ToList();
}

// var results = await HybridSearch("licensing requirements");
// results.ForEach(r => Console.WriteLine("---\n" + r));