# Azure Cosmos DB Data Seeding

This notebook is used to seed Azure Cosmos DB with sample data for the Contoso Travel Agent application.

## Setup

Install required packages and configure the Cosmos DB connection.

**Step 1**: Install NuGet packages

**IMPORTANT**: If you get version conflict errors, restart the notebook kernel and then re-run this cell. Clearing the outputs alone ("Clear All Outputs") does not unload package versions that the kernel has already loaded.

We'll use:
- `Microsoft.Azure.Cosmos` - Azure Cosmos DB SDK for .NET
- `DotNetEnv` - Load environment variables from .env file
- `System.Text.Json` - JSON serialization
- `Newtonsoft.Json` - JSON parsing of the seed data files (`JArray` / `JObject`)
- `Azure.AI.OpenAI` - Azure OpenAI for embeddings
- `Azure.Identity` - Authentication

In [None]:
#r "nuget: DotNetEnv, 3.1.0"
#r "nuget: System.Text.Json, 8.0.0"
#r "nuget: Azure.Identity, 1.13.1"
#r "nuget: Azure.AI.OpenAI, 2.1.0"
#r "nuget: Microsoft.Azure.Cosmos, 3.43.1"
#r "nuget: Newtonsoft.Json, 13.0.3"

**Step 2**: Load environment variables from .env file

The .env file in the root directory contains connection strings and configuration.

In [None]:
using DotNetEnv;
using System.IO;

// The .env file is expected one directory above the notebook's working directory.
var envFilePath = Path.Combine(Directory.GetCurrentDirectory(), "..", ".env");

// Report a missing file first; otherwise load it into the process environment.
if (!File.Exists(envFilePath))
{
    Console.WriteLine($"No .env file found at {envFilePath}");
}
else
{
    Env.Load(envFilePath);
    Console.WriteLine("Loaded environment variables from .env");
}

// Cosmos DB settings shared with the cells that follow.
var cosmosEndpoint = Environment.GetEnvironmentVariable("COSMOS_DB_ENDPOINT");
var databaseName = Environment.GetEnvironmentVariable("COSMOS_DB_DATABASE_NAME");
var containerName = Environment.GetEnvironmentVariable("COSMOS_DB_CHAT_HISTORY_CONTAINER");

// Echo the resolved settings so a missing value is visible straight away.
var resolvedSettings = new[]
{
    ("Cosmos DB Endpoint", cosmosEndpoint),
    ("Database", databaseName),
    ("Container", containerName),
};

foreach (var (label, value) in resolvedSettings)
{
    Console.WriteLine($"{label}: {value}");
}

**Step 3**: Connect to Cosmos DB

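Connect using `COSMOS_DB_CONNECTION_STRING` if it is set. Otherwise, the cell fetches the account's primary key with the Azure CLI (`az cosmosdb keys list`) and connects with the endpoint/key pair; this fallback needs `COSMOS_DB_ENDPOINT` and `AZURE_RESOURCE_GROUP` to be set and the Azure CLI to be installed and signed in.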

In [None]:
using Microsoft.Azure.Cosmos;
using System.Diagnostics;

// Creates the shared Cosmos DB `client` and the chat-history `container` reference
// used by later cells. COSMOS_DB_CONNECTION_STRING is preferred; when it is not set,
// the account's primary key is fetched through the Azure CLI instead.
CosmosClient client = null;
var cosmosConnectionString = Environment.GetEnvironmentVariable("COSMOS_DB_CONNECTION_STRING");

if (!string.IsNullOrEmpty(cosmosConnectionString))
{
    client = new CosmosClient(cosmosConnectionString);
    Console.WriteLine("Connected using connection string");
}
else
{
    // Fail with an actionable message rather than a NullReferenceException below.
    if (string.IsNullOrWhiteSpace(cosmosEndpoint))
    {
        throw new InvalidOperationException(
            "Neither COSMOS_DB_CONNECTION_STRING nor COSMOS_DB_ENDPOINT is set.");
    }

    var resourceGroup = Environment.GetEnvironmentVariable("AZURE_RESOURCE_GROUP");
    if (string.IsNullOrWhiteSpace(resourceGroup))
    {
        throw new InvalidOperationException(
            "AZURE_RESOURCE_GROUP must be set to look up the Cosmos DB key with the Azure CLI.");
    }

    // The account name is taken as the first dot-separated label of the endpoint host.
    var accountName = cosmosEndpoint.Replace("https://", "").Split('.')[0];

    // ArgumentList passes each value as its own argument, so names containing
    // spaces or quotes cannot split or corrupt the command line.
    var startInfo = new ProcessStartInfo
    {
        FileName = "az",
        RedirectStandardOutput = true,
        UseShellExecute = false,
        CreateNoWindow = true,
        ArgumentList =
        {
            "cosmosdb", "keys", "list",
            "--name", accountName,
            "--resource-group", resourceGroup,
            "--query", "primaryMasterKey",
            "-o", "tsv"
        }
    };

    // Dispose the process handle once the key has been read.
    using var process = Process.Start(startInfo)
        ?? throw new InvalidOperationException("Failed to start the Azure CLI ('az').");

    // Drain stdout before waiting so the child cannot block on a full pipe.
    var primaryKey = (await process.StandardOutput.ReadToEndAsync()).Trim();
    await process.WaitForExitAsync();

    // Surface a CLI failure here instead of as an opaque CosmosClient error.
    if (process.ExitCode != 0 || string.IsNullOrEmpty(primaryKey))
    {
        throw new InvalidOperationException(
            $"'az cosmosdb keys list' did not return a key (exit code {process.ExitCode}). " +
            "Check that the Azure CLI is signed in and that the account and resource group are correct.");
    }

    client = new CosmosClient(cosmosEndpoint, primaryKey);
    Console.WriteLine("Connected using endpoint and key");
}

var container = client.GetContainer(databaseName, containerName);
Console.WriteLine($"Container reference obtained: {containerName}");

In [None]:
using Azure.AI.OpenAI;
using Azure.Identity;

// Builds the `embeddingClient` used by the seeding cells to generate vector embeddings.
var azureAIEndpoint = Environment.GetEnvironmentVariable("AZURE_AI_FOUNDRY_SERVICE_ENDPOINT");
if (string.IsNullOrWhiteSpace(azureAIEndpoint))
{
    // Without this guard, `new Uri(null)` fails with an ArgumentNullException
    // that does not say which setting is missing.
    throw new InvalidOperationException(
        "AZURE_AI_FOUNDRY_SERVICE_ENDPOINT is not set; it is required to create the embedding client.");
}

// Fall back to the default model when the variable is unset or blank.
var embeddingModelName = Environment.GetEnvironmentVariable("AZURE_EMBEDDING_MODEL_NAME");
if (string.IsNullOrWhiteSpace(embeddingModelName))
{
    embeddingModelName = "text-embedding-ada-002";
}

// DefaultAzureCredential authenticates with whatever Azure identity is available
// in the environment; no API key is read by this cell.
var credential = new DefaultAzureCredential();
var azureOpenAIClient = new AzureOpenAIClient(new Uri(azureAIEndpoint), credential);
var embeddingClient = azureOpenAIClient.GetEmbeddingClient(embeddingModelName);

Console.WriteLine("Azure OpenAI Embedding Client configured");
Console.WriteLine($"Model: {embeddingModelName}");
Console.WriteLine($"Endpoint: {azureAIEndpoint}");

**Step 3b**: Configure Azure OpenAI Embedding Client

Set up the Azure OpenAI client to generate vector embeddings for chat content. These embeddings enable semantic search capabilities.

**Step 4**: Insert Chat History Seed Data

Load seed data from chat_history.json and insert it into Cosmos DB with real embeddings generated by Azure OpenAI.

In [None]:
using System.Text.Json;
using Newtonsoft.Json.Linq;

// Seeds the chat-history container: reads ../data/chat_history.json, generates an
// embedding for each record's Content, and inserts the record. Records whose id
// already exists (HTTP 409 Conflict) are reported and left untouched.
var seedDataPath = Path.Combine(Directory.GetCurrentDirectory(), "..", "data", "chat_history.json");
if (!File.Exists(seedDataPath))
{
    Console.WriteLine($"Seed data file not found: {seedDataPath}");
}
else
{
    var seedDataJson = await File.ReadAllTextAsync(seedDataPath);
    var seedData = JArray.Parse(seedDataJson);

    Console.WriteLine($"Loaded {seedData.Count} records from chat_history.json");
    Console.WriteLine("Inserting records into Cosmos DB ...\n");

    foreach (JObject item in seedData)
    {
        // A JSON `null` is a non-null JToken of type Null, so check the token type
        // as well as the C# reference before using the value.
        var contentToken = item["Content"];
        var content = contentToken is null || contentToken.Type == JTokenType.Null
            ? null
            : contentToken.ToString();
        var applicationId = item["ApplicationId"]?.ToString();

        // Skip incomplete records instead of aborting the whole load with a
        // NullReferenceException; Content feeds the embedding call and
        // ApplicationId is used as the partition key value.
        if (string.IsNullOrWhiteSpace(content) || string.IsNullOrEmpty(applicationId))
        {
            Console.WriteLine($"  Skipped: {item["id"]} - missing Content or ApplicationId");
            continue;
        }

        // Generate real embedding using Azure OpenAI
        var embeddingResponse = await embeddingClient.GenerateEmbeddingAsync(content);
        var embedding = embeddingResponse.Value.ToFloats().ToArray();

        // Value<int?> yields null for both a missing property and an explicit JSON
        // null, so either case falls back to 0 instead of throwing on the cast.
        var daysAgo = item.Value<int?>("DaysAgo") ?? 0;
        var minutesOffset = item.Value<int?>("MinutesOffset") ?? 0;

        // Build the record with embedding. `_ts` is deliberately not set: it is a
        // system-generated property that Cosmos DB assigns itself on every write.
        var record = new JObject
        {
            ["id"] = item["id"],
            ["ApplicationId"] = item["ApplicationId"],
            ["UserId"] = item["UserId"],
            ["ThreadId"] = item["ThreadId"],
            ["Role"] = item["Role"],
            ["Content"] = content,
            ["ContentEmbedding"] = new JArray(embedding),
            ["CreatedAt"] = DateTime.UtcNow.AddDays(-daysAgo).AddMinutes(minutesOffset)
        };

        try
        {
            // NOTE(review): assumes the container is partitioned on ApplicationId — confirm.
            await container.CreateItemAsync(record, new PartitionKey(applicationId));
            var preview = content.Length > 50 ? content.Substring(0, 50) + "..." : content;
            Console.WriteLine($"  Inserted: {record["Role"]} - {preview}");
        }
        catch (CosmosException ex) when (ex.StatusCode == System.Net.HttpStatusCode.Conflict)
        {
            // Re-running the notebook is expected; an existing id is not an error.
            Console.WriteLine($"  Already exists: {record["id"]} - {record["Role"]}");
        }
    }

    Console.WriteLine("\nSeed data loading complete with real embeddings!");
}

---

**Step 5**: Now let's seed the Flights container with sample flight data. This data will be used by the travel agent to search and book flights.

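Get a reference to the Flights container, then load the flight records from flights_data.json and insert them into Cosmos DB. For each record that has a `flightProfile`, an embedding is generated and stored in `flightProfileVector` before the insert.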

In [None]:
using System.Text.Json;
using Newtonsoft.Json.Linq;

// Seeds the Flights container: reads ../data/flights_data.json, adds a
// `flightProfileVector` embedding to every record that has a flightProfile, and
// inserts the record. Existing ids (HTTP 409 Conflict) are counted, not overwritten.

// Get Flights container reference
var flightsContainerName = Environment.GetEnvironmentVariable("COSMOS_DB_FLIGHTS_CONTAINER") ?? "Flights";
var flightsContainer = client.GetContainer(databaseName, flightsContainerName);
Console.WriteLine($"Flights container: {flightsContainerName}\n");

// Load and insert flight data
var flightsDataPath = Path.Combine(Directory.GetCurrentDirectory(), "..", "data", "flights_data.json");

if (!File.Exists(flightsDataPath))
{
    Console.WriteLine($"Flight data file not found: {flightsDataPath}");
}
else
{
    var flightsJson = await File.ReadAllTextAsync(flightsDataPath);
    var flights = JArray.Parse(flightsJson);

    Console.WriteLine($"Loaded {flights.Count} flight records");
    Console.WriteLine("Inserting into Cosmos DB with vector embeddings ...\n");

    var (inserted, existing, vectorized) = (0, 0, 0);
    var skipped = 0;

    foreach (JObject flight in flights)
    {
        // A record without an id cannot be inserted; skip it rather than letting a
        // NullReferenceException abort the remaining records.
        var flightId = flight["id"]?.ToString();
        if (string.IsNullOrEmpty(flightId))
        {
            Console.WriteLine($"  Skipped: {flight["flightNumber"]} - missing id");
            skipped++;
            continue;
        }

        try
        {
            // Generate vector embedding for flightProfile if it exists. A JSON `null`
            // is a non-null JToken of type Null, so the token type is checked too;
            // otherwise an empty string would be sent to the embedding service.
            var profileToken = flight["flightProfile"];
            var flightProfile = profileToken is null || profileToken.Type == JTokenType.Null
                ? null
                : profileToken.ToString();

            if (!string.IsNullOrWhiteSpace(flightProfile))
            {
                var embeddingResponse = await embeddingClient.GenerateEmbeddingAsync(flightProfile);
                var embedding = embeddingResponse.Value.ToFloats().ToArray();

                // Add the vector embedding to the flight document
                flight["flightProfileVector"] = new JArray(embedding);
                vectorized++;
            }

            // NOTE(review): assumes the Flights container is partitioned on the id — confirm.
            await flightsContainer.CreateItemAsync(
                flight,
                new PartitionKey(flightId));

            inserted++;
        }
        catch (CosmosException ex) when (ex.StatusCode == System.Net.HttpStatusCode.Conflict)
        {
            // Re-running the notebook is expected; an existing id is not an error.
            Console.WriteLine($"  Already exists: {flight["flightNumber"]}");
            existing++;
        }
    }

    Console.WriteLine($"\nComplete! Inserted: {inserted} | Already existed: {existing} | Vectorized: {vectorized}");
    if (skipped > 0)
    {
        Console.WriteLine($"Skipped (missing id): {skipped}");
    }
}