# Cosine Similarity - Near Search

- Perform a near search using cosine similarity

### Import packages

In [65]:
#r "nuget: dotenv.net"

using System;
using System.Net.Http;

using dotenv.net;
DotEnv.Load();

// Credentials and endpoint are read from the environment (populated by DotEnv.Load above).
var apiKey=Environment.GetEnvironmentVariable("OPENAI_KEY");
var adaEndpoint=Environment.GetEnvironmentVariable("OPENAI_ADA_FULLURI");

// DTOs for the embeddings response. The positional parameter names are lower-case
// because System.Text.Json matches property names case-sensitively by default.
record Data(string @object, int index, double[] embedding);
// Fixed: the first parameter was misspelled "@bject", so the JSON "object" field never bound.
record EmbeddingResponse(string @object, Data[] data);

// One shared client for every request made by the notebook.
HttpClient client = new();
// Content-Type is a content header, not a request header: adding it to
// DefaultRequestHeaders is rejected and has no effect, so it is not set here.
// The StringContent built for each request already carries "application/json".
client.DefaultRequestHeaders.TryAddWithoutValidation("api-key", apiKey);


### Get an ADA text embedding

In [66]:
// Requests an embedding for `input` from the endpoint in `adaEndpoint`.
// Returns the embedding of the first entry in the response's `data` array.
// On a non-success HTTP status the status code is printed and an empty array
// is returned; a response with no usable data also yields an empty array
// instead of throwing.
async Task<double[]> GetEmbedding(string input) 
{
    var json = System.Text.Json.JsonSerializer.Serialize(new { input });
    // Both the request content and the response hold disposable resources.
    using var stringContent = new StringContent(json, System.Text.Encoding.UTF8, "application/json");
    using var response = await client.PostAsync(adaEndpoint, stringContent);

    if (!response.IsSuccessStatusCode)
    {
        Console.WriteLine(response.StatusCode);
        return [];
    }

    var body = await response.Content.ReadAsStringAsync();
    var adares = System.Text.Json.JsonSerializer.Deserialize<EmbeddingResponse>(body);

    // Deserialize returns null for a JSON "null" body, and `data` / `embedding`
    // are null when the fields are absent; treat all of these like a failed call.
    if (adares?.data is not { Length: > 0 } entries || entries[0]?.embedding is null)
    {
        Console.WriteLine("Embedding response contained no data.");
        return [];
    }

    return entries[0].embedding;
}

### Calculate the Cosine Similarity

In [67]:
// Computes the cosine similarity of two vectors:
//     dot(A, B) / (|A| * |B|)
// Returns 0.0 when either vector is empty or has zero magnitude.
// Throws ArgumentNullException when an argument is null, and ArgumentException
// when two non-empty vectors differ in length (previously a shorter vectorB
// raised IndexOutOfRangeException and a longer one was silently truncated).
double CosineSimilarity(double[] vectorA, double[] vectorB)
{
    ArgumentNullException.ThrowIfNull(vectorA);
    ArgumentNullException.ThrowIfNull(vectorB);

    // An empty vector has no direction, so report "no similarity" for it.
    if (vectorA.Length == 0 || vectorB.Length == 0)
    {
        return 0.0;
    }

    if (vectorA.Length != vectorB.Length)
    {
        throw new ArgumentException(
            $"Vectors must have the same length ({vectorA.Length} vs {vectorB.Length}).",
            nameof(vectorB));
    }

    double dotProduct = 0.0;
    double sumSquaresA = 0.0;
    double sumSquaresB = 0.0;
    for (int i = 0; i < vectorA.Length; i++)
    {
        double a = vectorA[i];
        double b = vectorB[i];
        dotProduct += a * b;
        sumSquaresA += a * a;
        sumSquaresB += b * b;
    }

    double magnitudeA = Math.Sqrt(sumSquaresA);
    double magnitudeB = Math.Sqrt(sumSquaresB);

    // Guard the division: a zero-magnitude vector would otherwise produce NaN.
    if (magnitudeA == 0 || magnitudeB == 0)
    {
        return 0.0;
    }

    return dotProduct / (magnitudeA * magnitudeB);
}

### Prepare the mock vector database

In [68]:
// Sample facts that stand in for the documents of a vector store.
List<string> content = new()
{
    "The chemical composition of water is H2O.",
    "The speed of light is 300,000 km/s.",
    "Acceleration of gravity on earth is 9.8m/s^2.",
    "The chemical composition of salt or sodium clorida is NaCl.",
};

// Pair every sentence with its embedding; requests are awaited one at a time,
// in list order.
List<Tuple<string, double[]>> vectorDb = [];
foreach (var sentence in content)
{
    var vector = await GetEmbedding(sentence);
    vectorDb.Add(Tuple.Create(sentence, vector));
}

index,value
,
,
,
,
0,"(The chemical composition of water is H2O., System.Double[])Item1The chemical composition of water is H2O.Item2[ 0.022812204, 0.033907063, -0.005532173, -0.027023125, -0.006658137, -0.0070243035, -0.019748608, -0.035029978, -0.00016563336, -0.02624197, -0.0045526763, 0.03317473, -0.009642398, 0.007903105, -0.0069815842, -0.006584903, 0.018735545, -0.0050714132, 0.027828693, -0.00016267732 ... (1516 more) ]"
,
Item1,The chemical composition of water is H2O.
Item2,"[ 0.022812204, 0.033907063, -0.005532173, -0.027023125, -0.006658137, -0.0070243035, -0.019748608, -0.035029978, -0.00016563336, -0.02624197, -0.0045526763, 0.03317473, -0.009642398, 0.007903105, -0.0069815842, -0.006584903, 0.018735545, -0.0050714132, 0.027828693, -0.00016267732 ... (1516 more) ]"
1,"(The speed of light is 300,000 km/s., System.Double[])Item1The speed of light is 300,000 km/s.Item2[ -0.0035969934, 0.00456386, -0.0021389208, 0.0022228612, -0.0011285294, -0.0073494315, -0.024125038, -0.036013458, -0.0021933266, -0.025990376, 0.016079213, 0.0428779, -0.01580563, -0.02132703, -0.0075919256, 0.0103526255, 0.0053690644, -0.015793195, 0.008866573, -0.0046446915 ... (1516 more) ]"
,

Unnamed: 0,Unnamed: 1
Item1,The chemical composition of water is H2O.
Item2,"[ 0.022812204, 0.033907063, -0.005532173, -0.027023125, -0.006658137, -0.0070243035, -0.019748608, -0.035029978, -0.00016563336, -0.02624197, -0.0045526763, 0.03317473, -0.009642398, 0.007903105, -0.0069815842, -0.006584903, 0.018735545, -0.0050714132, 0.027828693, -0.00016267732 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Item1,"The speed of light is 300,000 km/s."
Item2,"[ -0.0035969934, 0.00456386, -0.0021389208, 0.0022228612, -0.0011285294, -0.0073494315, -0.024125038, -0.036013458, -0.0021933266, -0.025990376, 0.016079213, 0.0428779, -0.01580563, -0.02132703, -0.0075919256, 0.0103526255, 0.0053690644, -0.015793195, 0.008866573, -0.0046446915 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Item1,Acceleration of gravity on earth is 9.8m/s^2.
Item2,"[ 0.018518053, -0.013706567, -0.007618186, -0.028572824, 0.008395426, 0.0056380746, -0.037949055, -0.027955968, 0.00073598773, -0.024304174, -0.0042964104, 0.03671534, 0.0038337675, -0.009573624, -0.007914278, -0.0052031903, 0.011152778, 0.01067163, -0.007359106, -0.011202127 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Item1,The chemical composition of salt or sodium clorida is NaCl.
Item2,"[ 0.030546002, 0.023610748, -0.0020088435, -0.027998358, 0.005574581, 6.634492E-05, -0.032604706, -0.034714874, -0.003567346, -0.01159307, -0.0019268171, 0.029568119, -0.014372319, 0.03440607, -0.011805374, 0.0019107335, 0.021178905, -0.018077984, 0.0128733255, -0.0025299527 ... (1516 more) ]"


### Embed the question

In [69]:
// The user question, and its embedding used as the query vector for the search below.
string input = "What is the speed of light?";
double[] e1 = await GetEmbedding(input);


### Perform Nearest search

In [70]:
// Maximum number of matches to keep, and the minimum similarity a match must exceed.
const int limit=3;
const double relevance=0.5;

// Score every stored entry against the query embedding, keep those above the
// relevance threshold, and take the `limit` most similar ones.
// Previously the loop stopped at the first `limit` entries above the threshold
// in storage order, so a more similar entry later in the list could be missed.
List<Tuple<string, double[]>> resultsList = vectorDb
    .Select(entry => (Entry: entry, Similarity: CosineSimilarity(e1, entry.Item2)))
    .Where(scored => scored.Similarity > relevance)
    .OrderByDescending(scored => scored.Similarity)
    .Take(limit)
    .Select(scored => scored.Entry)
    .ToList();

// Number of matches kept (at most `limit`).
var count = resultsList.Count;

### Print Results

In [71]:
// Attach a similarity score to each match and rank from most to least similar.
var ranked = resultsList
    .Select(entry => (Entry: entry, Score: CosineSimilarity(e1, entry.Item2)))
    .OrderByDescending(pair => pair.Score)
    .ToList();

// Keep resultsList in the same ranked order for any later cell that reads it.
resultsList = ranked.Select(pair => pair.Entry).ToList();

foreach (var (entry, score) in ranked)
{
    Console.WriteLine($"Similarity: {score}, Content: {entry.Item1}");
}

Similarity: 0.9076484494720922 Content: The speed of light is 300,000 km/s.
Similarity: 0.8044927978430234 Content: Acceleration of gravity on earth is 9.8m/s^2.
Similarity: 0.7380432345349796 Content: The chemical composition of water is H2O.
