# Movie Search with Azure OpenAI

## Install Milvus C# SDK

In [1]:
#r "nuget: Milvus.Client, 2.2.2-preview.5"
#r "nuget: Azure.AI.OpenAI, 1.0.0-beta.7"

In [2]:
using Milvus.Client;
using Azure.AI.OpenAI;

## Initialize Milvus Client

In [3]:
// Client for the Milvus server running on localhost.
// NOTE(review): "username"/"password" look like sample placeholders - replace
// them with the credentials of your own deployment.
var milvusClient = new MilvusClient(
    "localhost",
    username: "username",
    password: "password");

## Cleanup (optional)

In [4]:
// Optional cleanup: remove a "movies" collection left over from an earlier run.
// The existence check keeps this cell from failing on a server where there is
// nothing to drop (for example, the very first run of the notebook).
if (await milvusClient.HasCollectionAsync("movies"))
{
    await milvusClient.GetCollection("movies").DropAsync();
}

## Get a collection handle

In [5]:
// Obtain a client-side object for the collection named "movies". Despite the
// plural variable name, this is a single collection, not a list.
// NOTE(review): the call is not awaited, so it presumably does not contact the
// server or verify that the collection exists - confirm against the SDK.
var collections = milvusClient.GetCollection("movies");

In [6]:
// Trailing expression without a semicolon: the notebook renders its value as
// the cell output.
collections

Unnamed: 0,Unnamed: 1
Name,movies


## Create collection

### Define schema

In [7]:
// Schema of the "movies" collection: collection-level settings first, then the
// three fields added in the order they should appear in the collection.
var schema = new CollectionSchema
{
    Description = "Test movie search",
    EnableDynamicFields = true
};

// Primary key supplied by the caller.
schema.Fields.Add(FieldSchema.Create<long>("movie_id", isPrimaryKey: true));

// Movie title, limited to 200 characters.
schema.Fields.Add(FieldSchema.CreateVarchar("movie_name", maxLength: 200));

// Embedding of the movie description; every stored vector must have 1536 components.
schema.Fields.Add(FieldSchema.CreateFloatVector("movie_description", dimension: 1536));

### Create collection

In [8]:
// Create the "movies" collection from the schema defined above, using two
// shards, and keep the returned collection object for the remaining cells.
var collection = await milvusClient.CreateCollectionAsync(
    collectionName: "movies",
    schema: schema,
    shardsNum: 2);

In [9]:
// Ask the server to describe the collection; the trailing expression (no
// semicolon) is rendered as the cell output.
await collection.DescribeAsync()

index,value
,
,
,
Aliases,[ ]
CollectionName,movies
CollectionId,444190052365369509
ConsistencyLevel,Session
CreationTimestamp,1694450629307
Schema,Milvus.Client.CollectionSchemaNamemoviesDescriptionTest movie searchFieldsindexvalue0Milvus.Client.FieldSchemaNamemovie_idDataTypeInt64IsPrimaryKeyTrueAutoIdFalseIsPartitionKeyFalseDescriptionIsDynamicFalseMaxLength<null>Dimension<null>StateFieldCreatedFieldId1001Milvus.Client.FieldSchemaNamemovie_nameDataTypeVarCharIsPrimaryKeyFalseAutoIdFalseIsPartitionKeyFalseDescriptionIsDynamicFalseMaxLength200Dimension<null>StateFieldCreatedFieldId1012Milvus.Client.FieldSchemaNamemovie_descriptionDataTypeFloatVectorIsPrimaryKeyFalseAutoIdFalseIsPartitionKeyFalseDescriptionIsDynamicFalseMaxLength<null>Dimension1536StateFieldCreatedFieldId102EnableDynamicFieldsFalse
,

index,value
,
,
,
Name,movies
Description,Test movie search
Fields,indexvalue0Milvus.Client.FieldSchemaNamemovie_idDataTypeInt64IsPrimaryKeyTrueAutoIdFalseIsPartitionKeyFalseDescriptionIsDynamicFalseMaxLength<null>Dimension<null>StateFieldCreatedFieldId1001Milvus.Client.FieldSchemaNamemovie_nameDataTypeVarCharIsPrimaryKeyFalseAutoIdFalseIsPartitionKeyFalseDescriptionIsDynamicFalseMaxLength200Dimension<null>StateFieldCreatedFieldId1012Milvus.Client.FieldSchemaNamemovie_descriptionDataTypeFloatVectorIsPrimaryKeyFalseAutoIdFalseIsPartitionKeyFalseDescriptionIsDynamicFalseMaxLength<null>Dimension1536StateFieldCreatedFieldId102
index,value
0,Milvus.Client.FieldSchemaNamemovie_idDataTypeInt64IsPrimaryKeyTrueAutoIdFalseIsPartitionKeyFalseDescriptionIsDynamicFalseMaxLength<null>Dimension<null>StateFieldCreatedFieldId100
,
Name,movie_id

index,value
,
,
,
0,Milvus.Client.FieldSchemaNamemovie_idDataTypeInt64IsPrimaryKeyTrueAutoIdFalseIsPartitionKeyFalseDescriptionIsDynamicFalseMaxLength<null>Dimension<null>StateFieldCreatedFieldId100
,
Name,movie_id
DataType,Int64
IsPrimaryKey,True
AutoId,False
IsPartitionKey,False

Unnamed: 0,Unnamed: 1
Name,movie_id
DataType,Int64
IsPrimaryKey,True
AutoId,False
IsPartitionKey,False
Description,
IsDynamic,False
MaxLength,<null>
Dimension,<null>
State,FieldCreated

Unnamed: 0,Unnamed: 1
Name,movie_name
DataType,VarChar
IsPrimaryKey,False
AutoId,False
IsPartitionKey,False
Description,
IsDynamic,False
MaxLength,200
Dimension,<null>
State,FieldCreated

Unnamed: 0,Unnamed: 1
Name,movie_description
DataType,FloatVector
IsPrimaryKey,False
AutoId,False
IsPartitionKey,False
Description,
IsDynamic,False
MaxLength,<null>
Dimension,1536
State,FieldCreated


## Add data to collection

### Configure AOAI Client

In [10]:
// Reads a required setting from the process environment.
// Throws InvalidOperationException naming the variable when it is missing or
// blank, so a configuration problem surfaces here instead of as an opaque
// null-argument error when the value is first used.
string RequireEnvironmentVariable(string name)
{
    var value = Environment.GetEnvironmentVariable(name);
    if (string.IsNullOrWhiteSpace(value))
    {
        throw new InvalidOperationException(
            $"Environment variable '{name}' is not set. Set it before running this notebook.");
    }

    return value;
}

// Azure OpenAI endpoint URL, API key, and the deployment used for embeddings.
var AOAI_ENDPOINT = RequireEnvironmentVariable("AOAI_ENDPOINT");
var AOAI_KEY = RequireEnvironmentVariable("AOAI_KEY");
var AOAI_DEPLOYMENTID = RequireEnvironmentVariable("AOAI_DEPLOYMENTID");

In [11]:
// Build the Azure OpenAI client from the endpoint URL and API key read from
// the environment in the previous cell.
Uri endpoint = new(AOAI_ENDPOINT);
Azure.AzureKeyCredential credentials = new(AOAI_KEY);
OpenAIClient openAIClient = new(endpoint, credentials);

In [12]:
/// <summary>A movie together with its plain-text description.</summary>
record Movie(
    long Id,
    string Name,
    string Description);

/// <summary>
/// A movie whose description is carried as an embedding vector instead of text.
/// </summary>
record MovieEmbedding(
    long Id,
    string Name,
    ReadOnlyMemory<float> DescriptionEmbedding);

In [13]:
// Sample catalogue: five movies, each with an id, a title, and a short
// description that is later turned into an embedding.
Movie[] movies =
{
    new(1L, "The Lion King", "The Lion King is a classic Disney animated film that tells the story of a young lion named Simba who embarks on a journey to reclaim his throne as the king of the Pride Lands after the tragic death of his father."),
    new(2L, "Inception", "Inception is a mind-bending science fiction film directed by Christopher Nolan. It follows the story of Dom Cobb, a skilled thief who specializes in entering people's dreams to steal their secrets. However, he is offered a final job that involves planting an idea into someone's mind."),
    new(3L, "Toy Story", "Toy Story is a groundbreaking animated film from Pixar. It follows the secret lives of toys when their owner, Andy, is not around. Woody and Buzz Lightyear are the main characters in this heartwarming tale."),
    new(4L, "Pulp Fiction", "Pulp Fiction is a crime film directed by Quentin Tarantino. It weaves together interconnected stories of mobsters, hitmen, and other colorful characters in a non-linear narrative filled with dark humor and violence."),
    new(5L, "Shrek", "Shrek is an animated comedy film that follows the adventures of Shrek, an ogre who embarks on a quest to rescue Princess Fiona from a dragon-guarded tower in order to get his swamp back."),
};

### Generate embeddings

In [14]:
/// <summary>
/// Requests an embedding for <paramref name="input"/> from the Azure OpenAI
/// deployment named by AOAI_DEPLOYMENTID, using a synchronous service call.
/// </summary>
/// <param name="input">Text to embed.</param>
/// <returns>The embedding of the first item in the response, copied into a new array.</returns>
float[] GenerateEmbedding(string input)
{
    Embeddings response = openAIClient.GetEmbeddings(AOAI_DEPLOYMENTID, new EmbeddingsOptions(input));

    // One input was sent, so only the first data item is read.
    var firstItem = response.Data[0];
    return firstItem.Embedding.ToArray();
}

In [15]:
// Embed every movie description exactly once.
// ToArray() materializes the query here. Without it the Select is deferred and
// re-runs on every enumeration, so displaying the result and projecting the
// id/name/vector columns would each call the embeddings service again for all
// movies.
var movieEmbeddings = movies
    .Select(movie => new MovieEmbedding(
        Id: movie.Id,
        Name: movie.Name,
        DescriptionEmbedding: new ReadOnlyMemory<float>(GenerateEmbedding(movie.Description))))
    .ToArray();

In [16]:
// Trailing expression without a semicolon: the notebook enumerates and renders
// the embeddings as the cell output.
movieEmbeddings

index,value
,
,
,
,
,
0,"MovieEmbedding { Id = 1, Name = The Lion King, DescriptionEmbedding = System.ReadOnlyMemory<Single>[1536] }Id1NameThe Lion KingDescriptionEmbedding[ 0.010041435, -0.023260927, -0.025179984, -0.034249667, -0.00086708984, 0.0322206, -0.023334267, -0.00757233, -0.005338523, -0.01050592, 0.008446296, 0.012895574, 0.028504718, -0.026035614, 0.007517325, -0.0011084998, 0.02230751, -0.027795767, 0.026280079, -0.0037647742 ... (1516 more) ]"
,
Id,1
Name,The Lion King
DescriptionEmbedding,"[ 0.010041435, -0.023260927, -0.025179984, -0.034249667, -0.00086708984, 0.0322206, -0.023334267, -0.00757233, -0.005338523, -0.01050592, 0.008446296, 0.012895574, 0.028504718, -0.026035614, 0.007517325, -0.0011084998, 0.02230751, -0.027795767, 0.026280079, -0.0037647742 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Id,1
Name,The Lion King
DescriptionEmbedding,"[ 0.010041435, -0.023260927, -0.025179984, -0.034249667, -0.00086708984, 0.0322206, -0.023334267, -0.00757233, -0.005338523, -0.01050592, 0.008446296, 0.012895574, 0.028504718, -0.026035614, 0.007517325, -0.0011084998, 0.02230751, -0.027795767, 0.026280079, -0.0037647742 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Id,2
Name,Inception
DescriptionEmbedding,"[ 0.009283638, -0.021505617, 0.00025231577, -0.04139008, -0.014058442, 0.02042907, -0.025672488, -0.024494618, -0.0046766484, 0.0053922357, 0.018883908, 0.020023782, 0.005699368, -0.018237978, 0.0049996125, 0.009435621, 0.03513344, 0.006674594, -0.010436177, -0.009739587 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Id,3
Name,Toy Story
DescriptionEmbedding,"[ -0.017549831, -0.041784104, -0.010693833, -0.02706817, -0.0026464537, -0.011907453, -0.004530424, -0.004346157, -0.00059569033, -0.02025665, 0.0076820226, -0.002994337, 0.004412874, -0.001655225, -0.0011572278, 0.012110782, 0.019354377, 0.0018315493, 0.017054217, -0.030168938 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Id,4
Name,Pulp Fiction
DescriptionEmbedding,"[ -0.0013762183, -0.031044293, -0.011941586, -0.02680634, -0.013722292, 0.0070462376, 0.0045379302, -0.00040827808, -0.01810066, -0.017807068, 0.02081959, 0.021534426, 0.0082972, -0.012152207, 0.010014082, -0.01418183, 0.03849901, -0.004493253, 0.0006230878, -0.039111726 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Id,5
Name,Shrek
DescriptionEmbedding,"[ 0.009915214, -0.06572093, -0.022210566, -0.014778709, 0.009235175, -0.005179227, -0.013163616, -0.03987944, -0.006053563, -0.017838886, 0.0307232, 0.009417328, 0.03290904, 0.0012530633, 0.01266573, -0.004769382, 0.025962925, -0.02027974, -0.0011020278, -0.027055845 ... (1516 more) ]"


In [17]:
// Split the embedded movies into one array per collection field (column-wise),
// which is the shape the insert call below is given.
var movieIds = (from embedded in movieEmbeddings select embedded.Id).ToArray();
var movieNames = (from embedded in movieEmbeddings select embedded.Name).ToArray();
var movieDescriptions = (from embedded in movieEmbeddings select embedded.DescriptionEmbedding).ToArray();

In [18]:
// One FieldData entry per schema field; the three arrays are parallel, so
// position i in each array belongs to the same movie.
FieldData[] movieFieldData =
{
    FieldData.Create("movie_id", movieIds),
    FieldData.Create("movie_name", movieNames),
    FieldData.CreateFloatVector("movie_description", movieDescriptions)
};

// Send all columns to the collection in a single insert request.
await collection.InsertAsync(movieFieldData);

## Persist data

In [19]:
// Flush the collection after the insert (the "Persist data" step of this notebook).
await collection.FlushAsync();

In [20]:
// Query the number of entities in the collection; the trailing expression (no
// semicolon) is rendered as the cell output.
await collection.GetEntityCountAsync()

## Search for data

### Create Index

In [21]:
// Build a FLAT (exhaustive, exact) index named "movie_idx" over the description
// vectors, comparing them by L2 distance.
// No extra build parameters are passed: "nlist" configures IVF-family indexes
// and has no meaning for FLAT, so sending it was only misleading.
await collection.CreateIndexAsync(
    fieldName: "movie_description",
    indexType: IndexType.Flat,
    metricType: SimilarityMetricType.L2,
    indexName: "movie_idx");

### Load collection

In [22]:
// Request that the collection be loaded, then wait for the load to finish
// before any search is issued.
await collection.LoadAsync();
await collection.WaitForCollectionLoadAsync();

### Define search parameters

In [23]:
// Options applied to the search below:
//  - return the "movie_name" field with every hit;
//  - use Strong consistency for the search.
// No index-specific search parameters are set: "nprobe" applies to IVF-family
// indexes, while this notebook builds a FLAT index, which has none.
var parameters = new SearchParameters
{
    OutputFields = { "movie_name" },
    ConsistencyLevel = ConsistencyLevel.Strong
};

### Search for data

Search for data using embedding vectors for the query "A movie that's fun for the whole family".

In [24]:
// Embed the search phrase with the same helper used for the movie descriptions.
var queryEmbedding = GenerateEmbedding("A movie that's fun for the whole family");

In [25]:
// Return the three stored movie descriptions closest (by L2 distance) to the
// query embedding.
var results = await collection.SearchAsync(
    vectorFieldName: "movie_description",
    // Batch of one query vector; float[] converts implicitly to ReadOnlyMemory<float>.
    vectors: new ReadOnlyMemory<float>[] { queryEmbedding },
    SimilarityMetricType.L2,
    limit: 3,
    parameters);

In [26]:
// Trailing expression without a semicolon: the notebook renders the search
// results (returned fields and matching ids) as the cell output.
results

index,value
,
,
CollectionName,movies
FieldsData,"indexvalue0Field: {FieldName: movie_name, DataType: VarChar, Data: 3, RowCount: 3}Data[ Toy Story, The Lion King, Shrek ]RowCount3FieldNamemovie_nameFieldId0DataTypeVarCharIsDynamicFalse"
index,value
0,"Field: {FieldName: movie_name, DataType: VarChar, Data: 3, RowCount: 3}Data[ Toy Story, The Lion King, Shrek ]RowCount3FieldNamemovie_nameFieldId0DataTypeVarCharIsDynamicFalse"
,
Data,"[ Toy Story, The Lion King, Shrek ]"
RowCount,3
FieldName,movie_name

index,value
,
0,"Field: {FieldName: movie_name, DataType: VarChar, Data: 3, RowCount: 3}Data[ Toy Story, The Lion King, Shrek ]RowCount3FieldNamemovie_nameFieldId0DataTypeVarCharIsDynamicFalse"
,
Data,"[ Toy Story, The Lion King, Shrek ]"
RowCount,3
FieldName,movie_name
FieldId,0
DataType,VarChar
IsDynamic,False

Unnamed: 0,Unnamed: 1
Data,"[ Toy Story, The Lion King, Shrek ]"
RowCount,3
FieldName,movie_name
FieldId,0
DataType,VarChar
IsDynamic,False

Unnamed: 0,Unnamed: 1
LongIds,"[ 3, 1, 5 ]"
StringIds,<null>


##