# Movie Embeddings

Use ML.NET to apply Principal Component Analysis (PCA) to OpenAI Ada embedding vectors, reducing them to two dimensions, and plot the result in a 2-D scatter plot.

## Install packages

In [1]:
#r "nuget: Microsoft.ML, 3.0.0-preview.23266.6"
#r "nuget: Azure.AI.OpenAI, 1.0.0-beta.7"
#r "nuget: Plotly.NET, 4.2.0"
#r "nuget: Plotly.NET.Interactive, 4.2.0"
#r "nuget: Plotly.NET.CSharp, 0.11.1"

Loading extensions from `/home/vscode/.nuget/packages/plotly.net.interactive/4.2.0/interactive-extensions/dotnet/Plotly.NET.Interactive.dll`

In [2]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Azure.AI.OpenAI;
using Plotly.NET.CSharp;

## Configure OpenAI Client

In [3]:
// Reads a required setting from the process environment. Throws immediately, naming the
// missing variable, instead of letting a null or blank value reach the Uri / credential
// constructors in a later cell where the resulting error does not say which setting is absent.
string RequireEnvironmentVariable(string name)
{
    var value = Environment.GetEnvironmentVariable(name);
    if (string.IsNullOrWhiteSpace(value))
    {
        throw new InvalidOperationException($"Environment variable '{name}' is not set.");
    }

    return value;
}

// Endpoint URI, API key, and deployment id consumed by the cells below.
var AOAI_ENDPOINT = RequireEnvironmentVariable("AOAI_ENDPOINT");
var AOAI_KEY = RequireEnvironmentVariable("AOAI_KEY");
var AOAI_DEPLOYMENTID = RequireEnvironmentVariable("AOAI_DEPLOYMENTID");

In [4]:
// Build the OpenAI client from the endpoint and key read from the environment.
Uri endpoint = new(AOAI_ENDPOINT);
Azure.AzureKeyCredential credentials = new(AOAI_KEY);
OpenAIClient openAIClient = new(endpoint, credentials);

## Define data types

In [5]:
/// <summary>A movie title paired with a short description.</summary>
public record Movie(string Title, string Description);

/// <summary>
/// A movie's title and description together with the embedding vector computed for it.
/// </summary>
public class MovieEmbedding
{
    /// <summary>Movie title.</summary>
    public string Title { get; set; }

    /// <summary>Movie description.</summary>
    public string Description { get; set; }

    /// <summary>Embedding values, annotated for ML.NET as a vector of 1536 floats.</summary>
    [VectorType(1536)]
    public float[] Embedding { get; set; }
}

## Define movie titles and descriptions

In [6]:
// Sample data set: nine movies, each with a title and a short description.
Movie[] movies =
{
    new("The Lion King", "The Lion King is a classic Disney animated film that tells the story of a young lion named Simba who embarks on a journey to reclaim his throne as the king of the Pride Lands after the tragic death of his father."),
    new("Inception", "Inception is a mind-bending science fiction film directed by Christopher Nolan. It follows the story of Dom Cobb, a skilled thief who specializes in entering people's dreams to steal their secrets. However, he is offered a final job that involves planting an idea into someone's mind."),
    new("Titanic", "Titanic is an epic romance and disaster film directed by James Cameron. It revolves around the ill-fated maiden voyage of the RMS Titanic and the love story between Jack and Rose, two passengers from different social classes."),
    new("Shrek", "Shrek is an animated comedy film that follows the adventures of Shrek, an ogre who embarks on a quest to rescue Princess Fiona from a dragon-guarded tower in order to get his swamp back."),
    new("The Shawshank Redemption", "The Shawshank Redemption is a drama film based on Stephen King's novella. It tells the story of Andy Dufresne, a banker who is wrongfully convicted of murder and his experiences at Shawshank State Penitentiary."),
    new("Toy Story", "Toy Story is a groundbreaking animated film from Pixar. It follows the secret lives of toys when their owner, Andy, is not around. Woody and Buzz Lightyear are the main characters in this heartwarming tale."),
    new("The Dark Knight", "The Dark Knight is a superhero film directed by Christopher Nolan. It is the second installment in Nolan's Batman trilogy and explores the conflict between Batman and the Joker, a chaotic and unpredictable criminal mastermind."),
    new("Jurassic Park", "Jurassic Park is a science fiction adventure film directed by Steven Spielberg. It centers around a theme park populated with genetically resurrected dinosaurs and the chaos that ensues when the dinosaurs escape."),
    new("Pulp Fiction", "Pulp Fiction is a crime film directed by Quentin Tarantino. It weaves together interconnected stories of mobsters, hitmen, and other colorful characters in a non-linear narrative filled with dark humor and violence.")
};

## Generate embeddings

In [7]:
// Request one embedding per movie description from the OpenAI deployment.
// The query is materialized with ToArray() because Select is lazy: left unmaterialized,
// every enumeration of movieEmbeddings (displaying it in the notebook, and each pass made
// over the IDataView that is loaded from it) would repeat the embedding request for
// every movie.
var movieEmbeddings =
    movies
        .Select(movie => {
            var embeddingOptions = new EmbeddingsOptions(movie.Description);
            Embeddings embeddingResponse = openAIClient.GetEmbeddings(AOAI_DEPLOYMENTID, embeddingOptions);
            // Each request is built from a single description; its embedding is read from Data[0].
            var embedding = embeddingResponse.Data[0].Embedding.ToArray();
            return new MovieEmbedding { Title = movie.Title, Description = movie.Description, Embedding = embedding };
        })
        .ToArray();

In [8]:
// Evaluate the collection as the cell result so the notebook renders each movie and its embedding.
movieEmbeddings

index,value
,
,
,
,
,
,
,
,
,
0,"Submission#5+MovieEmbeddingTitleThe Lion KingDescriptionThe Lion King is a classic Disney animated film that tells the story of a young lion named Simba who embarks on a journey to reclaim his throne as the king of the Pride Lands after the tragic death of his father.Embedding[ 0.010041435, -0.023260927, -0.025179984, -0.034249667, -0.00086708984, 0.0322206, -0.023334267, -0.00757233, -0.005338523, -0.01050592, 0.008446296, 0.012895574, 0.028504718, -0.026035614, 0.007517325, -0.0011084998, 0.02230751, -0.027795767, 0.026280079, -0.0037647742 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Title,The Lion King
Description,The Lion King is a classic Disney animated film that tells the story of a young lion named Simba who embarks on a journey to reclaim his throne as the king of the Pride Lands after the tragic death of his father.
Embedding,"[ 0.010041435, -0.023260927, -0.025179984, -0.034249667, -0.00086708984, 0.0322206, -0.023334267, -0.00757233, -0.005338523, -0.01050592, 0.008446296, 0.012895574, 0.028504718, -0.026035614, 0.007517325, -0.0011084998, 0.02230751, -0.027795767, 0.026280079, -0.0037647742 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Title,Inception
Description,"Inception is a mind-bending science fiction film directed by Christopher Nolan. It follows the story of Dom Cobb, a skilled thief who specializes in entering people's dreams to steal their secrets. However, he is offered a final job that involves planting an idea into someone's mind."
Embedding,"[ 0.009283638, -0.021505617, 0.00025231577, -0.04139008, -0.014058442, 0.02042907, -0.025672488, -0.024494618, -0.0046766484, 0.0053922357, 0.018883908, 0.020023782, 0.005699368, -0.018237978, 0.0049996125, 0.009435621, 0.03513344, 0.006674594, -0.010436177, -0.009739587 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Title,Titanic
Description,"Titanic is an epic romance and disaster film directed by James Cameron. It revolves around the ill-fated maiden voyage of the RMS Titanic and the love story between Jack and Rose, two passengers from different social classes."
Embedding,"[ -0.01679535, -0.048044574, -0.011542848, -0.025490457, -0.0035786626, 0.0050594886, -0.015985327, 0.002781295, 0.007961022, -0.021402365, -0.015656253, 0.03845085, 0.012998361, -0.014681693, -0.004426657, 0.0066194185, 0.024933565, -0.012276933, 0.00958107, -0.045563877 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Title,Shrek
Description,"Shrek is an animated comedy film that follows the adventures of Shrek, an ogre who embarks on a quest to rescue Princess Fiona from a dragon-guarded tower in order to get his swamp back."
Embedding,"[ 0.009915214, -0.06572093, -0.022210566, -0.014778709, 0.009235175, -0.005179227, -0.013163616, -0.03987944, -0.006053563, -0.017838886, 0.0307232, 0.009417328, 0.03290904, 0.0012530633, 0.01266573, -0.004769382, 0.025962925, -0.02027974, -0.0011020278, -0.027055845 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Title,The Shawshank Redemption
Description,"The Shawshank Redemption is a drama film based on Stephen King's novella. It tells the story of Andy Dufresne, a banker who is wrongfully convicted of murder and his experiences at Shawshank State Penitentiary."
Embedding,"[ 0.0014322961, -0.04671103, -0.0132510895, -0.024045024, -0.027580319, 0.016021658, -0.015846148, -0.0004564073, -0.029887037, -0.0070956643, 0.0026671423, 0.004582094, 0.0033159065, 0.013012896, -0.0165858, 0.016360143, 0.028708605, -0.02496019, 0.0007929342, -0.042022377 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Title,Toy Story
Description,"Toy Story is a groundbreaking animated film from Pixar. It follows the secret lives of toys when their owner, Andy, is not around. Woody and Buzz Lightyear are the main characters in this heartwarming tale."
Embedding,"[ -0.017549831, -0.041784104, -0.010693833, -0.02706817, -0.0026464537, -0.011907453, -0.004530424, -0.004346157, -0.00059569033, -0.02025665, 0.0076820226, -0.002994337, 0.004412874, -0.001655225, -0.0011572278, 0.012110782, 0.019354377, 0.0018315493, 0.017054217, -0.030168938 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Title,The Dark Knight
Description,"The Dark Knight is a superhero film directed by Christopher Nolan. It is the second installment in Nolan's Batman trilogy and explores the conflict between Batman and the Joker, a chaotic and unpredictable criminal mastermind."
Embedding,"[ -0.011673308, -0.031593926, -0.016282832, -0.02168968, -0.0068395366, 0.012570296, 0.002127233, -0.027806642, -0.016631661, -0.0065343115, 0.026187079, 0.008783012, 0.0012240155, -0.0059861518, 0.0024340155, -0.014675727, 0.025402214, -0.012651274, 0.008390579, -0.024654724 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Title,Jurassic Park
Description,Jurassic Park is a science fiction adventure film directed by Steven Spielberg. It centers around a theme park populated with genetically resurrected dinosaurs and the chaos that ensues when the dinosaurs escape.
Embedding,"[ 0.005926464, -0.037009142, -0.017391797, -0.028106945, 0.004035372, -0.0005966318, -0.018254511, -0.027106697, 0.0044729803, 0.003017933, -0.0020489437, 0.017166741, 0.005857697, -0.0097149005, 0.017091723, 0.0055076107, 0.030707587, -0.014791154, 0.0094398325, -0.012334297 ... (1516 more) ]"

Unnamed: 0,Unnamed: 1
Title,Pulp Fiction
Description,"Pulp Fiction is a crime film directed by Quentin Tarantino. It weaves together interconnected stories of mobsters, hitmen, and other colorful characters in a non-linear narrative filled with dark humor and violence."
Embedding,"[ -0.0013762183, -0.031044293, -0.011941586, -0.02680634, -0.013722292, 0.0070462376, 0.0045379302, -0.00040827808, -0.01810066, -0.017807068, 0.02081959, 0.021534426, 0.0082972, -0.012152207, 0.010014082, -0.01418183, 0.03849901, -0.004493253, 0.0006230878, -0.039111726 ... (1516 more) ]"


## Initialize MLContext

In [9]:
// ML.NET context; the cells below use it for data loading and for the PCA transform.
MLContext ctx = new();

## Load movie embedding data into an IDataView

In [10]:
// Wrap the in-memory MovieEmbedding rows in an IDataView.
IDataView dv = ctx.Data.LoadFromEnumerable<MovieEmbedding>(movieEmbeddings);

## Inspect IDataView Schema

In [11]:
// Evaluate the data view's schema as the cell result so the notebook displays it.
dv.Schema

## Define Principal Component Analysis pipeline

In [12]:
// PCA estimator: reads the "Embedding" column and writes a rank-2 projection to "PCA".
// A fixed seed is supplied to the estimator.
var pipeline = ctx.Transforms.ProjectToPrincipalComponents(
    inputColumnName: "Embedding",
    outputColumnName: "PCA",
    rank: 2,
    seed: 1234);


## Run PCA on movie embedding data

In [13]:
// Fit the PCA estimator on the embeddings, then apply the fitted model to the same data.
var pcaModel = pipeline.Fit(dv);
var transformedDv = pcaModel.Transform(dv);

## Get the titles and computed principal components

In [14]:
// Pull the title column and the PCA output column out of the transformed data view.
IEnumerable<string> titles = transformedDv.GetColumn<string>("Title");
IEnumerable<float[]> pc = transformedDv.GetColumn<float[]>("PCA");

## Plot movies

In [15]:
// Split each projected vector into its first (x) and second (y) component.
IEnumerable<float> x = pc.Select(components => components[0]);
IEnumerable<float> y = pc.Select(components => components[1]);

In [16]:
// Scatter plot of the two components, one point per movie, with the title as the point text
// positioned below the marker.
var scatter = Chart.Point<float, float, string>(
    x: x,
    y: y,
    MultiText: titles.ToList(),
    TextPosition: Plotly.NET.StyleParam.TextPosition.BottomCenter);

// Title both axes, then display the chart.
scatter
    .WithXAxisStyle<int, int, string>(Title: Plotly.NET.Title.init("Component 1"))
    .WithYAxisStyle<int, int, string>(Title: Plotly.NET.Title.init("Component 2"))
    .Display()