### Install necessary packages
```bash
    !pip install ollama
    !pip install datasets
    !pip install -U scikit-learn
```

# 1. Load Dataset

In [1]:
from datasets import load_dataset

# Load the IMDB dataset through the `datasets` library; the result is indexed by split name below.
imdb = load_dataset(path="imdb")

In [2]:
# Convert the "train" and "test" splits to pandas DataFrames in one pass.
imdb_train_pd, imdb_test_pd = (
    imdb[split_name].to_pandas() for split_name in ("train", "test")
)

In [3]:
# Show the training DataFrame built from imdb["train"].
imdb_train_pd

Unnamed: 0,text,label
0,I rented I AM CURIOUS-YELLOW from my video sto...,0
1,"""I Am Curious: Yellow"" is a risible and preten...",0
2,If only to avoid making this type of film in t...,0
3,This film was probably inspired by Godard's Ma...,0
4,"Oh, brother...after hearing about this ridicul...",0
...,...,...
24995,A hit at the time but now better categorised a...,1
24996,I love this movie like no other. Another time ...,1
24997,This film and it's sequel Barry Mckenzie holds...,1
24998,'The Adventures Of Barry McKenzie' started lif...,1


In [4]:
# Draw a small, class-balanced subset (about 10 rows in total, split evenly
# across the distinct labels) so the embedding step below stays fast.
samples_per_label = 10 // imdb_train_pd['label'].nunique()

# GroupBy.sample draws `n` rows from every label group and keeps all columns,
# including 'label'. The fixed random_state makes the draw repeatable, so the
# notebook produces the same subset after Restart & Run All.
df_small_train_sample = (
    imdb_train_pd
    .groupby('label')
    .sample(n=samples_per_label, random_state=42)
    .reset_index(drop=True)
)

In [5]:
# Inspect the per-label sample drawn from the training DataFrame.
df_small_train_sample

Unnamed: 0,text,label
0,This movie is like the material S.E. Hinton wa...,0
1,RUN...do not walk away from this movie!!!!! Ai...,0
2,"Dieter Bohlen, Germany's notorious composer an...",0
3,If I had not read Pat Barker's 'Union Street' ...,0
4,The explosion of TV channels must be eternally...,0
5,"This is Jackie Chan's best film, and my person...",1
6,What I loved about the on-screen adaptation of...,1
7,This movie is perfect for all the romantics in...,1
8,"I bought this movie a few days ago, and though...",1
9,"This is a very funny movie, easy to watch, tha...",1


# 2. Convert Text To Embeddings

In [6]:
import pandas as pd
import ollama

In [7]:
# Work on a copy of df_small_train_sample so that columns added to `df` later
# do not silently modify the sample frame that was displayed earlier.
df = df_small_train_sample.copy()

# Define a function to generate embeddings
def generate_embeddings(row, text_column_name='text', model='llama3'):
    """Return the embedding for the text stored in one row.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row[text_column_name]``, e.g. the pandas Series
        that ``DataFrame.apply(..., axis=1)`` passes for each row.
    text_column_name : str, default 'text'
        Key/column of ``row`` that holds the text to embed.
    model : str, default 'llama3'
        Model name forwarded to ``ollama.embeddings``. The default keeps the
        behaviour of earlier calls that did not pass this argument.

    Returns
    -------
    The value found under the ``"embedding"`` key of the ``ollama.embeddings``
    response.
    """
    response = ollama.embeddings(
        model=model,
        prompt=row[text_column_name],
    )
    return response["embedding"]


In [8]:
# Column holding the review text to embed
column_name = 'text'

# Call generate_embeddings once per row (axis=1), forwarding the column name as its
# second positional argument, and keep the resulting vectors in a new 'embeddings' column
df['embeddings'] = df.apply(generate_embeddings, axis=1, args=(column_name,))

In [9]:
# Display df to check the 'embeddings' column next to 'text' and 'label'.
df

Unnamed: 0,text,label,embeddings
0,This movie is like the material S.E. Hinton wa...,0,"[-1.3971306085586548, -2.118555784225464, 0.04..."
1,RUN...do not walk away from this movie!!!!! Ai...,0,"[-2.844423770904541, 0.012460770085453987, 1.7..."
2,"Dieter Bohlen, Germany's notorious composer an...",0,"[-1.0280311107635498, -1.0665245056152344, 0.0..."
3,If I had not read Pat Barker's 'Union Street' ...,0,"[-2.0157930850982666, 0.15467536449432373, -2...."
4,The explosion of TV channels must be eternally...,0,"[-1.297585129737854, -2.260007381439209, -1.45..."
5,"This is Jackie Chan's best film, and my person...",1,"[-2.6929218769073486, 0.6353673338890076, 1.17..."
6,What I loved about the on-screen adaptation of...,1,"[-0.7430300116539001, -0.21735097467899323, 1...."
7,This movie is perfect for all the romantics in...,1,"[0.6251738667488098, -2.127596378326416, 1.711..."
8,"I bought this movie a few days ago, and though...",1,"[-3.6958141326904297, 0.06789332628250122, -3...."
9,"This is a very funny movie, easy to watch, tha...",1,"[-1.786821961402893, -1.2234193086624146, 1.91..."


# 3. Let's Classify Using Sklearn

In [10]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Features: one embedding vector per row of df; targets: the matching 'label' values
X_train = list(df["embeddings"])
y_train = df["label"]

# Build a k-nearest-neighbours classifier that votes over the 3 closest training points
knn = KNeighborsClassifier(n_neighbors=3)

# Fit on the embedded sample; fit() is the last expression, so the cell shows the estimator
knn.fit(X_train, y_train)


### 4. Test the model

In [None]:
# Evaluation template: kept commented out because X_test and y_test are not defined
# in the cells above. To enable it, build a held-out test set first, e.g. embed a
# sample of imdb_test_pd with generate_embeddings to obtain X_test and use the
# matching 'label' values as y_test.
# y_pred = knn.predict(X_test)

# Calculate the accuracy of the model
# accuracy = accuracy_score(y_test, y_pred)

# print(f'Accuracy: {accuracy}')