# Interacting with CLIP
This notebook is a hands-on introduction to the official CLIP repository provided by OpenAI.

In [None]:
import numpy as np
import torch
from pkg_resources import packaging

# Report the installed PyTorch version so results can be tied to a specific build.
print("Torch version:", torch.__version__)


## Loading the model

In [None]:
import clip
# Display the model names that this installation of `clip` reports as available.
clip.available_models()

In [None]:
# Load the ViT-B/32 checkpoint together with its matching image preprocessing
# transform. The device is chosen explicitly so the notebook also runs on
# machines without a GPU instead of crashing in an unconditional `.cuda()`.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
model.eval()  # the notebook only runs inference

# Architecture facts exposed by the loaded model, reported below.
input_resolution = model.visual.input_resolution
context_length = model.context_length
vocab_size = model.vocab_size

# `numel()` gives the element count of each parameter tensor directly.
n_parameters = sum(p.numel() for p in model.parameters())

print("Model parameters:", f"{n_parameters:,}")
print("Input resolution:", input_resolution)
print("Context length:", context_length)
print("Vocab size:", vocab_size)

In [None]:
import os
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

from collections import OrderedDict
import torch

%matplotlib inline

# Prompt set used throughout the notebook: one caption per car colour,
# followed by two unrelated scenes that serve as contrast.
colors = ['red', 'green', 'blue', 'yellow', 'purple']
cars = list(map(lambda color: f'a {color} car', colors))

descriptions = [
    *cars,
    "a man drinking a coffee in a restaurante",
    "a astronaut riding a horse",
]

descriptions

### Tokenization

In [None]:
# Tokenize all descriptions in one call and move the result to whatever device
# the model's parameters live on, rather than assuming CUDA is present.
model_device = next(model.parameters()).device
text_tokens = clip.tokenize(descriptions).to(model_device)
text_tokens[0].shape

### Inference

In [None]:
# Encode the tokenized descriptions with the text encoder. Gradient tracking
# is switched off because the features are only inspected, never trained on.
with torch.no_grad():
    text_features = model.encode_text(text_tokens)

In [None]:
# Shape of a single embedding and of the whole batch. Returned as one tuple
# because a cell only displays the value of its last expression.
text_features[0].shape, text_features.shape

## Calculating cosine similarity

We normalize the features and calculate the dot product of each pair.

In [None]:
# L2-normalise each embedding so that the dot product of two rows equals their
# cosine similarity. The division is done out of place: `text_features` is
# read again by later cells and must not be overwritten here.
features = text_features.float()  # float32 for a stable norm / matmul
normalized = features / features.norm(dim=-1, keepdim=True)

# Convert to NumPy once, then take all pairwise dot products.
normalized_np = normalized.cpu().numpy()
similarity = normalized_np @ normalized_np.T

In [None]:
from pandas import DataFrame as DF

# Show the similarity matrix as a labelled table: each row and column carries
# the description it belongs to, which the bare array output does not.
DF(similarity, index=descriptions, columns=descriptions)


In [None]:
# Shortened labels for plot axes. Work on a copy: plain assignment would only
# alias `descriptions`, so truncating entries here would also truncate the
# original prompts that later cells use for their legends.
descriptions_short = list(descriptions)
for idx in (5, 6):  # the two long scene descriptions
    descriptions_short[idx] = descriptions[idx][:10] + "..."
descriptions_short

In [None]:
import seaborn as sns

# Annotated heat map of the pairwise similarity matrix, with the shortened
# descriptions as tick labels on both axes.
sns.heatmap(
    similarity,
    annot=True,
    cmap="Greens",
    xticklabels=descriptions_short,
    yticklabels=descriptions_short,
)

## UMAP
Reduce dimensions and plot

Start by scaling the data (see the [StandardScaler docs](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html)).

In [None]:
import umap
import sklearn as sk
# Import the scaler from its submodule explicitly: `import sklearn` alone does
# not guarantee that `sklearn.preprocessing` is loaded as an attribute.
from sklearn.preprocessing import StandardScaler

# Bring the text embeddings to the CPU for the scikit-learn / UMAP estimators.
data = text_features.cpu()

# A fixed random_state makes the stochastic UMAP layout reproducible between runs.
reducer = umap.UMAP(n_neighbors=3, random_state=42)

# Standardise every feature dimension (zero mean, unit variance).
scaled = StandardScaler().fit_transform(data)

In [None]:
# Fit UMAP on the standardised features (`scaled`). The previous cell computes
# them for exactly this purpose; fitting on `data` would leave them unused.
reduced_umap = reducer.fit_transform(scaled)
reduced_umap.shape

In [None]:
# Scatter plot of the two UMAP coordinates, one point per description.
p = sns.scatterplot(
    x=reduced_umap[:, 0],
    y=reduced_umap[:, 1],
    hue=descriptions,
)
# Anchor the legend's upper-left corner at the axes' top-right corner.
sns.move_legend(p, loc="upper left", bbox_to_anchor=(1, 1))

## PCA

In [None]:
from sklearn.decomposition import PCA

# Fit a two-component PCA on the embeddings (`fit` returns the estimator
# itself) and show the fraction of variance each component explains.
pca = PCA(n_components=2).fit(data)
pca.explained_variance_ratio_

In [None]:
# Project every embedding onto the two fitted principal components.
reduced_pca = pca.transform(data)
reduced_pca

In [None]:
# Scatter plot of the two PCA coordinates, one point per description.
p = sns.scatterplot(
    x=reduced_pca[:, 0],
    y=reduced_pca[:, 1],
    hue=descriptions,
)
# Anchor the legend's upper-left corner at the axes' top-right corner.
sns.move_legend(p, loc="upper left", bbox_to_anchor=(1, 1))