# Experiments on embeddings quantization and SVD

## Configuration & helpers

In [1]:
import boto3
import pandas as pd
import numpy as np

# Build a boto3 session from a named AWS profile and create the Bedrock runtime
# client that is handed to every experiment below.
# Replace the placeholder with the name of one of your own AWS config profiles.
boto_session = boto3.Session(region_name="us-east-1", profile_name="<INSERT YOUR AWS CONFIG PROFILE>")
bruntime = boto_session.client('bedrock-runtime')


In [3]:
def form_analysis_df(df_list, measurement):
    """Concatenate result frames and add size / performance-retention columns.

    For every model, the row whose ``type`` is ``'classic'`` is the baseline
    that all rows of the same model are compared against.

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        Frames to concatenate. Each must contain the columns ``model_id``,
        ``type``, ``size_in_bytes`` and the column named by ``measurement``.
    measurement : str
        Name of the performance column (for example ``"spearman"``).

    Returns
    -------
    pandas.DataFrame
        The concatenated frame (inputs are not modified) with three added columns:

        * ``log_size`` - natural log of ``size_in_bytes``;
        * ``size_multiplier`` - baseline size divided by the row's size;
        * ``performance_retention`` - row performance as a percentage of the
          baseline performance, rounded to two decimals.

    Raises
    ------
    ValueError
        If at least one model has no ``'classic'`` baseline row.
    """
    final_df = pd.concat(df_list)

    # One baseline row per model. Selecting with a boolean mask (instead of an
    # f-string ``query``) keeps working when a model id contains quote characters.
    # If a model has several 'classic' rows, the first one is used.
    baseline = (
        final_df.loc[final_df["type"] == "classic"]
        .drop_duplicates(subset="model_id", keep="first")
        .set_index("model_id")
    )

    missing = [mid for mid in final_df["model_id"].unique() if mid not in baseline.index]
    if missing:
        raise ValueError(
            f"No 'classic' baseline row found for model_id(s): {missing}"
        )

    # Look up each row's baseline values; plain arrays avoid any index alignment
    # (the concatenated frame usually carries duplicate index labels).
    baseline_perf = final_df["model_id"].map(baseline[measurement]).to_numpy()
    baseline_size = final_df["model_id"].map(baseline["size_in_bytes"]).to_numpy()
    row_size = final_df["size_in_bytes"].to_numpy()
    row_perf = final_df[measurement].to_numpy()

    final_df["log_size"] = np.log(row_size)
    final_df["size_multiplier"] = baseline_size / row_size
    final_df["performance_retention"] = np.round(100 * row_perf / baseline_perf, 2)
    return final_df

## STS-B

In [4]:
from experiments import STSLabAssistant
# Run the STS-B experiments for the Titan models through the Bedrock runtime client.
# NOTE(review): `run()` appears to evaluate every quantization variant (see the
# "Handling <model>, quantization <type>" lines in the cell output) - confirm in `experiments`.
sts_titan_operator = STSLabAssistant.instantiate_titan_experiments(bruntime)
titan_df=sts_titan_operator.run()

# Same experiments for the Cohere models; both results are passed to
# form_analysis_df in a later cell.
sts_cohere_operator = STSLabAssistant.instantiate_cohere_experiments(bruntime)
cohere_df=sts_cohere_operator.run()

Handling amazon.titan-embed-text-v1, quantization classic


Handling amazon.titan-embed-text-v1, quantization binary


Handling amazon.titan-embed-text-v1, quantization scalar


Handling amazon.titan-embed-text-v1, quantization ternary


Handling amazon.titan-embed-text-v1, quantization rotated-ternary


Handling amazon.titan-embed-text-v1, quantization rotated-binary


Handling amazon.titan-embed-text-v1, quantization rotated-scalar


Handling amazon.titan-embed-image-v1, quantization classic


Handling amazon.titan-embed-image-v1, quantization binary


Handling amazon.titan-embed-image-v1, quantization scalar


Handling amazon.titan-embed-image-v1, quantization ternary


Handling amazon.titan-embed-image-v1, quantization rotated-ternary


Handling amazon.titan-embed-image-v1, quantization rotated-binary


Handling amazon.titan-embed-image-v1, quantization rotated-scalar


Handling cohere.embed-multilingual-v3, quantization classic


Handling cohere.embed-multilingual-v3, quantization scalar


Handling cohere.embed-multilingual-v3, quantization binary


Handling cohere.embed-multilingual-v3, quantization ternary


Handling cohere.embed-multilingual-v3, quantization rotated-ternary


Handling cohere.embed-multilingual-v3, quantization rotated-scalar


Handling cohere.embed-multilingual-v3, quantization rotated-binary


Handling cohere.embed-english-v3, quantization classic


Handling cohere.embed-english-v3, quantization scalar


Handling cohere.embed-english-v3, quantization binary


Handling cohere.embed-english-v3, quantization ternary


Handling cohere.embed-english-v3, quantization rotated-ternary


Handling cohere.embed-english-v3, quantization rotated-scalar


Handling cohere.embed-english-v3, quantization rotated-binary


In [6]:

# Merge the Titan and Cohere STS-B results; retention is computed on the "spearman" column.
final_sts_df = form_analysis_df(
    df_list=[titan_df, cohere_df],
    measurement="spearman",
)

In [7]:
import plotly.express as px

# Grouped bar chart of STS-B performance retention per quantization method.
# The 'classic' rows are excluded from the bars; the horizontal line at y=100
# marks the level of full retention.
fig = px.bar(
    final_sts_df.query("type!='classic'"),
    x="type",
    y="performance_retention",
    color="model_id",
    barmode="group",
    # Zoom the y-axis to the observed range (1% margin) so small differences stay visible.
    range_y=[
        final_sts_df['performance_retention'].min()*0.99,
        final_sts_df['performance_retention'].max()*1.01,
    ],
    # Each method is followed by its rotated counterpart. The comma after "binary"
    # matters: without it Python concatenates the two adjacent string literals.
    category_orders={"type": [
        "binary", "rotated-binary",
        "ternary", "rotated-ternary",
        "quaternary", "rotated-quaternary",
        "scalar", "rotated-scalar",
    ]},
    labels={"performance_retention": "Performance retention", "type": "Method", "model_id": "Model"},
    title="Performance retention - STSB",
)

fig.add_hline(y=100)

fig.show()

## Banking77

In [8]:
from experiments import Banking77LabAssistant
# Run the Banking77 experiments for the Titan models through the Bedrock runtime client.
# NOTE(review): the recorded output of this cell is dominated by scikit-learn
# "TOTAL NO. of ITERATIONS REACHED LIMIT" warnings pointing at logistic regression;
# the classifier is not defined in this notebook - consider raising max_iter or
# scaling the features wherever it is configured (presumably in `experiments`).
banking_operator = Banking77LabAssistant.instantiate_titan_experiments(bruntime)
titan_banking_df=banking_operator.run()

# Same experiments for the Cohere models; both results are passed to
# form_analysis_df in a later cell.
cohere_banking_operator = Banking77LabAssistant.instantiate_cohere_experiments(bruntime)
cohere_banking_df=cohere_banking_operator.run()

Handling amazon.titan-embed-text-v1, quantization classic


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-text-v1, quantization binary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-text-v1, quantization scalar


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-text-v1, quantization ternary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-text-v1, quantization rotated-ternary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-text-v1, quantization rotated-binary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-text-v1, quantization rotated-scalar


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-image-v1, quantization classic


Handling amazon.titan-embed-image-v1, quantization binary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-image-v1, quantization scalar


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-image-v1, quantization ternary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-image-v1, quantization rotated-ternary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-image-v1, quantization rotated-binary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling amazon.titan-embed-image-v1, quantization rotated-scalar


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-multilingual-v3, quantization classic


Handling cohere.embed-multilingual-v3, quantization scalar


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-multilingual-v3, quantization binary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-multilingual-v3, quantization ternary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-multilingual-v3, quantization rotated-ternary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-multilingual-v3, quantization rotated-scalar


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-multilingual-v3, quantization rotated-binary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-english-v3, quantization classic


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Handling cohere.embed-english-v3, quantization scalar


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-english-v3, quantization binary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-english-v3, quantization ternary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-english-v3, quantization rotated-ternary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-english-v3, quantization rotated-scalar


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Handling cohere.embed-english-v3, quantization rotated-binary


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [9]:
# Merge the Titan and Cohere Banking77 results; retention is computed on the "accuracy" column.
final_banking_df = form_analysis_df(
    df_list=[titan_banking_df, cohere_banking_df],
    measurement="accuracy",
)

In [10]:
import plotly.express as px

# Banking77: share of the classic (unquantized) accuracy that each
# quantization method retains, grouped by model.
banking_retention = final_banking_df["performance_retention"]

fig = px.bar(
    final_banking_df.query("type!='classic'"),
    x="type",
    y="performance_retention",
    color="model_id",
    barmode="group",
    # Pad the y-axis by 1% around the observed retention values.
    range_y=[banking_retention.min() * 0.99, banking_retention.max() * 1.01],
    category_orders={
        "type": ["binary", "rotated-binary", "ternary", "rotated-ternary", "scalar", "rotated-scalar"]
    },
    labels={"performance_retention": "Performance retention", "type": "Method", "model_id": "Model"},
)

# Reference line: 100 means the same score as the classic embeddings.
fig.add_hline(y=100)

fig.show()

## Combining both tasks

In [None]:
# Optional shortcut: reload the combined results saved by the df.to_pickle cell
# further down. The next cell rebuilds `df` from the in-memory frames, so running
# both in order overwrites what is loaded here.
# NOTE(review): unpickling executes arbitrary code; only load a file you produced yourself.
df = pd.read_pickle("final_analysis.pickle")

In [11]:
# Harmonise both result frames: each task's own metric becomes a shared
# "performance" column and a "task" label records the benchmark of origin.
tmp = final_sts_df.rename(columns={"spearman": "performance"}).assign(task="STS-B")
tmp2 = final_banking_df.rename(columns={"accuracy": "performance"}).assign(task="Banking77")

# Stack the two tasks into a single analysis frame (original indexes are kept).
df = pd.concat([tmp, tmp2])

In [12]:
# Boolean flags derived from the method name: every method other than the
# 'classic' baseline is a quantization, and a "rotated" substring marks rotation.
df["quantization"] = df["type"].ne("classic")
df["rotation"] = df["type"].str.contains("rotated", regex=False)

In [13]:
import plotly.express as px

# Retention per model: marker size encodes the size multiplier, colour the
# method, and symbol the task.
px.scatter(
    df,
    x="model_id",
    y="performance_retention",
    size="size_multiplier",
    color="type",
    symbol="task",
)

In [14]:
import plotly.express as px

# Same view as above, but coloured by whether a rotation was applied.
px.scatter(
    df,
    x="model_id",
    y="performance_retention",
    size="size_multiplier",
    color="rotation",
    title="Performance retention induced by rotation",
)

In [15]:
# Trade-off view: size multiplier against retained performance, one colour per task.
px.scatter(
    df,
    x="size_multiplier",
    y="performance_retention",
    color="task",
)

In [16]:
import plotly.express as px

# Retention of every quantization method, one facet per task.
retention = df["performance_retention"]

fig = px.bar(
    df.query("type!='classic'"),
    x="type",
    y="performance_retention",
    color="model_id",
    barmode="group",
    # Pad the y-axis by 1% around the observed retention values.
    range_y=[retention.min() * 0.99, retention.max() * 1.01],
    # Each method must be its own list item: without the comma, Python joins
    # adjacent string literals ("binary" "rotated-binary" -> "binaryrotated-binary")
    # and the requested ordering of those two categories is silently lost.
    category_orders={
        "type": ["binary", "rotated-binary", "ternary", "rotated-ternary", "scalar", "rotated-scalar"]
    },
    labels={"performance_retention": "Performance retention", "type": "Method", "model_id": "Model"},
    title="Performance retention",
    facet_col="task",
)

# Reference line: 100 means the same score as the classic embeddings.
fig.add_hline(y=100)

fig.show()

In [17]:
# Display the combined STS-B + Banking77 results, including the derived
# quantization / rotation flags.
df

Unnamed: 0,model_id,dimension,performance,time,type,size_in_bytes,log_size,size_multiplier,performance_retention,task,quantization,rotation
0,amazon.titan-embed-text-v1,1536,0.748511,0.05,classic,6144,8.723231,1.0,100.0,STS-B,False,False
1,amazon.titan-embed-text-v1,192,0.716135,5.98,binary,192,5.257495,32.0,95.67,STS-B,True,False
2,amazon.titan-embed-text-v1,1536,0.762723,0.27,scalar,1536,7.336937,4.0,101.9,STS-B,True,False
3,amazon.titan-embed-text-v1,384,0.754167,3.39,ternary,384,5.950643,16.0,100.76,STS-B,True,False
4,amazon.titan-embed-text-v1,384,0.749431,9.17,rotated-ternary,384,5.950643,16.0,100.12,STS-B,True,True
5,amazon.titan-embed-text-v1,192,0.701355,13.9,rotated-binary,192,5.257495,32.0,93.7,STS-B,True,True
6,amazon.titan-embed-text-v1,1536,0.762892,3.54,rotated-scalar,1536,7.336937,4.0,101.92,STS-B,True,True
7,amazon.titan-embed-image-v1,1024,0.593242,0.03,classic,4096,8.317766,1.0,100.0,STS-B,False,False
8,amazon.titan-embed-image-v1,128,0.535335,3.92,binary,128,4.85203,32.0,90.24,STS-B,True,False
9,amazon.titan-embed-image-v1,1024,0.593393,0.2,scalar,1024,6.931472,4.0,100.03,STS-B,True,False


In [26]:
# Average size multiplier and performance retention per method, over all models
# and tasks. The aggregation is named by string ("mean") rather than passing
# np.mean, which recent pandas versions deprecate inside groupby().agg().
grouped = (
    df.groupby(["type"])
    .agg({"size_multiplier": "mean", "performance_retention": "mean"})
    .reset_index()
)
# Markdown rendering so the table can be pasted into a write-up.
print(grouped.sort_values(by="performance_retention", ascending=False).to_markdown(index=False))

| type            |   size_multiplier |   performance_retention |
|:----------------|------------------:|------------------------:|
| scalar          |                 4 |                100.986  |
| rotated-scalar  |                 4 |                100.919  |
| classic         |                 1 |                100      |
| rotated-ternary |                16 |                 99.6287 |
| ternary         |                16 |                 99.2325 |
| rotated-binary  |                32 |                 90.6025 |
| binary          |                32 |                 89.8238 |


In [20]:
# Compact view used for the markdown tables. Selecting the columns explicitly
# already leaves out "time" and "log_size", so no separate drop is needed.
display_df = df[["task", "model_id", "type", "dimension", "size_multiplier", "performance_retention"]]

# Ten best configurations by performance retention.
# (A stray `bb` token that raised NameError at the end of this cell was removed.)
print(display_df.sort_values(by="performance_retention", ascending=False).head(10).to_markdown(index=False))

| task      | model_id                     | type            |   dimension |   size_multiplier |   performance_retention |
|:----------|:-----------------------------|:----------------|------------:|------------------:|------------------------:|
| Banking77 | amazon.titan-embed-image-v1  | rotated-scalar  |        1024 |                 4 |                  106.94 |
| Banking77 | amazon.titan-embed-image-v1  | scalar          |        1024 |                 4 |                  106.07 |
| Banking77 | amazon.titan-embed-image-v1  | rotated-ternary |         256 |                16 |                  106.03 |
| Banking77 | amazon.titan-embed-image-v1  | ternary         |         256 |                16 |                  105.56 |
| Banking77 | cohere.embed-english-v3      | scalar          |        1024 |                 4 |                  102.04 |
| STS-B     | amazon.titan-embed-text-v1   | rotated-scalar  |        1536 |                 4 |                  101.92 |
| STS-B     | am

In [27]:
# Ten worst configurations by performance retention, rendered as markdown.
print(
    display_df
    .sort_values(by="performance_retention")  # ascending order is the default
    .head(10)
    .to_markdown(index=False)
)


| task      | model_id                     | type           |   dimension |   size_multiplier |   performance_retention |
|:----------|:-----------------------------|:---------------|------------:|------------------:|------------------------:|
| Banking77 | amazon.titan-embed-image-v1  | binary         |         128 |                32 |                   85.03 |
| Banking77 | amazon.titan-embed-image-v1  | rotated-binary |         128 |                32 |                   86.33 |
| Banking77 | cohere.embed-english-v3      | binary         |         128 |                32 |                   86.91 |
| Banking77 | cohere.embed-multilingual-v3 | binary         |         128 |                32 |                   87.07 |
| Banking77 | amazon.titan-embed-text-v1   | rotated-binary |         192 |                32 |                   87.44 |
| Banking77 | cohere.embed-english-v3      | rotated-binary |         128 |                32 |                   87.8  |
| Banking77 | amazon.tit

In [None]:
# Persist the combined results so the analysis can later be resumed through the
# pd.read_pickle("final_analysis.pickle") cell without re-running the experiments.
df.to_pickle("final_analysis.pickle")

In [None]:
import plotly.express as px

# Binary vs. scalar quantization only, one facet per task.
binary_scalar_rows = df.query("type=='binary' | type=='scalar'")
all_retention = df["performance_retention"]

fig = px.bar(
    binary_scalar_rows,
    x="type",
    y="performance_retention",
    color="model_id",
    barmode="group",
    facet_col="task",
    title="Performance retention on STS-B and Banking77 tasks",
    # The axis range is taken from the full frame, not only the plotted rows.
    range_y=[all_retention.min() * 0.99, all_retention.max() * 1.01],
    category_orders={"type": ["binary", "scalar"]},
    labels={"performance_retention": "Performance retention", "type": "Method", "model_id": "Model"},
)

# Reference line: 100 means the same score as the classic embeddings.
fig.add_hline(y=100)

fig.show()

In [None]:
import plotly.express as px

# Non-rotated methods only (binary, ternary, scalar), one facet per task.
retention = df["performance_retention"]

fig = px.bar(
    df.query("type=='binary' |type=='ternary' |type=='scalar' "),
    x="type",
    y="performance_retention",
    color="model_id",
    barmode="group",
    # Pad the y-axis by 1% around the retention values of the full frame.
    range_y=[retention.min() * 0.99, retention.max() * 1.01],
    # Each method must be its own list item: without the comma, Python joins
    # adjacent string literals ("binary" "ternary" -> "binaryternary") and the
    # requested ordering of those two categories is silently lost.
    category_orders={"type": ["binary", "ternary", "scalar"]},
    labels={"performance_retention": "Performance retention", "type": "Method", "model_id": "Model"},
    facet_col="task",
    title="Performance retention on STS-B and Banking77 tasks: ternary, binary & scalar",
)

# Reference line: 100 means the same score as the classic embeddings.
fig.add_hline(y=100)

fig.show()

In [None]:

# Spread (max - min) of performance retention within each compression level.
retention_spread = (
    df.groupby("size_multiplier")
    .agg({"performance_retention": lambda scores: np.max(scores) - np.min(scores)})
    .reset_index()
)

# Lookup table: size multiplier -> retention spread.
retention_interval = {
    row["size_multiplier"]: row["performance_retention"]
    for _, row in retention_spread.iterrows()
}
retention_interval

In [None]:
# Attach to every row the retention spread of its size multiplier.
df["e"] = df["size_multiplier"].map(retention_interval)

In [None]:
import plotly.express as px

# Percent-type error-bar specification built from the retention spreads.
# It is not passed to px.scatter below: error bars are currently disabled.
error_y = {"type": "percent", "value": retention_interval.values(), "visible": True}

# Retention against size multiplier, one facet per task, with a single OLS
# trendline fitted on the log of the x values across all methods.
fig = px.scatter(
    df,
    x="size_multiplier",
    y="performance_retention",
    color="type",
    trendline="ols",
    trendline_scope="overall",
    trendline_options=dict(log_x=True),
    labels={"performance_retention": "Performance retention", "size_multiplier": "Size multiplier", "model_id": "Model"},
    title="The trade-off: performance retention vs. size multiplier (with trend)",
    facet_col="task",
)

fig.show()


In [None]:
import plotly.express as px

# Non-rotated methods only: retention against size multiplier, one facet per task.
retention = df["performance_retention"]

fig = px.scatter(
    df.query("type=='binary' |type=='ternary' |type=='scalar' "),
    x="size_multiplier",
    y="performance_retention",
    color="type",
    # Pad the y-axis by 1% around the retention values of the full frame.
    range_y=[retention.min() * 0.99, retention.max() * 1.01],
    # Each method must be its own list item: without the comma, Python joins
    # adjacent string literals ("binary" "ternary" -> "binaryternary") and the
    # requested ordering of those two categories is silently lost.
    category_orders={"type": ["binary", "ternary", "scalar"]},
    labels={"performance_retention": "Performance retention", "type": "Method", "model_id": "Model"},
    facet_col="task",
    title="Performance retention on STS-B and Banking77 tasks: ternary, binary & scalar",
)

fig.show()

Is rotation beneficial?

In [None]:
# Compare each quantization with its own rotated variant.
#
# Rows are paired on (task, model_id, base method). Joining on task and model
# alone would match every plain method with every rotated method (binary vs.
# rotated-scalar, ...), so size effects would swamp the effect of rotation.
pair_keys = ["task", "model_id", "type"]
quantized_rows = df.query("quantization==True")

quantized_df = quantized_rows.query("rotation==False")[pair_keys + ["performance_retention"]]
quantized_rotated_df = (
    quantized_rows.query("rotation==True")
    # "rotated-binary" -> "binary", so the rotated row lines up with its plain counterpart.
    .assign(type=lambda rows: rows["type"].str.replace("rotated-", "", regex=False))
    [pair_keys + ["performance_retention"]]
    .rename(columns={"performance_retention": "performance_retention_rotated"})
)

# validate= raises if a (task, model, method) combination appears more than once on either side.
benchmark_df = quantized_df.merge(quantized_rotated_df, on=pair_keys, validate="one_to_one")

# Signed effect of rotation, in retention points, for each pair.
rotation_gain = benchmark_df["performance_retention_rotated"] - benchmark_df["performance_retention"]
mask = benchmark_df["performance_retention_rotated"].values > benchmark_df["performance_retention"].values

# Share of pairs where rotation helps, mean gain over those pairs, and mean
# change over the remaining pairs (NaN when a group is empty).
proportion_higher = np.mean(mask)
average_improvement = np.mean(rotation_gain[mask])
average_decrease = np.mean(rotation_gain[~mask])

proportion_higher, average_improvement, average_decrease


## SVD decomp

### Collecting data 

#### With titan Embed Text v1

In [None]:
# Stack the Banking77 and STS-B embedding matrices of Titan Embed Text v1 into a
# single float32 matrix X (rows of both tasks, one after the other).
model_id = "amazon.titan-embed-text-v1"
# NOTE(review): the Cohere cell reads from `cohere_banking_operator`; confirm that
# `banking_operator` is really the Titan Banking77 operator.
Xbanking = banking_operator.matrixes[model_id].astype(np.float32)
Xsts =  sts_titan_operator.matrixes[model_id].astype(np.float32)
X=np.concatenate([Xbanking, Xsts])
X.shape

#### Or With Cohere Embed v3

In [None]:
# Alternative to the Titan cell: build X from the Cohere Embed English v3
# matrices instead. This reassigns model_id, Xbanking, Xsts and X, so whichever
# of the two cells ran last decides what the SVD cells below analyse.
model_id = "cohere.embed-english-v3"
Xbanking = cohere_banking_operator.matrixes[model_id].astype(np.float32)
Xsts =  sts_cohere_operator.matrixes[model_id].astype(np.float32)
X=np.concatenate([Xbanking, Xsts])
X.shape

When studying size reduction vs. accuracy, using unsigned integers was useful. Now, in order to understand the decay rate, we value symmetry. Therefore, transformations will be using _signed_ integers.

In [None]:

# Quantile quantization
# NOTE(review): the markdown above announces *signed* integers, but every code
# produced in this cell is unsigned (uint8) - confirm which one is intended.

# Ternary: codes 0 / 1 / 2, split at the global 1/3 and 2/3 quantiles of X.
X33, X66 = np.quantile(X, [1/3, 2/3])
ternary = list(np.where(X > X66, 2, np.where(X > X33, 1, 0)).astype(np.uint8))

# Binary: 1 for strictly positive components, 0 otherwise.
binary = list((X > 0).astype(np.uint8))

# Scalar: map [Xmin, Xmax] linearly onto the 256 levels of a uint8.
Xmin, Xmax = X.min(), X.max()
# Width of one quantization step. Deliberately not named `range`: that would
# shadow the builtin for every cell executed afterwards.
scalar_step = (Xmax - Xmin) / 255


def scalar_quantize(x):
    """Quantize one embedding to uint8 levels.

    The values are shifted by the global minimum ``Xmin``, divided by
    ``scalar_step`` and truncated (not rounded) by the cast to ``np.uint8``.

    Args:
        x: NumPy array of embedding values.

    Returns:
        Array of the same shape as ``x`` with dtype ``np.uint8``.
    """
    scaled = (x - Xmin) / scalar_step
    return scaled.astype(np.uint8)


# One uint8 row per embedding, like `ternary` and `binary`.
scalar = [scalar_quantize(x) for x in X]


## Test on SVD rotations

In [None]:
# Reduced SVD (full_matrices=False) of the original embeddings and of each
# quantized version; S, Sb, St and Ss hold the singular values compared below.
U, S, Vh = np.linalg.svd(X, full_matrices=False)
Ub, Sb, Vhb = np.linalg.svd(binary, full_matrices=False)
Ut, St, Vht = np.linalg.svd(ternary, full_matrices=False)
Us, Ss, Vhs = np.linalg.svd(scalar, full_matrices=False)


In [None]:
def get_decay_rate(S, max_dim=1024):
    """Return the leading singular values relative to the largest one.

    Args:
        S: array of singular values, in any order.
        max_dim: number of leading (largest) values to keep.

    Returns:
        The values of ``S`` in decreasing order, each divided by the largest,
        truncated to at most ``max_dim`` entries.
    """
    ranked = np.sort(S)[::-1]
    largest = ranked[0]
    return (ranked / largest)[:max_dim]



In [None]:
import matplotlib.pyplot as plt

# Normalised decay of the 30 largest singular values, one curve per representation.
# NOTE(review): the title and file name are hard-coded for Cohere Embed v3; they
# are only accurate when the Cohere cell was the last one to build X.
for spectrum, method_label in ((S, "classic"), (Ss, "scalar"), (St, "ternary"), (Sb, "binary")):
    plt.plot(get_decay_rate(spectrum, max_dim=30), label=method_label)

plt.legend()
plt.title("Decay rate of singular values \n(Extracted from test set STS B+Banking77 Cohere Embed v3)")
plt.savefig("stsb-decayrate-cohere.png")

In [None]:
# Frobenius distance between the left singular vectors of X and those of the
# binary, ternary and scalar versions (in that order).
# NOTE(review): singular vectors are only defined up to sign, so these raw
# differences may reflect sign flips rather than real differences - confirm.
tuple(np.linalg.norm(U - quantized_U, ord="fro") for quantized_U in (Ub, Ut, Us))

In [None]:
# Frobenius distance between the right singular vectors of X and those of the
# binary, ternary and scalar versions (in that order).
# NOTE(review): singular vectors are only defined up to sign, so these raw
# differences may reflect sign flips rather than real differences - confirm.
tuple(np.linalg.norm(Vh - quantized_Vh, ord="fro") for quantized_Vh in (Vhb, Vht, Vhs))

In [None]:
# Full SVD of X (full_matrices=True returns square U and Vh factors).
# NOTE(review): the third value returned by np.linalg.svd is Vh (already
# transposed) even though it is named Vx here, and none of the three results is
# used by the cells visible below - confirm this cell is still needed.
Ux, Sx, Vx = np.linalg.svd(X, full_matrices=True)


In [None]:
# Product of the left and right singular vectors for each representation.
# NOTE(review): the singular values (S, Sb, St, Ss) are never applied, so these
# products are not U @ diag(S) @ Vh reconstructions - confirm this is intended.
# NOTE(review): 1536 looks like the Titan Text embedding width; the slice keeps
# at most the first 1536 columns - confirm it is still wanted for other models.
reconstructed = U[:, :1536] @ Vh
reconstructedbinary = Ub[:, :1536] @ Vhb
reconstructedternary = Ut[:, :1536] @ Vht
reconstructedscalar = Us[:, :1536] @ Vhs


In [None]:
# Frobenius distance between X and each product above, in the order
# binary, ternary, scalar, classic.
tuple(
    np.linalg.norm(X - candidate, ord="fro")
    for candidate in (reconstructedbinary, reconstructedternary, reconstructedscalar, reconstructed)
)