# Create Compressed Vector

The `CompressedVector` class builds on sdsl4py, extending its functionality to handle floats and negative numbers.

## Compressed Vector from other vector

In [1]:
# All imports for this cell are grouped at the top so that a missing
# dependency fails immediately, before any file is written to disk.
import altair as alt
import numpy as np
import pandas as pd

from cv_visualization import CompressedVector as cv

INT_WIDTH = 16       # passed to CompressedVector as `int_width`
VECTOR_SIZE = 10000  # number of samples in the demo vector

# Reference data: VECTOR_SIZE evenly spaced float64 values from -1.0 to 1.0.
original_vector = np.linspace(-1.0, 1.0, VECTOR_SIZE, dtype=np.float64)
# Save the original vector to a CSV file; the "Compressed Vector from File"
# section reads it back.
np.savetxt('original_vector.csv', original_vector, delimiter=',')

# First build the compressed vector, using the desired number of decimal
# places and an integer width appropriate for the data.
compressed_vector = cv(decimal_places=4, int_width=INT_WIDTH)

# Then create the vector with the same size as the original data.
compressed_vector.create_vector(len(original_vector))

# Fill the vector with the original data.
compressed_vector.fill_from_vector(original_vector)

# Now let's visualize the compressed vector against the original vector.
# Lift Altair's row limit so every sample of both series can be charted.
alt.data_transformers.disable_max_rows()

# Shift the compressed vector down by 0.5 (in place) so the two lines do not
# sit on top of each other in the chart.
compressed_vector -= 0.5

# Long-format frame for the original vector.
df_original = pd.DataFrame({
    'index': np.arange(len(original_vector)),
    'value': original_vector,
    'type': 'Original'
})

# Long-format frame for the (shifted) compressed vector.
df_compressed = pd.DataFrame({
    'index': np.arange(len(compressed_vector)),
    'value': compressed_vector,
    'type': 'Compressed'
})

# Stack both series into a single frame; `type` distinguishes them.
df = pd.concat([df_original, df_compressed])

# One line per `type`, plotted as value against position in the vector.
chart = (
    alt.Chart(df)
    .mark_line()
    .encode(x='index', y='value', color='type')
    .properties(title='Comparison of Original and Compressed Vectors')
)

# Display the chart (last expression of the cell).
chart

## Compressed Vector from File

In [2]:
# Start from an empty compressed vector configured with the same decimal
# places and integer width as the one built from the in-memory array.
compressed_vector_from_file = cv(decimal_places=4, int_width=INT_WIDTH)
compressed_vector_from_file.create_vector(len(original_vector))

# Populate it from the CSV file:
#   delimiter - change it here if the CSV file uses a different delimiter
#   truncate  - to read only part of the file, give the number of lines here
compressed_vector_from_file.build_from_file(
    file_path='original_vector.csv',
    column=0,
    delimiter=',',
    truncate=None,
)

# Shift it up by 0.5 (in place) so it is distinguishable in the chart.
compressed_vector_from_file += 0.5

# Long-format frame for the compressed vector read from file.
df_compressed_from_file = pd.DataFrame({
    'index': np.arange(len(compressed_vector_from_file)),
    'value': compressed_vector_from_file,
    'type': 'Compressed from File'
})

# Stack all three series into one frame.
df = pd.concat([df_original, df_compressed, df_compressed_from_file])

# One line per `type`.
chart2 = (
    alt.Chart(df)
    .mark_line()
    .encode(x='index', y='value', color='type')
    .properties(
        title='Comparison of Original, Compressed, and Compressed from File Vectors'
    )
)

# Display the chart (last expression of the cell).
chart2

Let's check how much space our compressed vectors are using:

In [3]:
# Report the size in bytes of both compressed vectors, then of the NumPy
# array they were built from.
for size_label, sized_vector in (
    ("Compressed Vector Size:", compressed_vector),
    ("Compressed Vector from File Size:", compressed_vector_from_file),
):
    print(size_label, sized_vector.size_in_bytes())
print("Original Vector Size:", original_vector.nbytes)

Compressed Vector Size: 60024
Compressed Vector from File Size: 60024
Original Vector Size: 80000


Great: since the vector stores its values as integers with a fixed width (`INT_WIDTH = 16` bits here), we are already using less space than the original vector. However, we can `compress` our vectors to use even less space.

You can get all available compression methods with this function:

In [4]:
from cv_visualization import list_available_compression_methods

# Print every available compression method as a numbered list, counting from 1.
methods = list_available_compression_methods()
for i, method in enumerate(methods, start=1):
    print(f"{i}. {method}")


1. enc_vector_elias_gamma
2. enc_vector_fibonacci
3. enc_vector_comma_2
4. enc_vector_elias_delta
5. vlc_vector_elias_delta
6. vlc_vector_elias_gamma
7. vlc_vector_fibonacci
8. vlc_vector_comma_2
9. dac_vector
10. No Compression


We can use any of these methods, passing either its name as a string or the corresponding sdsl4py type.

In [5]:
# Compress copies rather than the vectors themselves, so the uncompressed
# versions remain available for comparison. The method is selected by name.
vlc_vector_comma_2 = compressed_vector.__copy__()
vlc_vector_comma_2.compress("vlc_vector_comma_2")

vlc_vector_fibonacci = compressed_vector_from_file.__copy__()
vlc_vector_fibonacci.compress("vlc_vector_fibonacci")

# Let's see how much space we saved: each entry pairs the printed label with
# a zero-argument callable that returns the size in bytes.
size_report = (
    ("Compressed Vector Size:", compressed_vector.size_in_bytes),
    ("Compressed Vector from File Size:", compressed_vector_from_file.size_in_bytes),
    ("Original Vector Size:", lambda: original_vector.nbytes),
    ("Compressed Vector Comma 2 Size:", vlc_vector_comma_2.size_in_bytes),
    ("Compressed Vector Fibonacci Size:", vlc_vector_fibonacci.size_in_bytes),
)
for size_label, measure in size_report:
    print(size_label, measure())

Compressed Vector Size: 60024
Compressed Vector from File Size: 60024
Original Vector Size: 80000
Compressed Vector Comma 2 Size: 32291
Compressed Vector Fibonacci Size: 28731


And we can also visualize these compressed vectors!

In [6]:
def _series_frame(vector, series_label):
    """Return a long-format frame (index, value, type) for one plotted series."""
    return pd.DataFrame({
        'index': np.arange(len(vector)),
        'value': vector,
        'type': series_label
    })


# The VLC copies were taken from the already-shifted vectors, and compressed
# vectors can no longer be edited. So shift the still-editable vectors once
# more (in place) to separate them from the VLC copies in the chart.
# NOTE: this mutates the vectors, so re-running the cell shifts them again.
compressed_vector -= 0.5
compressed_vector_from_file += 0.5

# Let's plot everything together: one frame per series.
df_vlc_comma_2 = _series_frame(vlc_vector_comma_2, 'Compressed VLC Comma 2')
df_vlc_fibonacci = _series_frame(vlc_vector_fibonacci, 'Compressed VLC Fibonacci')
def_new_compressed_vector = _series_frame(compressed_vector, 'Compressed Vector')
def_new_compressed_vector_from_file = _series_frame(
    compressed_vector_from_file, 'Compressed Vector from File'
)

# Stack every series into one frame.
# NOTE(review): df_compressed and df_compressed_from_file were built before
# the second shift above and are plotted next to the re-shifted frames, so the
# chart has seven series while the title names five - confirm this is intended.
df = pd.concat([
    df_original,
    df_compressed,
    df_compressed_from_file,
    df_vlc_comma_2,
    df_vlc_fibonacci,
    def_new_compressed_vector,
    def_new_compressed_vector_from_file,
])

# One line per `type`.
chart3 = (
    alt.Chart(df)
    .mark_line()
    .encode(x='index', y='value', color='type')
    .properties(
        title='Comparison of Original, Compressed, Compressed from File, VLC Comma 2, and VLC Fibonacci Vectors'
    )
)

# Display the chart (last expression of the cell).
chart3

We sacrificed the ability to edit our values, but we gained a significant reduction in space.