# Compressing table data as video

## Loading packages

In [1]:
import numpy as np
from PIL import Image
import pandas as pd
import os

In [2]:
# head -n 10000 "cosmetics_all_data.csv" > "10000lines.csv"

In [6]:
def csv_to_images(csv_path, chunk_size=1024):
    """Encode a file's raw bytes as equally sized square grayscale PNG frames.

    The file is read as bytes (it is not parsed as CSV) and stored one byte
    per pixel in ``images/frame_NNNN.png`` under the current working
    directory.

    Every frame has the same ``side x side`` shape, where ``side`` is the
    smallest integer with ``side * side >= chunk_size``. Each frame carries
    ``side * side`` bytes, so zero padding only ever appears at the end of
    the final frame and concatenating the frames in order yields the
    original bytes followed by that padding.

    Args:
        csv_path: Path of the file to encode.
        chunk_size: Requested number of bytes per frame. It is rounded up to
            the next perfect square so frames are completely filled. Must be
            at least 1.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    # Read CSV as raw bytes
    with open(csv_path, 'rb') as f:
        data = f.read()

    os.makedirs('images', exist_ok=True)

    # One fixed frame geometry for the whole sequence: an image sequence fed
    # to a video encoder needs a constant frame size, and filling each frame
    # completely keeps padding out of the middle of the byte stream.
    side = int(np.ceil(np.sqrt(chunk_size)))
    frame_bytes = side * side
    total_chunks = (len(data) + frame_bytes - 1) // frame_bytes

    for i in range(total_chunks):
        chunk = data[i * frame_bytes:(i + 1) * frame_bytes]
        # Only the last chunk can be short; pad it with zeros to a full frame.
        padded = chunk + bytes(frame_bytes - len(chunk))

        arr = np.frombuffer(padded, dtype=np.uint8).reshape((side, side))
        # A 2-D uint8 array is interpreted as 8-bit grayscale ('L').
        img = Image.fromarray(arr)
        img.save(f'images/frame_{i:04d}.png')

    print(f"Created {total_chunks} image frames.")

# Encode the sample CSV as PNG frames under images/, 1024 bytes per frame.
csv_to_images(csv_path='10000lines.csv', chunk_size=1024)

Created 1154 image frames.


In [None]:
# ffmpeg -framerate 10 -i images/frame_%04d.png -c:v libx264 -preset veryslow -crf 0 output.mkv

# mkdir frames_out

In [7]:
def images_to_csv(output_csv_path, frames_folder='frames_out', original_size=None):
    """Rebuild a file by concatenating the pixel bytes of PNG frames.

    Files matching ``frame_*.png`` in ``frames_folder`` are read in numeric
    frame order, converted to 8-bit grayscale, and their pixels are appended
    row by row. The result is written to ``output_csv_path`` as raw bytes.

    Args:
        output_csv_path: Destination path for the reconstructed file.
        frames_folder: Directory containing the ``frame_<number>.png`` files.
        original_size: Optional byte length of the original file. When given,
            the output is truncated to this length, which removes trailing
            padding stored in the last frame. When ``None`` (the default),
            all decoded bytes are written unchanged.

    Raises:
        ValueError: If ``original_size`` is negative or larger than the
            number of bytes decoded from the frames.
    """
    import glob
    import re

    if original_size is not None and original_size < 0:
        raise ValueError(
            f"original_size must be non-negative, got {original_size!r}"
        )

    frame_name = re.compile(r'frame_(\d+)\.png', re.IGNORECASE)

    def frame_order(path):
        # Order by the numeric index rather than the string, so that
        # frame_10000.png follows frame_9999.png. Files whose suffix is not
        # a number are kept, sorted by name after the numbered frames.
        match = frame_name.fullmatch(os.path.basename(path))
        if match:
            return (0, int(match.group(1)), path)
        return (1, 0, path)

    images = sorted(glob.glob(f'{frames_folder}/frame_*.png'), key=frame_order)
    all_bytes = bytearray()

    for img_path in images:
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the array.
        with Image.open(img_path) as img:
            arr = np.array(img.convert('L'))
        all_bytes.extend(arr.tobytes())

    if original_size is not None:
        if original_size > len(all_bytes):
            raise ValueError(
                f"original_size is {original_size} but only "
                f"{len(all_bytes)} bytes were decoded from the frames"
            )
        # Drop the trailing padding beyond the original length.
        del all_bytes[original_size:]

    with open(output_csv_path, 'wb') as f_out:
        f_out.write(all_bytes)

# Rebuild the CSV directly from the PNG frames in images/ (the folder that
# csv_to_images writes to), not from the frames_out default.
images_to_csv(output_csv_path='reconstructed.csv', frames_folder='images')