In [None]:
# Configuration: point `dir` at a local checkout of the repository to process.
# The source data for this example can be found at
# https://github.com/ohdearquant/liongate

# NOTE(review): `dir` shadows the Python builtin of the same name, and its value
# is an absolute, machine-specific path; adjust it before running elsewhere.
dir = "/Users/lion/github/lion/"
crates = ["lion"]  # names appended to `dir` to form the directories to scan
output_path = "data/lion"  # directory that receives the generated files
prefix = "whole"  # leading part of the concatenated file's name
postfix = "source_codes"  # trailing part of the concatenated file's name

In [2]:
# One directory per crate, formed by appending the crate name to `dir`.
data_path = [dir + crate_name for crate_name in crates]
# File extensions handed to `concat_files` as its `file_types` argument.
file_types = [".rs"]

In [3]:
from lionagi.libs.file.concat_files import concat_files
from datetime import datetime

# Date-stamped name of the concatenated output,
# e.g. "whole_20250302_source_codes.txt".
date_stamp = datetime.now().strftime("%Y%m%d")
filename = f"{prefix}_{date_stamp}_{postfix}.txt"

# With both `return_files` and `return_fps` set, the call hands back two values,
# unpacked here as `texts` and `fps`.
texts, fps = concat_files(
    data_path=data_path,
    file_types=file_types,
    output_dir=output_path,
    output_filename=filename,
    return_files=True,
    return_fps=True,
)

Concatenated 160 files to data/lion/whole_20250302_source_codes.txt
The file contains 1590143 characters.


In [4]:
from pathlib import Path


# Save a sorted listing of the collected file paths, relative to `dir`.
path = Path(output_path) / "lion_directory.txt"

# Strip `dir` only where it is the leading prefix; a blanket `str.replace`
# would also delete any later occurrence of the same text inside a path.
fpss = sorted(str(file_path).removeprefix(dir) for file_path in fps)

_text = "\n".join(fpss)
# Explicit encoding so the listing does not depend on the platform default.
path.write_text(_text, encoding="utf-8")
print(f"Concatenated files saved to {path}")

Concatenated files saved to data/lion/lion_directory.txt


In [5]:
from pathlib import Path
from lionagi import iModel

# Chat model passed to the compression calls below.
chat_model = iModel(
    model="openrouter/google/gemini-2.0-flash-001", temperature=0.5
)

# Derive every path from the configured `output_path` (the directory that
# `concat_files` was told to write into) instead of repeating "data/lion",
# so changing the configuration cell keeps these paths in sync.
data_dir = Path.cwd() / output_path
fp = data_dir / filename  # concatenated source file
compressed_fp = data_dir / f"compressed_{filename}"  # output of the first pass
compressed_fp_2 = data_dir / f"compressed^2_{filename}"  # output of the second pass

In [6]:
from timeit import default_timer as timer

from lionagi.libs.token_transform.types import (
    symbolic_compress_context,
    TokenMappingTemplate,
)

In [7]:
# Reference point for the elapsed-time figures printed after each compression pass.
start = timer()

In [8]:
output_fp = await symbolic_compress_context(
    chat_model=chat_model,
    url_or_path=fp,
    encode_token_map=TokenMappingTemplate.RUST_CHINESE,
    encode_output=True,
    max_concurrent=10,
    throttle_period=0.5,
    output_path=compressed_fp,
    verbose=False,
)

In [9]:
# Report where the first pass wrote its output, how large it is, and how long
# has elapsed since `start`.
print(f"Compressed file saved to: {output_fp}")
compressed_chars = len(Path(output_fp).read_text())
print(f"Compressed character number: {compressed_chars}")
elapsed_first = timer() - start
print(f"First Compression Time taken: {elapsed_first:.2f} seconds")

Compressed file saved to: /Users/lion/github/lion/dev/data/lion/compressed_whole_20250302_source_codes.txt
Compressed character number: 481175
First Compression Time taken: 186.29 seconds


In [10]:
output_fp_2 = await symbolic_compress_context(
    chat_model=chat_model,
    url_or_path=compressed_fp,
    encode_token_map=TokenMappingTemplate.RUST_CHINESE,
    encode_output=True,
    max_concurrent=10,
    throttle_period=0.5,
    output_path=compressed_fp_2,
    verbose=False,
)

In [11]:
# Report the second pass: output location, size, total elapsed time since
# `start`, and the size of the final output relative to the original file.
len_chars = len(Path(output_fp_2).read_text())
print(f"Compressed file saved to: {output_fp_2}")
print(f"Compressed character number: {len_chars}")
total_elapsed = timer() - start
print(f"Total Time taken: {total_elapsed:.2f} seconds")
original_chars = len(Path(fp).read_text())
compression_ratio = len_chars / original_chars
print(f"Compression ratio: {compression_ratio:.2%}")

Compressed file saved to: /Users/lion/github/lion/dev/data/lion/compressed^2_whole_20250302_source_codes.txt
Compressed character number: 159389
Total Time taken: 252.97 seconds
Compression ratio: 10.02%
