In [1]:
from tokenizers import Tokenizer
from bastionlab.polars.policy import Policy, Aggregation, Log
from bastionlab.polars import train_test_split
import polars as pl
from bastionlab import Connection

file_path = "./SMSSpamCollection"

# Expression that replaces the raw string label with a binary target:
# 1 where the label equals "spam", 0 for every other value.
label_as_int = (
    pl.when(pl.col("label") == "spam").then(1).otherwise(0).alias("label")
)

# Load the header-less, tab-separated file, naming its two columns
# `label` and `text` (in that order), then overwrite `label` with the
# numeric encoding built above.
df = pl.read_csv(
    file_path,
    has_header=False,
    sep="\t",
    new_columns=["label", "text"],
).with_column(label_as_int)

# Preview the first rows of the encoded DataFrame.
df.head()

  from .autonotebook import tqdm as notebook_tqdm


label,text
i64,str
0,"""Go until juron..."
0,"""Ok lar... Joki..."
1,"""Free entry in ..."
0,"""U dun say so e..."
0,"""Nah I don't th..."


In [2]:
# Imported before any remote call is made, so a missing dependency fails
# the cell up front rather than after data has already been uploaded.
from bastionlab.tokenizers import RemoteTokenizer

# Single source of truth for the token sequence length: padding and
# truncation must agree, otherwise the encoded rows would not share one width.
MAX_TOKENS = 32

# Open a connection to the BastionLab server running on this machine.
connection = Connection("localhost")

# Access policy attached to the uploaded DataFrame.
# NOTE(review): presumably aggregations over >= 10 rows are considered safe
# and anything else is only logged - confirm against the policy docs.
policy = Policy(safe_zone=Aggregation(min_agg_size=10), unsafe_handling=Log())

# Upload the local DataFrame; `text` is declared as a sanitized column.
rdf = connection.client.polars.send_df(df, policy=policy, sanitized_columns=["text"])

# Hold out 25% of the rows for testing, shuffling before the split.
ratio = 0.25
train_rdf, test_rdf = train_test_split(rdf, test_size=ratio, shuffle=True)

# Remote handle on the training split's `text` column.
rs = train_rdf.column("text")

# Remote tokenizer built from the pretrained DistilBERT vocabulary, configured
# to pad and truncate every sequence to exactly MAX_TOKENS tokens.
tokenizer = RemoteTokenizer.from_hugging_face_pretrained("distilbert-base-uncased")
tokenizer.enable_padding(length=MAX_TOKENS)
tokenizer.enable_truncation(max_length=MAX_TOKENS)

# Encode the remote text column, then fetch and print the token ids.
# NOTE(review): the recorded run of this cell ended in a cancelled gRPC call
# (RST_STREAM, error code 8); the cause is not visible here - investigate
# before relying on this output.
input_ids, attention_mask = tokenizer.encode(rs)
print("\n---\n", input_ids.collect())

[{"EntryPointPlanSegment":"9a601242-46ae-4aaa-8e91-b84672da099d"},{"PolarsPlanSegment":{"Projection":{"expr":[{"Column":"text"}],"input":{"DataFrameScan":{"df":{"columns":[{"name":"label","datatype":"Int64","values":[]},{"name":"text","datatype":"Utf8","values":[]}]},"schema":{"inner":{"label":"Int64","text":"Utf8"}},"output_schema":null,"projection":null,"selection":null}},"schema":{"inner":{"text":"Utf8"}}}}}]
[{"EntryPointPlanSegment":"3bd4df21-1bf2-47ad-bea7-ffee08b5df58"},{"StringTransformerPlanSegment":{"columns":["text"],"model":"eyJ2ZXJzaW9uIjoiMS4wIiwidHJ1bmNhdGlvbiI6eyJkaXJlY3Rpb24iOiJSaWdodCIsIm1heF9sZW5ndGgiOjMyLCJzdHJhdGVneSI6Ikxvbmdlc3RGaXJzdCIsInN0cmlkZSI6MH0sInBhZGRpbmciOnsic3RyYXRlZ3kiOnsiRml4ZWQiOjMyfSwiZGlyZWN0aW9uIjoiUmlnaHQiLCJwYWRfdG9fbXVsdGlwbGVfb2YiOm51bGwsInBhZF9pZCI6MCwicGFkX3R5cGVfaWQiOjAsInBhZF90b2tlbiI6IltQQURdIn0sImFkZGVkX3Rva2VucyI6W3siaWQiOjAsImNvbnRlbnQiOiJbUEFEXSIsInNpbmdsZV93b3JkIjpmYWxzZSwibHN0cmlwIjpmYWxzZSwicnN0cmlwIjpmYWxzZSwibm9ybWFsaXplZCI6ZmFsc

GRPCException: Cancelled gRPC call: code=StatusCode.CANCELLED message=Received RST_STREAM with error code 8

In [None]:
from bastionlab import Connection
from bastionlab.torch.remote_torch import RemoteTensor
import torch

# NOTE(review): this binds `connection` to the `.client` attribute of a new
# Connection, whereas the earlier cell binds the same name to the Connection
# itself - re-running the earlier cell after this one would see a different
# kind of object. The handle is also not passed to `send_tensor` below;
# presumably the library tracks the active client internally - confirm.
connection = Connection("localhost").client

# Small 2x3 integer tensor used as the payload.
rows = [[1, 2, 400], [-1, 4, -10]]
a = torch.tensor(rows)

# Send the tensor to the server; as the cell's last expression, the returned
# value is what the notebook displays.
RemoteTensor.send_tensor(a)