In [1]:
# Load (or reload) IPython's autoreload extension and switch it to mode 2, so
# imported modules are re-imported before each cell is executed. This lets edits
# to the project package take effect without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [18]:
from io import StringIO
from csv_importer.parsers import polars_df_to_bob
import polars as pl


# In-memory CSV sample with one column per scalar kind (float, int, bool, string).
# Note that the literal starts with a newline, i.e. an empty line precedes the header.
csv_data = StringIO("""
FloatVal,IntVal,BoolVal,StringVal
1.23,10,true,Hello
4.56,20,false,World""")

# Parse the buffer into a Polars DataFrame; the bare name on the last line makes
# the notebook render the resulting frame.
df = pl.read_csv(source=csv_data)
df

FloatVal,IntVal,BoolVal,StringVal
f64,i64,bool,str
1.23,10,True,"""Hello"""
4.56,20,False,"""World"""


In [19]:
# Convert DataFrame to Bob object
# `df` is whatever frame is currently bound in the kernel (the CSV-derived frame
# when the cells are run top to bottom). The result is assigned to `bob`, so the
# cell has no echoed value of its own.

bob = polars_df_to_bob(df, name="SimpleDataFrameHello")



Hi from polars_df_to_bob


In [12]:
# Sample frame mixing two list-valued columns (three floats per row) with scalar
# boolean, integer and string columns. Two rows in total.
df = pl.DataFrame(
    {
        "Dino": [[55.3846, 97.1795, 0.0], [51.5385, 96.0256, 0.0]],
        "Star": [[58.2136, 91.8819, 0.0], [58.1961, 92.215, 0.0]],
        "Is_Visible": [True, False],
        "Intensity": [10, 20],
        "My Strings": ["A", "B"],
    }
)
df

Dino,Star,Is_Visible,Intensity,My Strings
list[f64],list[f64],bool,i64,str
"[55.3846, 97.1795, 0.0]","[58.2136, 91.8819, 0.0]",True,10,"""A"""
"[51.5385, 96.0256, 0.0]","[58.1961, 92.215, 0.0]",False,20,"""B"""


In [17]:
from csv_importer.parsers import polars_df_to_bob



# Convert the current `df`; the second positional argument is presumably the
# object name (the earlier call passes it as `name=`) — TODO confirm.
# As the last expression of the cell, the returned object is echoed as output.
polars_df_to_bob(df, "AdvancedDataFrame")


Hi from polars_df_to_bob


<databpy.object.BlenderObject at 0x386052b90>

In [None]:
import polars as pl
from io import StringIO

# JSON document kept in memory: every key maps to a list of per-row lists.
json_file = StringIO(
    """
    {
    "Dino": [
        [55.3846, 97.1795, 0.0],
        [51.5385, 96.0256, 0.0]
    ],
    "Star": [
        [58.2136, 91.8819, 0.0],
        [58.1961, 92.215, 0.0]
    ],
    "Is_Visible": [
        [true],
        [false]
    ],
    "Intensity": [
        [10],
        [20]
    ],
    "My Strings": [
        ["A"],
        ["B"]
    ]
    }
    """
)

# Load the JSON text into a DataFrame
df = pl.read_json(json_file)

print("Original DataFrame:")
print(df)

# Pass 1: gather every column whose dtype matches `list[list]` and explode them together.
nested_list_dtype = pl.List(pl.List)
columns_to_explode = []
for col_name, col_dtype in df.schema.items():
    if col_dtype == nested_list_dtype:
        columns_to_explode.append(col_name)
df = df.explode(columns_to_explode)

print("\nAfter First Explosion:")
print(df)

# Pass 2: explode the remaining list columns, except those whose first row holds
# exactly 3 items ("Dino" and "Star"), which stay as lists.
columns_to_explode = []
for col_name in df.columns:
    # The length is only inspected for list-typed columns (same short-circuit order).
    if df[col_name].dtype == pl.List and len(df[col_name][0]) != 3:
        columns_to_explode.append(col_name)
df = df.explode(columns_to_explode)

print("\nAfter Second Explosion:")
print(df)

In [None]:
# minimal version (use later for test)
import polars as pl
import databpy as db
import numpy as np

# Two-row frame: one list column of three floats plus scalar bool and int columns.
df = pl.DataFrame(
    {
        "Star": [[58.2136, 91.8819, 0.0], [58.1961, 92.215, 0.0]],
        "Is_Visible": [True, False],
        "Intensity": [10, 20],
    }
)

print(df)

# One all-zero xyz vertex per DataFrame row.
vertices = np.zeros((df.height, 3), dtype=np.float32)
bob = db.create_bob(vertices, name="DataWithVector")

# Store every column as a named attribute on the object.
# NOTE(review): np.vstack treats its argument as a sequence of rows, so a 1-D
# scalar column should come out with shape (n, 1) rather than (n,) — TODO confirm
# this is what store_named_attribute expects.
for series in df.get_columns():
    data = np.vstack(series.to_numpy())
    bob.store_named_attribute(data, series.name)


In [None]:
# Build a Series with an explicit fixed-width Array dtype (Int64, width 3) and
# display its NumPy conversion as the cell output.
s = pl.Series([[1, 2, 3], [4, 5, 6]], dtype=pl.Array(pl.Int64, 3))
s.to_numpy()

In [None]:
import polars as pl
# Same nested values, but with no explicit dtype, so Polars infers the dtype itself.
# Displaying to_numpy() allows a comparison with the Array-typed series.
s = pl.Series([[1, 2, 3], [4, 5, 6]])
s.to_numpy()

In [None]:
# Two-row frame with a list column ("Star") and two scalar columns.
df = pl.DataFrame(
    {
        "Star": [[58.2136, 91.8819, 0.0], [58.1961, 92.215, 0.0]],
        "Is_Visible": [True, False],
        "Intensity": [10, 20],
    }
)

print(df)

# Cell output: does the first row of "Star" contain exactly three items?
len(df.get_column("Star")[0]) == 3



In [None]:
import polars as pl

# Original DataFrame: "Star" holds 3-element lists, "Other" holds 2-element lists,
# and the remaining columns are scalars.
df = pl.DataFrame(
    {
        "Star": [[58.2136, 91.8819, 0.0], [58.1961, 92.215, 0.0]],
        "Other": [[1, 2], [3, 4]],
        "Is_Visible": [True, False],
        "Intensity": [10, 20],
    }
)

print(df.get_column("Star").dtype)

# Identify columns that are lists and have elements of length 3.
# The check has to be made on the column's dtype: `df[col]` is a pl.Series, so
# `isinstance(df[col], pl.List)` is always False and no column would ever be selected.
# The length is only inspected (on the first row) for list-typed columns.
columns_to_cast = [
    col
    for col, dtype in df.schema.items()
    if dtype == pl.List and len(df[col][0]) == 3
]
print(columns_to_cast)

# Cast each selected column from a variable-length list to a fixed-width array of
# three Float64 values; `.alias(col)` keeps the column name so it is replaced in place.
for col in columns_to_cast:
    df = df.with_columns(pl.col(col).cast(pl.Array(pl.Float64, 3)).alias(col))

# Output the resulting DataFrame and schema
print(df)
print(df.schema)

In [None]:
# Display the NumPy conversion of the "Star" column of the current `df`
# (presumably to inspect the effect of the Array cast attempted in the cell above).
df["Star"].to_numpy()

In [None]:
# Casting to the correct data type (a fixed-width pl.Array) means np.vstack is not needed.
import polars as pl

# Original DataFrame
df = pl.DataFrame(
    {
        "Star": [[58.2136, 91.8819, 0.0], [58.1961, 92.215, 0.0]],
        "Is_Visible": [True, False],
        "Intensity": [10, 20],
    }
)

# Columns to cast: dtype is exactly list[f64] and the first row's list has three entries.
columns_to_cast = [
    name
    for name, dtype in df.schema.items()
    if dtype == pl.List(pl.Float64) and len(df[name][0]) == 3
]

# Re-type each selected column as a fixed-width array of three Float64 values.
fixed_width = pl.Array(pl.Float64, 3)
for name in columns_to_cast:
    df = df.with_columns(pl.col(name).cast(fixed_width).alias(name))

# Print the resulting frame, then show the NumPy form of "Star" as the cell output.
print(df)
df["Star"].to_numpy()

In [None]:
# One all-zero xyz vertex per row of the current `df`.
vertices = np.zeros((df.height, 3), dtype=np.float32)
bob = db.create_bob(vertices, name="DataWithVector")

# Store each column's NumPy conversion directly as a named attribute (no np.vstack here).
for series in df.get_columns():
    bob.store_named_attribute(series.to_numpy(), series.name)

# self-contained: setting string attributes

In [None]:

import numpy as np
import polars as pl
import databpy as db

# Small example frame: a string column with repeated values and one null, plus
# an integer column 0..9.
df = pl.DataFrame(
    {
        "strings": ["apple", "banana", "apple", None, "orange", "banana", "kiwi", "apple", "kiwi", "banana"],
        "numbers": np.arange(10),
    }
)

# One random xyz vertex per DataFrame row.
random_verts = np.random.rand(df.height, 3)
bob = db.create_bob(random_verts)

for col in df.columns:
    series = df[col]
    if series.dtype == pl.Utf8:
        # Strings are stored as integer codes: nulls become "", np.unique yields the
        # sorted distinct values plus, per row, the index of its value in that list.
        filled = series.fill_null("").to_numpy()
        unique, encoding = np.unique(filled, return_inverse=True)
        bob.store_named_attribute(encoding, col)
        # Hand the distinct strings to the node helper under a "<object name>: <column>" label.
        db.nodes.custom_string_iswitch(f"{bob.name}: {col}", unique, col)
        continue
    # Non-string columns are stored as-is.
    bob.store_named_attribute(series.to_numpy(), col)

In [None]:
# increased string limit
import string
import numpy as np
import polars as pl
import databpy as db
from csv_importer.parsers import update_bob_from_polars_df

# Build n random 10-letter lowercase strings (one np.random.choice draw per string).
n = 4000
alphabet = list(string.ascii_lowercase)
random_strings = []
for _ in range(n):
    letters = np.random.choice(alphabet, size=10)
    random_strings.append("".join(letters))

# Frame with the strings and a running integer, plus an object with n random vertices.
df = pl.DataFrame({"strings": random_strings, "numbers": np.arange(n)})
bob = db.create_bob(np.random.rand(n, 3))

# Pass a string_limit just above the number of generated strings.
update_bob_from_polars_df(bob, df, string_limit=4001)

In [None]:
import string
import numpy as np
import polars as pl
from csv_importer.parsers import polars_df_to_bob

# 1000 random 10-letter lowercase strings, each built from one np.random.choice draw.
n = 1000
alphabet = list(string.ascii_lowercase)
random_strings = ["".join(np.random.choice(alphabet, size=10)) for _ in range(n)]

# Pair the strings with a running integer column and convert the frame to an object.
df = pl.DataFrame(
    {
        "strings": random_strings,
        "numbers": np.arange(n),
    }
)
bob = polars_df_to_bob(df, name="TestBob")
