https://docs.daft.ai/en/stable/quickstart/#read-from-a-data-source

In [5]:
import daft

# Build a small in-memory DataFrame; each keyword becomes one column.
# Column "D" holds only None values, so it is displayed with the Null dtype.
df = daft.from_pydict(
    dict(
        A=[1, 2, 3, 4],
        B=[1.5, 2.5, 3.5, 4.5],
        C=[True, True, False, False],
        D=[None, None, None, None],
    )
)

# A bare trailing expression renders the DataFrame as the cell output.
df

A Int64,B Float64,C Bool,D Null
1,1.5,True,
2,2.5,True,
3,3.5,False,
4,4.5,False,


In [11]:
# Make anonymous S3 access the default IO configuration for planning,
# so the bucket below is read without credentials.
daft.set_planning_config(
    default_io_config=daft.io.IOConfig(
        s3=daft.io.S3Config(anonymous=True),
    ),
)

# The trailing "**" globs everything under the partitioned parquet directory.
df = daft.read_parquet(
    "s3://daft-public-data/tutorials/10-min/sample-data-dog-owners-partitioned.pq/**"
)
# Displaying the frame here renders only its schema (no rows in the output below).
df

first_name String,last_name String,age Int64,DoB Date,country String,has_dog Bool


In [13]:
# Project down to two columns and print the result.
df.select(daft.col("first_name"), daft.col("has_dog")).show()

first_name String,has_dog Bool
Ernesto,True
James,True
Wolfgang,
Shandra,True
Zaya,True


In [14]:
# Keep only the rows whose age is 40 or more.
age_at_least_40 = daft.col("age") >= 40
df.where(age_at_least_40).show()

first_name String,last_name String,age Int64,DoB Date,country String,has_dog Bool
James,Jale,62,1962-03-24,Canada,True
Shandra,Shamas,57,1967-01-02,United Kingdom,True
Zaya,Zaphora,40,1984-04-07,United Kingdom,True


In [15]:
# Print three rows. NOTE(review): the outputs of the different cells list rows
# in different orders, so do not rely on row order without an explicit sort.
df.show(n=3)

first_name String,last_name String,age Int64,DoB Date,country String,has_dog Bool
Shandra,Shamas,57,1967-01-02,United Kingdom,True
Zaya,Zaphora,40,1984-04-07,United Kingdom,True
Ernesto,Evergreen,34,1990-04-03,Canada,True


In [18]:
# limit() alone returns a new DataFrame; displaying it renders only the schema
# (see the output below) — call .show() on it to print rows.
first_three = df.limit(3)
first_three

first_name String,last_name String,age Int64,DoB Date,country String,has_dog Bool


In [17]:
# Cap the frame at three rows, then print them.
(
    df
    .limit(3)
    .show()
)

first_name String,last_name String,age Int64,DoB Date,country String,has_dog Bool
Wolfgang,Winter,23,2001-02-12,Germany,
Ernesto,Evergreen,34,1990-04-03,Canada,True
James,Jale,62,1962-03-24,Canada,True


In [21]:
# Drop the "DoB" column and print everything else.
without_dob = df.exclude("DoB")
without_dob.show()

first_name String,last_name String,age Int64,country String,has_dog Bool
Ernesto,Evergreen,34,Canada,True
James,Jale,62,Canada,True
Shandra,Shamas,57,United Kingdom,True
Zaya,Zaphora,40,United Kingdom,True
Wolfgang,Winter,23,Germany,


In [23]:
# Derive "full_name" by joining first and last name with a single space.
full_name_expr = daft.col("first_name") + " " + daft.col("last_name")

# Rebind df so that later cells see the extra "full_name" column.
df = df.with_column("full_name", full_name_expr)

df.select(
    "full_name",
    "age",
    "country",
    "has_dog",
).show()

full_name String,age Int64,country String,has_dog Bool
Ernesto Evergreen,34,Canada,True
James Jale,62,Canada,True
Wolfgang Winter,23,Germany,
Shandra Shamas,57,United Kingdom,True
Zaya Zaphora,40,United Kingdom,True


In [24]:
# Same projection as with_column(), but computed inline inside select().
# The alias is applied to the whole concatenation rather than to the
# "first_name" operand alone, so the output column name does not depend on the
# "+" expression inheriting the name of its left-hand side.
df.select(
    (daft.col("first_name") + " " + daft.col("last_name")).alias("full_name"),
    "age",
    "country",
    "has_dog",
).show()

full_name String,age Int64,country String,has_dog Bool
Ernesto Evergreen,34,Canada,True
James Jale,62,Canada,True
Wolfgang Winter,23,Germany,
Shandra Shamas,57,United Kingdom,True
Zaya Zaphora,40,United Kingdom,True


In [26]:
# Order rows by age, youngest first (desc=False means ascending), and print.
df.sort("age", desc=False).show()

first_name String,last_name String,age Int64,DoB Date,country String,has_dog Bool,full_name String
Wolfgang,Winter,23,2001-02-12,Germany,,Wolfgang Winter
Ernesto,Evergreen,34,1990-04-03,Canada,True,Ernesto Evergreen
Zaya,Zaphora,40,1984-04-07,United Kingdom,True,Zaya Zaphora
Shandra,Shamas,57,1967-01-02,United Kingdom,True,Shandra Shamas
James,Jale,62,1962-03-24,Canada,True,James Jale


In [27]:
# Per-country aggregates: mean age and the count of "has_dog" values.
# In the output below Germany's count is 0 because its only row has a null
# "has_dog", i.e. nulls are not counted.
#
# Keep the aggregated DataFrame in `grouped` and print it separately; chaining
# .show() onto the assignment would bind show()'s return value instead of the
# DataFrame.
grouped = df.groupby("country").agg(
    daft.col("age").mean().alias("avg_age"),
    daft.col("has_dog").count(),
)
grouped.show()

country String,avg_age Float64,has_dog UInt64
United Kingdom,48.5,2
Canada,48.0,2
Germany,23.0,0
