What is the Scala case class equivalent in PySpark?
> Python's collections.namedtuple is pretty similar. 

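Namedtuples are immutable because they are tuples. PySpark's `Row`, used in the cells below, is likewise a tuple subclass whose fields can be read by name.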

In [1]:
from collections import namedtuple

In [2]:
from pyspark.sql import Row

In [6]:
# Build a Row from keyword arguments; each keyword becomes a named field.
foobar = Row(foo=42, bar=-42)
# Read the fields by name rather than by position: the positional order of a
# keyword-built Row is version/config dependent (fields were sorted
# alphabetically before Spark 3.0), so `foo, bar = foobar` can swap the values.
foo, bar = foobar.foo, foobar.bar

In [8]:
# Show which class the object created by Row(...) belongs to.
type(foobar)

pyspark.sql.types.Row

In [7]:
# Print the two field values, one per line.
print(foo, bar, sep="\n")

42
-42


In [14]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext, Row

In [15]:
# Reuse the active SparkContext if one exists so that re-running this cell
# does not fail; a bare SparkContext() raises ValueError when a context is
# already running in the process.
sc = SparkContext.getOrCreate()
# SQLContext wraps the context and provides createDataFrame for the cells below.
sqlContext = SQLContext(sc)

In [16]:
# Load the people file through the SparkContext; the result is consumed
# line-by-line by the next cell.
# NOTE(review): the path is relative — presumably resolved against the
# notebook's working directory; confirm before running elsewhere.
lines = sc.textFile("./resources/people.txt")

In [17]:
# Split every text line on commas, yielding one list of fields per record.
parts = lines.map(lambda line: line.split(","))

In [18]:
# Turn each field list into a Row: first field is the name, second is the
# age, converted to an integer.
people = parts.map(lambda fields: Row(name=fields[0], age=int(fields[1])))

In [20]:
# Convert the RDD of Rows into a DataFrame. No explicit schema is passed, so
# the column names and types come from the Row fields (name: string,
# age: bigint in the repr shown further down).
schemaPeople = sqlContext.createDataFrame(people)

In [21]:
# A bare DataFrame expression displays only its schema, not its rows.
schemaPeople

DataFrame[name: string, age: bigint]

In [22]:
# show() prints the DataFrame's rows as a text table.
schemaPeople.show()

+-------+---+
|   name|age|
+-------+---+
|Michael| 29|
|   Andy| 30|
| Justin| 19|
+-------+---+



In [30]:
from dataclasses import dataclass

@dataclass(frozen=True)
class PeopleSchema:
    """Immutable record describing one person.

    ``frozen=True`` makes instances read-only after construction: assigning
    to a field raises an error instead of mutating the record.

    Attributes:
        name: The person's name.
        age: The person's age.
    """

    name: str
    age: int


# Build one record with explicit field names and print its generated repr.
peopleTest = PeopleSchema(name="Carl", age=22)
print(peopleTest)

PeopleSchema(name='Carl', age=22)
