In [3]:
from pyspark.sql import SparkSession, Row

spark = SparkSession.builder.appName('app').getOrCreate()

class CustomType:
    def __init__(self, name, age, salary):
        self.name = name
        self.age = age
        self.salary = salary

# Create a list of CustomType objects
data = [CustomType("John", 30, 5000),
        CustomType("Mary", 25, 6000),
        CustomType("Mike", 35, 7000)]

rows = [Row(name=d.name, age=d.age, salary=d.salary) for d in data]

df = spark.createDataFrame(rows)

df.show()

+----+---+------+
|name|age|salary|
+----+---+------+
|John| 30|  5000|
|Mary| 25|  6000|
|Mike| 35|  7000|
+----+---+------+



In [4]:
from pyspark.sql import Row
from pyspark.sql import SparkSession
# Create a SparkSession
spark = SparkSession.builder.appName("Metadata").getOrCreate()
# Define a custom class to represent a row in the dataframe
class CustomType:
    def __init__(self, name, age, salary):
        self.name = name
        self.age = age
        self.salary = salary

# Create a list of CustomType objects
data = [CustomType("John", 30, 5000),
        CustomType("Mary", 25, 6000),
        CustomType("Mike", 35, 7000)]

rdd = spark.sparkContext.parallelize(data)

# Create a dataframe from the rdd
df = spark.createDataFrame(rdd)

# Show the dataframe
df.show()

25/12/03 16:23:24 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


+---+----+------+
|age|name|salary|
+---+----+------+
| 30|John|  5000|
| 25|Mary|  6000|
| 35|Mike|  7000|
+---+----+------+



In [5]:
# Importing required modules
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql import Row

# Create a SparkSession
spark = SparkSession.builder.appName("Myapp").getOrCreate()

# step 1: Define the schema for the dataframe
schema = StructType([
    StructField("name",
                StringType(), True),
    StructField("age",
                IntegerType(), True),
    StructField("salary",
                IntegerType(), True)
])

# step 2: Create a list of custom objects
data = [{"name": "John",
         "age": 30, "salary": 5000},
        {"name": "Mary",
         "age": 25, "salary": 6000},
        {"name": "Mike",
         "age": 35, "salary": 7000}]

# step 3: Create the dataframe
df = spark.createDataFrame(data, schema)

# step 4: Show the dataframe
df.show()

25/12/03 16:24:36 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


+----+---+------+
|name|age|salary|
+----+---+------+
|John| 30|  5000|
|Mary| 25|  6000|
|Mike| 35|  7000|
+----+---+------+

