# Select

In [None]:
# What is select in PySpark?

# select() chooses specific columns from a DataFrame and returns a new DataFrame;
# the original DataFrame is left unchanged.
# It can also apply transformations or expressions to the columns while selecting.
# Think of it as picking (projecting) columns from your DataFrame.

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import col, lit, upper

# Start a Spark session for this notebook, or reuse one that is already active.
spark = (
    SparkSession.builder
    .appName("select")
    .getOrCreate()
)

# Small in-memory sample: one (name, age, country) tuple per row.
data = [
    ("Alice", 25, "USA"),
    ("Bob", 30, "UK"),
    ("Charlie", 35, "India"),
]
df = spark.createDataFrame(data, schema=["name", "age", "country"])
df.show()

3️⃣ Select specific columns

In [None]:
# Keep only the listed columns; select() also accepts the names as a single list.
df.select(["name", "age"]).show()

4️⃣ Select with expressions

In [None]:
# Each argument is a Column expression; alias() names the derived output columns.
df.select(
    df["name"],
    (df["age"] + 5).alias("age_plus_5"),
    upper(df["country"]).alias("country_upper"),
).show()


5️⃣ Select all columns dynamically

In [None]:
# df.columns is a plain Python list of column names, so the selection
# can be built at runtime instead of being hard-coded.
columns = df.columns
df.select(columns).show()

6️⃣ Select and create new column using lit()

In [None]:
# lit() wraps a Python literal in a Column, so every row gets the same "role" value.
df.select(
    col("name"),
    col("age"),
    lit("Employee").alias("role"),
).show()