# Alias

In [None]:
# What is alias in PySpark?

# alias is used to give a temporary name to a column or expression in a DataFrame.
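# Often used with select, groupBy, or agg.
# Note: withColumn takes the new column's name as its first argument, so an alias on the expression passed to withColumn has no effect on the result.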
# It does not rename the column in the original DataFrame; it’s just for the result of that operation.

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import col, upper, lit

# Create (or reuse) the Spark session used by every example below.
spark = (
    SparkSession.builder
    .appName("select")
    .getOrCreate()
)

# Sample rows: (name, age, country).
data = [
    ("Alice", 25, "USA"),
    ("Bob", 30, "UK"),
]

# Build the demo DataFrame with explicit column names and display it.
df = spark.createDataFrame(data, schema=["name", "age", "country"])
df.show()

3️⃣ Using alias in select

In [None]:
# Each expression gets a temporary output name through alias(); the result
# of select is only displayed, so `df` itself is not reassigned.
aliased_columns = [
    col("name").alias("employee_name"),            # plain column, new name
    (col("age") + 5).alias("age_plus_5"),          # arithmetic expression
    upper(col("country")).alias("country_upper"),  # function result
]
df.select(*aliased_columns).show()

4️⃣ Using alias in withColumn (the alias is ignored; the first argument names the column)

In [None]:
# withColumn takes the output column name as its first argument, so the
# resulting column is "age_new". An alias() on the expression is overridden
# by that name and never appears in the result, so it is omitted here to
# avoid suggesting otherwise.
# To name a derived column through alias(), use select instead, e.g.:
#   df.select("*", (col("age") + 10).alias("age_plus_10"))
df_new = df.withColumn("age_new", col("age") + 10)
df_new.show()

5️⃣ Using alias in groupBy / agg

In [None]:
from pyspark.sql.functions import avg

# Group rows by country and compute the mean age per group; alias() names
# the aggregated column "average_age" in the displayed result.
avg_age_by_country = df.groupBy("country").agg(
    avg("age").alias("average_age")
)
avg_age_by_country.show()