### Create New Spark Session

In [4]:
# Obtain the Spark session for this notebook: reuse an active one if it exists,
# otherwise start a new session registered under the given application name.
spark = (
    SparkSession.builder
    .appName('StrongAndStringFunction')
    .getOrCreate()
)

In [18]:
# Sample rows: one tuple per country, laid out as
# (Country, Region, UnitsSold, UnitPrice).
data = [
    ("USA", "North America", 100, 50.5),
    ("India", "Asia", 300, 20.0),
    ("Germany", "Europe", 200, 30.5),
    ("Australia", "Oceania", 150, 60.0),
    ("Japan", "Asia", 120, 45.0),
    ("Brazil", "South America", 180, 25.0),
]

# Column names, listed in the same order as the fields of each tuple.
columns = ["Country", "Region", "UnitsSold", "UnitPrice"]

# Build the DataFrame from the rows and the column names.
df = spark.createDataFrame(data=data, schema=columns)

# Display the original DataFrame.
df.show()



+---------+-------------+---------+---------+
|  Country|       Region|UnitsSold|UnitPrice|
+---------+-------------+---------+---------+
|      USA|North America|      100|     50.5|
|    India|         Asia|      300|     20.0|
|  Germany|       Europe|      200|     30.5|
|Australia|      Oceania|      150|     60.0|
|    Japan|         Asia|      120|     45.0|
|   Brazil|South America|      180|     25.0|
+---------+-------------+---------+---------+



## 1. Convert the first letter of each word to uppercase (initcap):

In [19]:

# Apply initcap to the Country column of df and display the result.
df.select(initcap(col("Country"))).show() # "USA" is displayed as "Usa" in the output below

#Note: This transforms the first letter of each word in the Country column to uppercase; the remaining letters come out lowercase (see "Usa").

+----------------+
|initcap(Country)|
+----------------+
|             Usa|
|           India|
|         Germany|
|       Australia|
|           Japan|
|          Brazil|
+----------------+



## 2. Convert all text to lowercase (lower):

In [21]:
df.select(lower(col('Region'))).show()
#Note: Converts all letters in the Region column to lowercase.

+-------------+
|lower(Region)|
+-------------+
|north america|
|         asia|
|       europe|
|      oceania|
|         asia|
|south america|
+-------------+



## 3. Convert all text to uppercase (upper):

In [23]:
df.select(upper(col('Region'))).show()
#Note: Converts all letters in the Region column to uppercase.

+-------------+
|upper(Region)|
+-------------+
|NORTH AMERICA|
|         ASIA|
|       EUROPE|
|      OCEANIA|
|         ASIA|
|SOUTH AMERICA|
+-------------+



## Concatenation Functions

In [29]:
# Join Region and Country into one string column; no separator is inserted between the two values.
df.select(concat(col('Region'),col('Country'))).show()

+-----------------------+
|concat(Region, Country)|
+-----------------------+
|       North AmericaUSA|
|              AsiaIndia|
|          EuropeGermany|
|       OceaniaAustralia|
|              AsiaJapan|
|    South AmericaBrazil|
+-----------------------+



## 2. Concatenate with a separator:

In [33]:
# Join Region and Country into one string column, with '|' placed between the two values.
df.select(concat_ws('|', col('Region'), col('Country'))).show()

+-----------------------------+
|concat_ws(|, Region, Country)|
+-----------------------------+
|            North America|USA|
|                   Asia|India|
|               Europe|Germany|
|            Oceania|Australia|
|                   Asia|Japan|
|         South America|Brazil|
+-----------------------------+



## 3. Create a new concatenated column:

In [36]:
# Add a column named Concatenated_new_column holding "<Region> - <Country>";
# lit(' - ') supplies the literal separator placed between the two column values.
concated_df = df.withColumn(
    'Concatenated_new_column',
    concat(col('Region'), lit(' - '), col('Country')),
)
concated_df.show()

#Note: The new column combines Region and Country with ' - ' between them.
#      show() truncates long strings by default, which is why the last row is displayed as "South America - B...".

+---------+-------------+---------+---------+-----------------------+
|  Country|       Region|UnitsSold|UnitPrice|Concatenated_new_column|
+---------+-------------+---------+---------+-----------------------+
|      USA|North America|      100|     50.5|    North America - USA|
|    India|         Asia|      300|     20.0|           Asia - India|
|  Germany|       Europe|      200|     30.5|       Europe - Germany|
|Australia|      Oceania|      150|     60.0|    Oceania - Australia|
|    Japan|         Asia|      120|     45.0|           Asia - Japan|
|   Brazil|South America|      180|     25.0|   South America - B...|
+---------+-------------+---------+---------+-----------------------+



In [None]:
d