In [0]:
import pandas as pd
import numpy as np
from pyspark.sql import SparkSession
from pyspark import SparkContext
from pyspark.sql.functions import *

#-----------------------------------#

# Obtain the shared SparkContext and SparkSession.
# builder.getOrCreate() reuses the session that is already active (for example
# on Databricks, or when this cell is re-run) instead of constructing a second
# SparkSession object by hand.
sc = SparkContext.getOrCreate()
spark = SparkSession.builder.getOrCreate()


In [0]:
### Spark Create DataFrame with Examples.


# 1. Create a DataFrame from an RDD
#
# `columns` holds the intended column names. Passing it to toDF() names the
# columns 'languages' and 'users' instead of the default '_1' and '_2'.

columns  =  ('languages','users')
data = (('java','2000'),('python','30000'))
rdd  = sc.parallelize(data)

df_rdd = rdd.toDF(list(columns))
df_rdd.printSchema()



In [0]:
# 2. Create DataFrame from Dictionary
#
# Each dictionary entry becomes one row: the key goes into column 'key' and
# its list of values into column 'val'.

data_map = {
    'language': ['python', 'java', 'c#'],
    'user': ['30k', '20k', '10k'],
    'spped': ['2x', '4x', '8x'],  # NOTE(review): key looks like a typo for 'speed' -- confirm
}

map_1 = [(key, values) for key, values in data_map.items()]

df = spark.createDataFrame(map_1, schema=['key', 'val'])

df.show()


In [0]:
# Build a pandas-on-Spark DataFrame from `data_map` (one column per key) and
# convert it to a regular Spark DataFrame.
#
# Koalas was merged into PySpark as `pyspark.pandas` (Spark 3.2+), so fall back
# to that module when the standalone `databricks.koalas` package is missing.
try:
    from databricks import koalas as ks
except ImportError:
    import pyspark.pandas as ks

df = ks.DataFrame(data_map)
sdf  = df.to_spark()
sdf.show()

In [0]:
# 3. createDataFrame() from a list of tuples with an explicit StructType schema

from pyspark.sql.types import StructType,StructField,IntegerType,StringType

# One tuple per row: (id, created_at, updated_at). The dates are kept as strings.
data = [
    (1, '12102021', '13102021'),
    (2, '11112021', '21112021'),
    (3, '2102021', '15102021'),
]

# Every field is nullable; only `id` is an integer column.
schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ("id", IntegerType()),
        ("created_at", StringType()),
        ("updated_at", StringType()),
    )
])

df2 = spark.createDataFrame(data=data, schema=schema)
df2.show()

In [0]:
# 4. Create Spark DataFrame from CSV
#
# header=True makes Spark use the first line of the file as column names.
# Without it the header line is read as a data row and the columns are named
# _c0, _c1, ...

filename  ='/FileStore/tables/Bowler_data.csv'

df  = spark.read.csv(filename, header=True)
df.show()


In [0]:
'''Other formats supported:
XML
JSON
text
TSV
Avro
Parquet
HBase
JDBC
Hive
'''



In [0]:
# B. Rename columns with withColumnRenamed
#
# Load the CSV through the generic reader, with the 'header' option set to "true".

filename = '/FileStore/tables/Bowler_data.csv'

df = (
    spark.read
    .format('csv')
    .option('header', "true")
    .load(filename)
)
df.show()

 

In [0]:
# Rename a single column: 'Econ' becomes 'Economy'.

df = df.withColumnRenamed(existing="Econ", new="Economy")


In [0]:
# Rename several columns by chaining withColumnRenamed calls:
# 'Mdns' -> 'Maidens' and 'Opposition' -> 'Opponent'.

df = (
    df
    .withColumnRenamed(existing="Mdns", new="Maidens")
    .withColumnRenamed(existing='Opposition', new='Opponent')
)

df.show()



In [0]:
# Add a 'venue' column that holds a copy of the existing 'Ground' column.
ground_column = df['Ground']
df = df.withColumn('venue', ground_column)
df.show()

In [0]:
# Drop 'Ground' and re-create it from the 'venue' copy.
# The copy is referenced by its exact lowercase name 'venue'; the previous
# spelling 'Venue' only resolved while spark.sql.caseSensitive was off.
df  = df.drop('Ground')
df = df.withColumn('Ground',col('venue'))
df.show()

In [0]:
# Select an aliased copy of 'Match_ID' (named 'match_id') followed by every
# existing column of df.
match_id_alias = col("Match_ID").alias("match_id")
df1 = df.select(match_id_alias, "*")
df1.show()

In [0]:
# Small two-column DataFrame used by the renaming examples.
df_test = spark.createDataFrame(
    data=[("x", 1), ("y", 2)],
    schema=["col_1", "col_2"],
)

In [0]:
# Approach 1: withColumnRenamed -- rename 'col_1' to 'col_3'; 'col_2' is left as is.
df_test.withColumnRenamed(existing="col_1", new="col_3").show()


In [0]:
# Approach 2: alias -- select 'col_1' under the new name 'col5' together with 'col_2'.
df_test.select(
    df_test.col_1.alias("col5"),
    df_test.col_2,
).show()

In [0]:
# Approach 3: selectExpr -- the rename is written as a SQL expression ("... as ...").
df_test.selectExpr(["col_1 as col_3", "col_2"]).show()

In [0]:
# Rename all columns
# Approach 4: toDF -- supply the complete list of new column names, one for
# every column in the DataFrame, in order.
df_test.toDF(*["col_3", "col_2"]).show()