# create DataFrame

In [19]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *

# Start (or reuse) the Spark session used by the rest of the notebook.
spark = SparkSession.builder.appName('recharge').getOrCreate()

# Sample rows, in schema order:
# (RechargeID, Rechargedate as a yyyyMMdd string, Remaining_days, validity).
data = [
    ("R201623", "20200511", 110, "online"),
    ("R201623", "20200213", 2, "online"),
    ("R201623", "20200119", 30, "online"),
    ("R201623", "20190511", 45, "online"),
    ("R201623", "20200212", 110, "online"),
]

# Explicit schema so Rechargedate stays a string and Remaining_days an integer;
# every column is declared nullable.
schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ("RechargeID", StringType()),
        ("Rechargedate", StringType()),
        ("Remaining_days", IntegerType()),
        ("validity", StringType()),
    )
])

df = spark.createDataFrame(data, schema)

# Show the resulting schema, then the rows.
df.printSchema()
df.show()

root
 |-- RechargeID: string (nullable = true)
 |-- Rechargedate: string (nullable = true)
 |-- Remaining_days: integer (nullable = true)
 |-- validity: string (nullable = true)

+----------+------------+--------------+--------+
|RechargeID|Rechargedate|Remaining_days|validity|
+----------+------------+--------------+--------+
|   R201623|    20200511|           110|  online|
|   R201623|    20200213|             2|  online|
|   R201623|    20200119|            30|  online|
|   R201623|    20190511|            45|  online|
|   R201623|    20200212|           110|  online|
+----------+------------+--------------+--------+



In [21]:
from pyspark.sql.functions import date_add,to_date,col,expr 

# Parse the yyyyMMdd text in Rechargedate into a real date column, date_s.
# Rechargedate is already a string column (see the printed schema), so it is
# passed to to_date directly without an extra cast.
df1 = df.withColumn("date_s", to_date(col("Rechargedate"), "yyyyMMdd"))

# Show the rows with the new column, then confirm date_s has type `date`.
df1.show()
df1.printSchema()

+----------+------------+--------------+--------+----------+
|RechargeID|Rechargedate|Remaining_days|validity|    date_s|
+----------+------------+--------------+--------+----------+
|   R201623|    20200511|           110|  online|2020-05-11|
|   R201623|    20200213|             2|  online|2020-02-13|
|   R201623|    20200119|            30|  online|2020-01-19|
|   R201623|    20190511|            45|  online|2019-05-11|
|   R201623|    20200212|           110|  online|2020-02-12|
+----------+------------+--------------+--------+----------+

root
 |-- RechargeID: string (nullable = true)
 |-- Rechargedate: string (nullable = true)
 |-- Remaining_days: integer (nullable = true)
 |-- validity: string (nullable = true)
 |-- date_s: date (nullable = true)



In [23]:
# Expiry date = parsed recharge date (date_s) plus Remaining_days days,
# evaluated per row through a SQL expression.
expiry_date = expr("date_add(date_s,Remaining_days)").alias("Expiry_Date")

# Keep every existing column and append the computed expiry date.
df1.select("*", expiry_date).show()

+----------+------------+--------------+--------+----------+-----------+
|RechargeID|Rechargedate|Remaining_days|validity|    date_s|Expiry_Date|
+----------+------------+--------------+--------+----------+-----------+
|   R201623|    20200511|           110|  online|2020-05-11| 2020-08-29|
|   R201623|    20200213|             2|  online|2020-02-13| 2020-02-15|
|   R201623|    20200119|            30|  online|2020-01-19| 2020-02-18|
|   R201623|    20190511|            45|  online|2019-05-11| 2019-06-25|
|   R201623|    20200212|           110|  online|2020-02-12| 2020-06-01|
+----------+------------+--------------+--------+----------+-----------+

