In [2]:
# =========================================================
# 1. STOP EXISTING SPARK SESSION
# =========================================================
# Best-effort cleanup: stop a session left over from an earlier run so that
# getOrCreate() below builds a new one instead of returning the running one.
try:
    spark.stop()
except NameError:
    # `spark` has not been defined yet (first run) - nothing to stop.
    pass
except Exception as exc:
    # Cleanup must not block the rest of the script, but the failure is
    # reported rather than silently swallowed. KeyboardInterrupt/SystemExit
    # are deliberately NOT caught here.
    print(f"WARNING: could not stop existing Spark session: {exc!r}")


# =========================================================
# 2. IMPORTS
# =========================================================
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql import types as T


# =========================================================
# 3. CREATE SPARK SESSION (WINDOWS + JDBC FIX)
# =========================================================
# "spark.jars" is set to the PostgreSQL JDBC driver jar; the path below is
# specific to this machine.
spark = SparkSession.builder \
    .appName("Postgres_JDBC_EndToEnd") \
    .config("spark.jars", r"C:\spark\postgresql-42.7.3.jar") \
    .getOrCreate()

# Keep console output quiet: only ERROR-level log messages.
spark.sparkContext.setLogLevel("ERROR")


# =========================================================
# 4. CREATE SAMPLE DATAFRAME
# =========================================================
# Each row is (order_id, user_id, order_date as 'YYYY-MM-DD' text, amount).
data = [
    (1, 'U1', '2024-01-01', 100),
    (2, 'U1', '2024-01-05', 150),
    (3, 'U1', '2024-01-10', 200),
    (4, 'U1', '2024-01-15', 120),
    (5, 'U1', '2024-01-20', 180),
    (6, 'U1', '2024-01-25', 90),
    (7, 'U2', '2024-01-03', 50),
    (8, 'U2', '2024-01-15', 70),
    (9, 'U3', '2024-02-01', 300),
    (10, 'U3', '2024-02-05', 250),
    (11, 'U3', '2024-02-10', 200),
    (12, 'U3', '2024-02-15', 150),
    (13, 'U3', '2024-02-18', 100),
    (14, 'U3', '2024-02-20', 120),
]

# order_date is loaded as a string first and converted to a date below.
schema = T.StructType([
    T.StructField("order_id", T.IntegerType(), nullable=False),
    T.StructField("user_id", T.StringType(), nullable=True),
    T.StructField("order_date", T.StringType(), nullable=True),
    T.StructField("amount", T.IntegerType(), nullable=True),
])

# Build the DataFrame and replace the text order_date column with a date.
df = spark.createDataFrame(data, schema).withColumn(
    "order_date", F.to_date(F.col("order_date"))
)

# Optional sanity check before writing (uncomment to inspect the data):
# print("===== DATAFRAME BEFORE WRITE =====")
# df.show()
# print("Row count:", df.count())


# =========================================================
# 5. POSTGRES JDBC CONFIG
# =========================================================
import os

# JDBC URL of the target database.
pg_url = "jdbc:postgresql://localhost:5432/Sachin"

# Connection properties. The user and password are taken from the PGUSER /
# PGPASSWORD environment variables when they are set, so real credentials do
# not have to live in this file. The fallbacks are the original local
# development values and keep the script working unchanged when the
# variables are absent.
pg_props = {
    "user": os.environ.get("PGUSER", "postgres"),
    "password": os.environ.get("PGPASSWORD", "tiger"),
    "driver": "org.postgresql.Driver",
}


# =========================================================
# 6. WRITE TO POSTGRES
# =========================================================
# Save the DataFrame to table public.orders_int through the JDBC data source,
# using save mode "overwrite".
(
    df.write
    .format("jdbc")
    .options(
        url=pg_url,
        dbtable="public.orders_int",
        user=pg_props["user"],
        password=pg_props["password"],
        driver=pg_props["driver"],
    )
    .mode("overwrite")
    .save()
)

print("✅ WRITE COMPLETED")


# =========================================================
# 7. STOP SPARK
# =========================================================
# Shut the session down now that the write step has run.
spark.stop()


✅ WRITE COMPLETED
