# Conexión entre PySpark y Postgres

In [9]:
import os

from pyspark.sql import SparkSession

# --- Configuration -----------------------------------------------------------
# Every machine- or deployment-specific value below can be overridden with an
# environment variable, so local paths and database credentials do not have to
# be edited (or committed) inside the notebook. When a variable is not set, the
# original hard-coded value is used, so the default behaviour is unchanged.

# Path to the PostgreSQL JDBC driver jar.
# (postgresql-42.5.2.jar was used previously.)
driver_path = os.environ.get(
    "PG_JDBC_DRIVER_PATH",
    "/home/lucas/working_dir/driver_jdbc/postgresql-42.2.27.jre7.jar",
)

# Expose the driver jar to Spark. These variables are set before the session is
# built below; PySpark reads PYSPARK_SUBMIT_ARGS when it launches the JVM.
os.environ['PYSPARK_SUBMIT_ARGS'] = f'--driver-class-path {driver_path} --jars {driver_path} pyspark-shell'
# NOTE(review): SPARK_CLASSPATH is a legacy variable; kept as in the original --
# confirm whether the Spark version in use still needs it.
os.environ['SPARK_CLASSPATH'] = driver_path

# Create (or reuse) a SparkSession with a "local" master and the JDBC driver
# registered on both the jar list and the executor class path.
spark = (
    SparkSession.builder
    .master("local")
    .appName("Conexion entre Pyspark y Postgres")
    .config("spark.jars", driver_path)
    .config("spark.executor.extraClassPath", driver_path)
    .getOrCreate()
)

# Postgres connection settings.
# Set PG_JDBC_URL / PG_USER / PG_PASSWORD in the environment to avoid keeping
# real connection details in the notebook; the fallbacks are the original
# development values.
pg_url = os.environ.get("PG_JDBC_URL", "jdbc:postgresql://172.18.0.2:5432/postgres")
pg_user = os.environ.get("PG_USER", "postgres")
pg_password = os.environ.get("PG_PASSWORD", "postgres")
pg_driver = "org.postgresql.Driver"

In [10]:
# Load the "agents" table from Postgres through Spark's JDBC data source,
# using the connection settings defined in the setup cell.
df_agents = (
    spark.read.format("jdbc")
    .options(
        url=pg_url,
        dbtable="agents",
        user=pg_user,
        password=pg_password,
        driver=pg_driver,
    )
    .load()
)

# Print the column names/types, then preview the rows.
df_agents.printSchema()
df_agents.show()

root
 |-- agentid: integer (nullable = true)
 |-- name: string (nullable = true)

+-------+------------------+
|agentid|              name|
+-------+------------------+
|      0|  Michele Williams|
|      1|    Jocelyn Parker|
|      2|Christopher Moreno|
|      3|       Todd Morrow|
|      4|       Randy Moore|
|      5|        Paul Nunez|
|      6|      Gloria Singh|
|      7|      Angel Briggs|
|      8|      Lisa Cordova|
|      9|        Dana Hardy|
|     10|           Agent X|
+-------+------------------+



In [12]:
# Load the "calls" table from Postgres through Spark's JDBC data source,
# using the connection settings defined in the setup cell.
df_calls = (
    spark.read.format("jdbc")
    .options(
        url=pg_url,
        dbtable="calls",
        user=pg_user,
        password=pg_password,
        driver=pg_driver,
    )
    .load()
)

# Print the column names/types, preview the rows, and finish with the row
# count as the cell's last expression so the notebook displays it.
df_calls.printSchema()
df_calls.show()
df_calls.count()

root
 |-- callid: integer (nullable = true)
 |-- agentid: integer (nullable = true)
 |-- customerid: integer (nullable = true)
 |-- pickedup: integer (nullable = true)
 |-- duration: integer (nullable = true)
 |-- productsold: integer (nullable = true)

+------+-------+----------+--------+--------+-----------+
|callid|agentid|customerid|pickedup|duration|productsold|
+------+-------+----------+--------+--------+-----------+
|     0|     10|       179|       0|       0|          0|
|     1|      5|       691|       1|     116|          0|
|     2|     10|        80|       1|     165|          0|
|     3|      6|       629|       1|     128|          0|
|     4|      8|       318|       1|     205|          0|
|     5|      7|       319|       1|     225|          1|
|     6|     10|       265|       1|     211|          0|
|     7|      9|       625|       0|       0|          0|
|     8|      5|       877|       0|       0|          0|
|     9|      5|       191|       1|     145|     

9940

In [13]:
# Load the "customers" table from Postgres through Spark's JDBC data source,
# using the connection settings defined in the setup cell.
df_customers = (
    spark.read.format("jdbc")
    .options(
        url=pg_url,
        dbtable="customers",
        user=pg_user,
        password=pg_password,
        driver=pg_driver,
    )
    .load()
)

# Print the column names/types, preview the rows, and finish with the row
# count as the cell's last expression so the notebook displays it.
df_customers.printSchema()
df_customers.show()
df_customers.count()

root
 |-- customerid: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- occupation: string (nullable = true)
 |-- email: string (nullable = true)
 |-- company: string (nullable = true)
 |-- phonenumber: string (nullable = true)
 |-- age: integer (nullable = true)

+----------+------------------+--------------------+--------------------+--------------------+------------+---+
|customerid|              name|          occupation|               email|             company| phonenumber|age|
+----------+------------------+--------------------+--------------------+--------------------+------------+---+
|         0|      David Melton|          Unemployed|    DMelton@zoho.com|Morris, Winters a...|409-093-0748| 16|
|         1|  Michael Gonzalez|             Student|Gonzalez_Michael@...|  Hernandez and Sons|231-845-0673| 19|
|         2|     Amanda Wilson|             Student|Amanda.Wilson75@v...|Mooney, West and ...|844-276-4552| 18|
|         3|     Robert Thomas|Engineer, struc

1000