#### JDBC connection string

This notebook connects using Integrated Security (Windows authentication, i.e. without a username and password). For the JDBC driver download and reference documentation, [click here](https://learn.microsoft.com/en-us/sql/connect/jdbc/connecting-with-ssl-encryption?view=sql-server-ver16).

Java version used: `JDK 20`\
OS version used: `Windows 11`\
Put `mssql-jdbc_auth-12.4.1.x64.dll` in `jdk/bin/`\
If needed, put the downloaded DLL, JAR and JRE in `spark/jars`\
If needed, enable TCP/IP for SQL Server: search Windows for `mmc` (i.e. SQL Server Configuration Manager) -> add the snap-in for SQL Server Network Configuration -> Protocols for MSSQL -> enable TCP/IP


In [0]:
# findspark is initialised here, before pyspark is imported in the next cell.
import findspark
findspark.init()
# As the last expression of the cell, the return value of find() is what the
# notebook displays (presumably the detected Spark location -- confirm).
findspark.find()

In [0]:
from pyspark.sql import SparkSession

# Obtain the SparkSession used by every later cell (local master).
spark = (
    SparkSession.builder
    .appName('Spark - SQL Server Integrated Authentication Example')
    .master('local')
    .getOrCreate()
)

In [0]:
# Connection settings interpolated into the JDBC URL built in the next cell.
# PORT stays a string because it is only ever substituted into that URL.
DATABASE_NAME = 'LiveClassAssignment'
PORT = '63739'

In [0]:
# Readable multi-line template: one JDBC connection property per line.
connectionUrl = f'''
jdbc:sqlserver://localhost:{PORT};
databaseName={DATABASE_NAME};
integratedSecurity=true;
encrypt=true;
trustServerCertificate=true;
'''

# Collapse the template into a single-line URL: trim the surrounding
# whitespace of every line and concatenate the pieces with no separator.
connectionUrl = "".join(map(str.strip, connectionUrl.splitlines()))

```sql
SELECT Department,AVG(Salary) as gross_average_salary FROM EmployeeDetails e
LEFT JOIN EmployeeBonus eb on e.Employee_id=eb.Employee_ref_id_FK
GROUP BY Department
```


In [0]:
def get_db_table_data(tableName: str):
    """Load a database table through the JDBC data source.

    Relies on the notebook-level ``spark`` session and ``connectionUrl``.

    Args:
        tableName: Value passed to the JDBC ``dbtable`` option.

    Returns:
        Whatever ``spark.read...load()`` returns for the configured reader.
    """
    jdbc_reader = (
        spark.read.format("jdbc")
        .option("url", connectionUrl)
        .option("dbtable", tableName)
        .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
    )
    return jdbc_reader.load()

In [0]:
# Load the two source tables used by the aggregation cell below.
emp_details_df = get_db_table_data('EmployeeDetails')
emp_bonus_df = get_db_table_data('EmployeeBonus')

In [0]:
# The max/min aggregates are imported under aliases so that Python's builtin
# max()/min() are not shadowed for the remaining cells of the notebook.
from pyspark.sql.functions import avg, count, monotonically_increasing_id
from pyspark.sql.functions import max as spark_max, min as spark_min

# Per-department salary statistics: average, row count, maximum and minimum.
# NOTE(review): the left join against EmployeeBonus is kept as in the
# original, but no aggregate reads a bonus column. If one employee can have
# several bonus rows, the join duplicates that employee and skews count("*")
# and avg('salary') -- confirm against the table data.
salary_by_department = (
    emp_details_df
    .join(
        emp_bonus_df,
        emp_details_df['Employee_id'] == emp_bonus_df['Employee_ref_id_FK'],
        'left',
    )
    .groupBy('Department')
    .agg(
        avg('salary').alias('average_salary'),
        count("*").alias("no_of_employees"),
        spark_max('salary').alias('maximum_salary'),
        spark_min('salary').alias('minimum_salary'),
    )
    # Generated id column; values are unique but not guaranteed consecutive.
    .withColumn("id", monotonically_increasing_id())
)
salary_by_department.show()

In [0]:
# TODO: need to fix this to write data to table
def load_df_to_db(df, tableName='departmental_salary', mode='overwrite'):
    """Write a DataFrame to a database table through the JDBC data source.

    Relies on the notebook-level ``connectionUrl``. The defaults reproduce
    the previously hard-coded behaviour, so ``load_df_to_db(df)`` still
    writes to ``departmental_salary`` with ``overwrite`` mode.

    Args:
        df: Object exposing a ``write`` attribute (a Spark DataFrame here).
        tableName: Value passed to the JDBC ``dbtable`` option.
        mode: Save mode passed to the writer's ``mode()``.

    Returns:
        None.
    """
    (
        df.write
        .format("jdbc")
        .mode(mode)
        .option("url", connectionUrl)
        .option("dbtable", tableName)
        .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
        .save()
    )

In [0]:
# Write the per-department salary summary computed above to the database.
load_df_to_db(salary_by_department)

In [0]:
# Stop the SparkSession created at the top of the notebook.
spark.stop()