In [0]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Define the number of rows
num_rows = 5

# Dedicated, seeded generators: a fresh "Restart & Run All" rebuilds exactly
# the same sample rows, and the global `random` / `np.random` state is left
# untouched for any other cell.
EMP_SEED = 42
py_rng = random.Random(EMP_SEED)
np_rng = np.random.default_rng(EMP_SEED)

# One reference timestamp for the whole batch, instead of calling
# datetime.now() again for every generated row.
generated_at = datetime.now()

# Generate data for each column
employee_ids = np.arange(1001, 1001 + num_rows)
names = [f"Employee_{i}" for i in range(1, num_rows + 1)]
departments = py_rng.choices(['HR', 'Finance', 'IT', 'Marketing', 'Sales'], k=num_rows)
# Upper bound is exclusive, so salaries fall in [40000, 100000).
salaries = np_rng.integers(40000, 100000, size=num_rows)
# Hire dates lie between 365 and 3650 days before the reference timestamp.
hire_dates = [generated_at - timedelta(days=py_rng.randint(365, 365 * 10)) for _ in range(num_rows)]
email_addresses = [f"employee{i}@example.com" for i in range(1, num_rows + 1)]
genders = py_rng.choices(['Male', 'Female'], k=num_rows)
# Manager IDs count down from 1000 + num_rows to 1001.
managers = np.arange(1000 + num_rows, 1000, -1)
# NOTE(review): drawn independently of hire_dates, so a row's Last_updt_time
# can be earlier than its Hire_Date - confirm that is acceptable for the demo.
last_updt_times = [generated_at - timedelta(days=py_rng.randint(365, 365 * 10)) for _ in range(num_rows)]

# Create a dictionary from the generated data (one entry per output column)
employee_data = {
    'Employee_ID': employee_ids,
    'Name': names,
    'Department': departments,
    'Salary': salaries,
    'Hire_Date': hire_dates,
    'Email': email_addresses,
    'Gender': genders,
    'Manager': managers,
    'Last_updt_time': last_updt_times
}

# Turn the column dictionary into a pandas frame and hand that to Spark.
emp_df = spark.createDataFrame(
    pd.DataFrame(data=employee_data)
)

# Print the generated rows without shortening long column values.
emp_df.show(truncate=False)

In [0]:
# Optional reset (kept disabled): run "%sql drop table erm.employee" in its own cell to remove the demo table before re-running.

In [0]:
# Save the sample employees as the Delta table erm.employee, replacing any
# existing contents. coalesce(1) collapses the frame to one partition first.
# NOTE(review): if erm.employee already existed, this overwrite is not the
# table's version 0 - later "version as of 0" cells would then read older data.
(
    emp_df.coalesce(1)
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("erm.employee")
)

In [0]:
# Load the saved erm.employee table back as a Spark DataFrame and preview it.
edf = spark.read.table("erm.employee")
edf.show()

In [0]:
# Expose edf to SQL cells as the temp view `employee`.
edf.createOrReplaceTempView('employee')


In [0]:
%sql
-- Look up employee 1001 through the `employee` temp view.
SELECT *
FROM employee
WHERE employee_id = 1001

In [0]:
# Import only what this cell uses: pulling in pyspark's `filter` function
# would shadow Python's builtin filter() for the rest of the notebook.
from pyspark.sql.functions import col, lit

# Same lookup as the SQL cell, expressed with the DataFrame API.
edf.filter(col("employee_id") == lit(1001)).show()

In [0]:
%sql
-- Remove employee 1001 from the Delta table.
DELETE FROM erm.employee
WHERE employee_id = 1001

In [0]:
%sql
-- Check the table for employee 1001 after the delete.
SELECT *
FROM erm.employee
WHERE employee_id = 1001

In [0]:
%sql
-- Show the commit history of the Delta table.
DESCRIBE HISTORY erm.employee

In [0]:
%sql
-- Time travel: read employee 1001 as stored in table version 0.
SELECT *
FROM erm.employee VERSION AS OF 0
WHERE employee_id = 1001

In [0]:
%sql
-- Roll the table back to version 0.
-- NOTE(review): assumes version 0 holds this notebook's data - confirm the table did not exist before the overwrite.
RESTORE TABLE erm.employee TO VERSION AS OF 0

In [0]:
%sql
-- Check the table for employee 1001 after the restore.
SELECT *
FROM erm.employee
WHERE employee_id = 1001

In [0]:
# Explicit imports instead of `import *`: the star import of
# pyspark.sql.functions silently replaces builtins such as sum, min, max and
# filter in the notebook namespace.
from delta.tables import DeltaTable
from pyspark.sql.functions import col, lit

# Delete employee 1001 through the Delta Lake Python API rather than SQL.
deltaTable = DeltaTable.forName(spark, "erm.employee")
deltaTable.delete(col("employee_id") == lit(1001))


In [0]:
%sql
-- Show the commit history again, now including the Python-API delete.
DESCRIBE HISTORY erm.employee

In [0]:
%sql
-- Roll the table back to version 0 once more.
RESTORE TABLE erm.employee TO VERSION AS OF 0

In [0]:
# Preview the DataFrame that was read from erm.employee.
# NOTE(review): edf was created before the delete/restore cells - confirm it
# shows the restored table contents rather than an earlier snapshot.
edf.show()

In [0]:
%sql
-- Set the Gender of employee 1001 to 'Male'.
UPDATE erm.employee
SET Gender = 'Male'
WHERE employee_id = 1001

In [0]:
%sql
-- Check employee 1001 after the update.
SELECT *
FROM erm.employee
WHERE employee_id = 1001

In [0]:
# Explicit imports instead of `import *`: the star import of
# pyspark.sql.functions silently replaces builtins such as sum, min, max and
# filter in the notebook namespace.
from delta.tables import DeltaTable
from pyspark.sql.functions import col, lit

# Set the salary of employee 1001 to 10000 through the Delta Lake Python API.
deltaTable = DeltaTable.forName(spark, "erm.employee")
deltaTable.update(
    col("employee_id") == lit(1001),  # rows to change
    {"salary": lit(10000)},           # column -> new value expression
)

In [0]:
# Display the row for employee 1001 via the DataFrame API.
(
    edf
    .filter(col("employee_id") == lit(1001))
    .show()
)

In [0]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Define the number of rows
num_rows = 5

# First ID of the new batch. IDs 1004.. are paired with names/emails numbered
# 4.., i.e. the sequence number is the ID minus 1000.
first_new_id = 1004
first_new_seq = first_new_id - 1000

# Dedicated, seeded generators: a fresh "Restart & Run All" rebuilds exactly
# the same batch, and the global `random` / `np.random` state is left untouched.
NEW_EMP_SEED = 43
py_rng = random.Random(NEW_EMP_SEED)
np_rng = np.random.default_rng(NEW_EMP_SEED)

# One reference timestamp for the whole batch, instead of calling
# datetime.now() again for every generated row.
generated_at = datetime.now()

# Generate data for each column
employee_ids = np.arange(first_new_id, first_new_id + num_rows)
names = [f"Employee_{i}" for i in range(first_new_seq, first_new_seq + num_rows)]
departments = py_rng.choices(['HR', 'Finance', 'IT', 'Marketing', 'Sales'], k=num_rows)
# Upper bound is exclusive, so salaries fall in [40000, 100000).
salaries = np_rng.integers(40000, 100000, size=num_rows)
# Hire dates lie between 365 and 3650 days before the reference timestamp.
hire_dates = [generated_at - timedelta(days=py_rng.randint(365, 365 * 10)) for _ in range(num_rows)]
email_addresses = [f"employee{i}@example.com" for i in range(first_new_seq, first_new_seq + num_rows)]
genders = py_rng.choices(['Male', 'Female'], k=num_rows)
# Manager IDs count down from 1000 + num_rows to 1001.
managers = np.arange(1000 + num_rows, 1000, -1)
# NOTE(review): drawn independently of hire_dates, so a row's Last_updt_time
# can be earlier than its Hire_Date - confirm that is acceptable for the demo.
last_updt_times = [generated_at - timedelta(days=py_rng.randint(365, 365 * 10)) for _ in range(num_rows)]

# Create a dictionary from the generated data (one entry per output column)
new_employee_data = {
    'Employee_ID': employee_ids,
    'Name': names,
    'Department': departments,
    'Salary': salaries,
    'Hire_Date': hire_dates,
    'Email': email_addresses,
    'Gender': genders,
    'Manager': managers,
    'Last_updt_time': last_updt_times
}

# Turn the new batch's column dictionary into a pandas frame and hand it to Spark.
new_emp_df = spark.createDataFrame(
    pd.DataFrame(data=new_employee_data)
)

# Print the new rows without shortening long column values.
new_emp_df.show(truncate=False)


In [0]:
# Register edf as the temp view `employee` again (same call as the earlier
# view cell), so the SQL cells below can refer to it by name.
edf.createOrReplaceTempView('employee')


In [0]:
%sql
-- Current contents of the `employee` view, before the merge.
SELECT *
FROM employee

In [0]:
# Expose the new batch to SQL cells as the temp view `new_employee`.
new_emp_df.createOrReplaceTempView("new_employee")

In [0]:
%sql
-- Rows of the incoming batch.
SELECT *
FROM new_employee

In [0]:
%sql
-- Upsert the new batch: matching employee_ids are overwritten, the rest are inserted.
-- NOTE(review): the merge target is the `employee` temp view, not erm.employee directly - confirm this is intended.
MERGE INTO employee AS e
USING new_employee AS n
  ON e.employee_id = n.employee_id
WHEN MATCHED THEN
  UPDATE SET *
WHEN NOT MATCHED THEN
  INSERT *

In [0]:
%sql
-- Contents of the `employee` view after the merge.
SELECT *
FROM employee

In [0]:
%sql
-- The incoming batch, for comparison with the merged result.
SELECT *
FROM new_employee

In [0]:
%sql
-- Remove every row from the Delta table ahead of the Python-API merge.
TRUNCATE TABLE erm.employee

In [0]:
%sql
-- Check the table contents after the truncate.
SELECT *
FROM erm.employee

In [0]:
# Explicit import instead of `import *`; this cell uses nothing from
# pyspark.sql.functions, whose star import would shadow builtins such as
# sum, min, max and filter.
from delta.tables import DeltaTable

# Upsert the new batch into erm.employee through the Delta Lake Python API.
# Despite its name, srcTable is the table being merged INTO; new_emp_df is
# the source of the rows.
srcTable = DeltaTable.forName(spark, "erm.employee")
(
    srcTable.alias("e")
    .merge(new_emp_df.alias("n"), "e.employee_id = n.employee_id")
    .whenMatchedUpdateAll()      # matching employee_id: overwrite all columns
    .whenNotMatchedInsertAll()   # unknown employee_id: insert the row
    .execute()
)

In [0]:
%sql
-- The incoming batch that was merged.
SELECT *
FROM new_employee

In [0]:
%sql
-- Table contents after the Python-API merge.
SELECT *
FROM erm.employee