# Test SCD Type 2 implementation

![](process-flow.drawio.svg)

1. Upload initial JSON data into S3 Raw layer
2. Process the data from raw JSON to Delta format using Glue ETL Job
3. Download the processed data locally and read it into a pandas DataFrame
4. Change the raw JSON by deleting, updating, and creating some records
5. Process the data again, from the changed raw JSON to Delta format, using the Glue ETL job
6. Download the processed data locally again, read it into a pandas DataFrame, compare the changes, and run SQL queries

## Generate a sample employee dataset

In [69]:
import os, logging, time, sys
import pandas as pd
from faker import Faker
import boto3
from deltalake import DeltaTable
import pandasql as ps

In [None]:
# S3 bucket used throughout this notebook: the raw employee JSON is uploaded
# under glue/dataset/employee/ and the processed Delta table is read back from
# glue/processed/. Change this to a bucket you own before running.
s3_artifacts_bucket = "wysde-assets"

In [2]:
# fake = Faker()
# session = boto3.Session()

# s3_client = session.client('s3')
# root = logging.getLogger()
# root.setLevel(logging.WARN)

# # Generates random user records
# def user_records(n):
#   data = []
#   columns=['emp_id','first_name','last_name']
#   users_df = pd.DataFrame(data, columns=columns)
#   for emp_id in range(1,n):
#       first_name=fake.first_name()
#       last_name=fake.last_name()
#       record={ 'emp_id' :emp_id, 'first_name':first_name, 'last_name':last_name }
#       users_df=users_df.append({'emp_id':record['emp_id'], 'first_name':record['first_name'], 'last_name':record['last_name'] }, ignore_index=True)
#   users_key=f'users.csv'
#   users_df.to_csv(users_key, sep='|', encoding='utf-8',header=True,index=False,)
  
#   # Generate a change dataset from the above user records
# def users_change_records(df_key):
#   df = pd.read_csv("users.csv", sep='|')
#   for index, row in df.iterrows():
#     df.at[index,'Address']=fake.address()
#     df.at[index,'phone_number']=fake.phone_number()
#     df.at[index,'isContractor']=fake.boolean()
#   df.to_json(df_key, orient = 'records', compression = 'infer', index = 'true',lines = True)
  
# def users_scd_records(n,df_key):
#     file_exists = os.path.exists('users.csv')
#     if file_exists is False:
#         with open('users.csv', 'w') as creating_new_csv_file: 
#             pass 
#         print("Empty users.csv file created successfully")

#     if os.stat("users.csv").st_size == 0 :
#         print('users.csv file is empty. writing new user records to file..')
#         user_records(n)
#         print('user records written sucessfully..')
#     else :
#         print('Already user records exist in users.csv ...')
        
#     print('updating exsting user records... ')
#     users_change_records(df_key)
  
# current_time = time.time()
# df_key=f'/tmp/fake_emp_data_{current_time}.json'
# print(df_key)

# try:
#     users_scd_records(26,df_key)
#     if os.path.exists(df_key):
#         src_file_name=df_key
#         bucket_name=s3_artifacts_bucket
#         dest_file_name=f'glue/dataset/employee/fake_emp_data.json'
#         response = s3_client.upload_file(src_file_name, bucket_name, dest_file_name)
# except ValueError:
#     logging.info(f'failed with exception')

In [57]:
%%writefile fake_emp_data.json
{"emp_id":1,"first_name":"Melissa","last_name":"Parks","Address":"24385 Chad Springs\nGarciabury, NH 97545","phone_number":"443-656-9602","isContractor":true}
{"emp_id":2,"first_name":"Laura","last_name":"Delgado","Address":"6184 Douglas Square\nWandaland, NY 09750","phone_number":"9794790414","isContractor":false}
{"emp_id":3,"first_name":"Luis","last_name":"Barnes","Address":"479 Cross Harbor\nNorth Virginiashire, MA 61515","phone_number":"332-798-7199x8890","isContractor":true}
{"emp_id":4,"first_name":"Jonathan","last_name":"Wilson","Address":"8895 Laura Burgs\nAndrewview, VA 60923","phone_number":"2573071875","isContractor":true}
{"emp_id":5,"first_name":"Kelly","last_name":"Gomez","Address":"921 Padilla Route Apt. 747\nEast Kristinabury, WI 79952","phone_number":"651.491.8654","isContractor":true}
{"emp_id":6,"first_name":"Robert","last_name":"Smith","Address":"79708 Taylor Streets Suite 869\nSmithland, PA 95389","phone_number":"985-959-3869","isContractor":false}
{"emp_id":7,"first_name":"Glenn","last_name":"Martinez","Address":"1233 Ramos Tunnel Apt. 823\nBlanchardfurt, ID 23700","phone_number":"524.011.0553","isContractor":true}
{"emp_id":8,"first_name":"Teresa","last_name":"Estrada","Address":"27765 Regina Mountains\nWilliamport, NE 22032","phone_number":"231-333-2028x0932","isContractor":true}
{"emp_id":9,"first_name":"Karen","last_name":"Spencer","Address":"7892 Kayla Passage\nEast Erica, DC 53807","phone_number":"001-835-207-5623x0184","isContractor":false}
{"emp_id":10,"first_name":"Daniel","last_name":"Foley","Address":"24180 Thomas Shores Suite 761\nWeavertown, UT 25979","phone_number":"784.467.2738x437","isContractor":true}
{"emp_id":11,"first_name":"Amy","last_name":"Stevens","Address":"390 Bradley Shoals\nSouth Rachelville, WA 38963","phone_number":"790.557.7066x5704","isContractor":true}
{"emp_id":12,"first_name":"Nicholas","last_name":"Aguirre","Address":"8115 Miller Keys Suite 156\nSoniaborough, UT 82244","phone_number":"575-772-3211x40798","isContractor":true}
{"emp_id":13,"first_name":"John","last_name":"Valdez","Address":"249 Taylor Highway Apt. 433\nJeanettetown, TN 45093","phone_number":"(656)503-4253x839","isContractor":true}
{"emp_id":14,"first_name":"Michael","last_name":"West","Address":"57112 Jody Crossing Suite 957\nPort Billyfort, KS 27324","phone_number":"(928)743-3623x7403","isContractor":false}
{"emp_id":15,"first_name":"Perry","last_name":"Mcguire","Address":"91023 Powell Mill Suite 655\nJamesbury, DE 62571","phone_number":"923-949-8460x97575","isContractor":true}
{"emp_id":16,"first_name":"James","last_name":"Munoz","Address":"81410 Wright Place\nKristinechester, SD 35936","phone_number":"001-925-703-4024x43486","isContractor":true}
{"emp_id":17,"first_name":"Todd","last_name":"Barton","Address":"61063 Yu Meadows Suite 139\nSmithfurt, DC 10938","phone_number":"+1-685-090-8059x146","isContractor":true}
{"emp_id":18,"first_name":"Christopher","last_name":"Noble","Address":"3583 Patterson Unions Suite 601\nCarlosport, IL 71203","phone_number":"001-667-565-5446x616","isContractor":true}
{"emp_id":19,"first_name":"Sandy","last_name":"Hunter","Address":"14076 Moore Tunnel Apt. 382\nAlvarezstad, WI 02628","phone_number":"+1-838-795-7914x3698","isContractor":true}
{"emp_id":20,"first_name":"Jennifer","last_name":"Ballard","Address":"7537 Johnson Forest\nEast Frances, WY 19474","phone_number":"+1-820-278-5664x2131","isContractor":false}
{"emp_id":21,"first_name":"David","last_name":"Morris","Address":"742 Baker Ways Apt. 948\nSouth Joshua, MI 65461","phone_number":"4195487426","isContractor":false}
{"emp_id":22,"first_name":"Paula","last_name":"Jones","Address":"75701 Alexander Summit\nChambersside, NJ 08133","phone_number":"001-004-314-9492x3798","isContractor":true}
{"emp_id":23,"first_name":"Lisa","last_name":"Thompson","Address":"072 Kyle Alley\nSouth Omarburgh, GA 16645","phone_number":"(401)442-0263x68049","isContractor":true}
{"emp_id":24,"first_name":"Vickie","last_name":"Johnson","Address":"57405 Justin Knoll\nSouth Stevenburgh, IL 41148","phone_number":"199-457-6725","isContractor":true}
{"emp_id":25,"first_name":"John","last_name":"Hamilton","Address":"29077 Porter Park\nMaryburgh, LA 68931","phone_number":"001-806-212-1499","isContractor":true}


Overwriting fake_emp_data.json


In [58]:
# Upload the initial employee dataset to the raw (source) prefix of the bucket.
# {s3_artifacts_bucket} is interpolated by IPython from the Python variable.
!aws s3 cp fake_emp_data.json s3://{s3_artifacts_bucket}/glue/dataset/employee/fake_emp_data.json

upload: ./fake_emp_data.json to s3://wysde-assets/glue/dataset/employee/fake_emp_data.json


## Deploy Glue Job

In [9]:
# Deploy the Glue job stack from the local CloudFormation template.
# CAPABILITY_NAMED_IAM acknowledges that the template may create IAM resources
# with custom names. Note that create-stack fails if a stack named
# glue-cdc-job already exists, so this cell is not re-runnable as-is.
!aws cloudformation create-stack \
--stack-name glue-cdc-job \
--template-body file://cloudformation-glue-job.yml \
--capabilities CAPABILITY_NAMED_IAM
# create-stack only submits the request and returns immediately. Block until
# the stack is fully created so the job exists before the next section starts it.
!aws cloudformation wait stack-create-complete --stack-name glue-cdc-job

{
    "StackId": "arn:aws:cloudformation:us-east-1:684199068947:stack/glue-cdc-job/1f777d20-cef4-11ed-b3b3-1241b32e2c8f"
}


## Run the Job

Run the job either from the AWS Glue Jobs console or by executing the cell below; do one or the other, since each starts a separate job run.

In [59]:
# Start the Glue job and block until the run reaches a terminal state.
# Starting a run is asynchronous; without waiting, the cells below would list
# and download the processed prefix before the job has written the Delta table.
glue_client = boto3.client("glue")
glue_job_name = "glue-cdc-job-src-to-processed"
job_run_id = glue_client.start_job_run(JobName=glue_job_name)["JobRunId"]
print(f"Started job run {job_run_id}")

# Glue reports these states while a run is still in flight; anything else
# (SUCCEEDED, FAILED, STOPPED, TIMEOUT, ERROR, EXPIRED) is terminal.
in_flight_states = ("STARTING", "RUNNING", "STOPPING", "WAITING")
while True:
    job_run = glue_client.get_job_run(JobName=glue_job_name, RunId=job_run_id)
    job_run_state = job_run["JobRun"]["JobRunState"]
    if job_run_state not in in_flight_states:
        break
    time.sleep(15)  # poll gently; the job takes on the order of minutes

# Fail loudly so a broken run is not mistaken for fresh output downstream.
if job_run_state != "SUCCEEDED":
    raise RuntimeError(f"Glue job run {job_run_id} ended in state {job_run_state}")
job_run_state

{
    "JobRunId": "jr_ef11248fbb22e393f384ed7576502fe443660e5b84a133ff127867e65093bc1c"
}


![job-run-1](https://user-images.githubusercontent.com/62965911/228858759-c9206fe2-9e31-405f-9b1d-b6ac7caf8c13.png)

When the AWS Glue job is run for the first time, the job reads the employee dataset from the landing bucket path and ingests the data to the processed bucket as a Delta table.

In [60]:
# List the processed prefix to confirm the job wrote the Delta table
# (a _delta_log/ directory plus one or more parquet data files).
!aws s3 ls s3://{s3_artifacts_bucket}/glue/processed/

                           PRE _delta_log/
2023-03-30 18:55:31          0 _delta_log_$folder$
2023-03-30 18:55:36       7086 part-00000-3d180e8c-25bc-41f0-8cd3-deeff3ddc93b-c000.snappy.parquet


We can read this into pandas dataframe:

In [61]:
# Remove any previous local copy first so stale files cannot linger, then
# download the whole processed prefix (parquet files and _delta_log) into ./processed.
!rm -rf processed
!aws s3 cp --recursive s3://{s3_artifacts_bucket}/glue/processed processed

download: s3://wysde-assets/glue/processed/_delta_log_$folder$ to processed/_delta_log_$folder$
download: s3://wysde-assets/glue/processed/_delta_log/00000000000000000000.json to processed/_delta_log/00000000000000000000.json
download: s3://wysde-assets/glue/processed/part-00000-3d180e8c-25bc-41f0-8cd3-deeff3ddc93b-c000.snappy.parquet to processed/part-00000-3d180e8c-25bc-41f0-8cd3-deeff3ddc93b-c000.snappy.parquet


In [62]:
# Open the locally downloaded Delta table and materialize it as a pandas
# DataFrame; the bare `df` on the last line displays it as the cell output.
dt = DeltaTable("processed")
df = dt.to_pandas()
df

Unnamed: 0,emp_id,first_name,last_name,Address,phone_number,isContractor,emp_key,start_date,end_date,isCurrent,delete_flag
0,1,Melissa,Parks,"24385 Chad Springs\nGarciabury, NH 97545",443-656-9602,True,6476bb09b97323a857dc56f6502a23f6e9c98d45ae8293...,2023-03-30,,True,False
1,2,Laura,Delgado,"6184 Douglas Square\nWandaland, NY 09750",9794790414,False,d018b725b889606b19ea28b8a192bab91f68f629d02a05...,2023-03-30,,True,False
2,3,Luis,Barnes,"479 Cross Harbor\nNorth Virginiashire, MA 61515",332-798-7199x8890,True,d60a1ba6ccbbaa9ecbf7339338c3cb2797ec3d2d0cd7ae...,2023-03-30,,True,False
3,4,Jonathan,Wilson,"8895 Laura Burgs\nAndrewview, VA 60923",2573071875,True,9a9bd70577ec1232e2ba53ebc779539874b4a9cae2755b...,2023-03-30,,True,False
4,5,Kelly,Gomez,"921 Padilla Route Apt. 747\nEast Kristinabury,...",651.491.8654,True,d8b1046115e0fbc25645d86513fbc2332dacf48e06cf57...,2023-03-30,,True,False
5,6,Robert,Smith,"79708 Taylor Streets Suite 869\nSmithland, PA ...",985-959-3869,False,be82bdc43caef8d6dea2da2dacef219346442d9c1ad86c...,2023-03-30,,True,False
6,7,Glenn,Martinez,"1233 Ramos Tunnel Apt. 823\nBlanchardfurt, ID ...",524.011.0553,True,003d6e076f8fdf6788df46a2f1ca51711ae5518758b8f8...,2023-03-30,,True,False
7,8,Teresa,Estrada,"27765 Regina Mountains\nWilliamport, NE 22032",231-333-2028x0932,True,6cc118c19ac43259264ae915121ec2d788b2655e3a63a7...,2023-03-30,,True,False
8,9,Karen,Spencer,"7892 Kayla Passage\nEast Erica, DC 53807",001-835-207-5623x0184,False,b736c180d82a370d420990f78a840dad933cab70489709...,2023-03-30,,True,False
9,10,Daniel,Foley,"24180 Thomas Shores Suite 761\nWeavertown, UT ...",784.467.2738x437,True,88cd6f8ef94532e207857c2e25c168a1ca7db73f5d5707...,2023-03-30,,True,False


The Delta table is stored with an `emp_key`, which is unique to each and every change and is used to track the changes. The `emp_key` is created for every insert, update, and delete, and can be used to find all the changes pertaining to a single `emp_id`.

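The `emp_key` is created using the SHA256 hashing algorithm, as shown in the following PySpark snippet (shown for illustration; it is not meant to be executed in this notebook, where `df` is a pandas DataFrame):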

In [None]:
# Builds `emp_key` as the SHA-256 of the six source columns joined with "||".
# NOTE(review): this appears to be the PySpark expression used inside the Glue
# job. In this notebook `df` is a pandas DataFrame and sha2/concat_ws/col are
# not imported, so the cell is illustrative only and will raise if executed.
# Consider converting it to a markdown code block.
df.withColumn("emp_key", sha2(concat_ws("||", col("emp_id"), col("first_name"), col("last_name"), col("Address"),
                                        col("phone_number"), col("isContractor")), 256))

## Perform inserts, updates, and deletes

Let's modify our initial dataset with the following changes:

1. Change the `isContractor` flag to `false` (change it to `true` if your dataset already shows `false`) for `emp_id=12`.
2. Delete the entire row where `emp_id=8` (make sure to save the record in a text editor, because we use this record in another use case).
3. Copy the row for `emp_id=25` and insert a new row. Change the `emp_id` to be `26`, and make sure to change the values for other columns as well.

After we make these changes, the employee source dataset looks like the following (the full file is shown: the row for `emp_id=8` is removed, `emp_id=12` now has `isContractor` set to `false`, and a new row for `emp_id=26` is appended):

In [64]:
%%writefile fake_emp_data.json
{"emp_id":1,"first_name":"Melissa","last_name":"Parks","Address":"24385 Chad Springs\nGarciabury, NH 97545","phone_number":"443-656-9602","isContractor":true}
{"emp_id":2,"first_name":"Laura","last_name":"Delgado","Address":"6184 Douglas Square\nWandaland, NY 09750","phone_number":"9794790414","isContractor":false}
{"emp_id":3,"first_name":"Luis","last_name":"Barnes","Address":"479 Cross Harbor\nNorth Virginiashire, MA 61515","phone_number":"332-798-7199x8890","isContractor":true}
{"emp_id":4,"first_name":"Jonathan","last_name":"Wilson","Address":"8895 Laura Burgs\nAndrewview, VA 60923","phone_number":"2573071875","isContractor":true}
{"emp_id":5,"first_name":"Kelly","last_name":"Gomez","Address":"921 Padilla Route Apt. 747\nEast Kristinabury, WI 79952","phone_number":"651.491.8654","isContractor":true}
{"emp_id":6,"first_name":"Robert","last_name":"Smith","Address":"79708 Taylor Streets Suite 869\nSmithland, PA 95389","phone_number":"985-959-3869","isContractor":false}
{"emp_id":7,"first_name":"Glenn","last_name":"Martinez","Address":"1233 Ramos Tunnel Apt. 823\nBlanchardfurt, ID 23700","phone_number":"524.011.0553","isContractor":true}
{"emp_id":9,"first_name":"Karen","last_name":"Spencer","Address":"7892 Kayla Passage\nEast Erica, DC 53807","phone_number":"001-835-207-5623x0184","isContractor":false}
{"emp_id":10,"first_name":"Daniel","last_name":"Foley","Address":"24180 Thomas Shores Suite 761\nWeavertown, UT 25979","phone_number":"784.467.2738x437","isContractor":true}
{"emp_id":11,"first_name":"Amy","last_name":"Stevens","Address":"390 Bradley Shoals\nSouth Rachelville, WA 38963","phone_number":"790.557.7066x5704","isContractor":true}
{"emp_id":12,"first_name":"Nicholas","last_name":"Aguirre","Address":"8115 Miller Keys Suite 156\nSoniaborough, UT 82244","phone_number":"575-772-3211x40798","isContractor":false}
{"emp_id":13,"first_name":"John","last_name":"Valdez","Address":"249 Taylor Highway Apt. 433\nJeanettetown, TN 45093","phone_number":"(656)503-4253x839","isContractor":true}
{"emp_id":14,"first_name":"Michael","last_name":"West","Address":"57112 Jody Crossing Suite 957\nPort Billyfort, KS 27324","phone_number":"(928)743-3623x7403","isContractor":false}
{"emp_id":15,"first_name":"Perry","last_name":"Mcguire","Address":"91023 Powell Mill Suite 655\nJamesbury, DE 62571","phone_number":"923-949-8460x97575","isContractor":true}
{"emp_id":16,"first_name":"James","last_name":"Munoz","Address":"81410 Wright Place\nKristinechester, SD 35936","phone_number":"001-925-703-4024x43486","isContractor":true}
{"emp_id":17,"first_name":"Todd","last_name":"Barton","Address":"61063 Yu Meadows Suite 139\nSmithfurt, DC 10938","phone_number":"+1-685-090-8059x146","isContractor":true}
{"emp_id":18,"first_name":"Christopher","last_name":"Noble","Address":"3583 Patterson Unions Suite 601\nCarlosport, IL 71203","phone_number":"001-667-565-5446x616","isContractor":true}
{"emp_id":19,"first_name":"Sandy","last_name":"Hunter","Address":"14076 Moore Tunnel Apt. 382\nAlvarezstad, WI 02628","phone_number":"+1-838-795-7914x3698","isContractor":true}
{"emp_id":20,"first_name":"Jennifer","last_name":"Ballard","Address":"7537 Johnson Forest\nEast Frances, WY 19474","phone_number":"+1-820-278-5664x2131","isContractor":false}
{"emp_id":21,"first_name":"David","last_name":"Morris","Address":"742 Baker Ways Apt. 948\nSouth Joshua, MI 65461","phone_number":"4195487426","isContractor":false}
{"emp_id":22,"first_name":"Paula","last_name":"Jones","Address":"75701 Alexander Summit\nChambersside, NJ 08133","phone_number":"001-004-314-9492x3798","isContractor":true}
{"emp_id":23,"first_name":"Lisa","last_name":"Thompson","Address":"072 Kyle Alley\nSouth Omarburgh, GA 16645","phone_number":"(401)442-0263x68049","isContractor":true}
{"emp_id":24,"first_name":"Vickie","last_name":"Johnson","Address":"57405 Justin Knoll\nSouth Stevenburgh, IL 41148","phone_number":"199-457-6725","isContractor":true}
{"emp_id":25,"first_name":"John","last_name":"Hamilton","Address":"29077 Porter Park\nMaryburgh, LA 68931","phone_number":"001-806-212-1499","isContractor":true}
{"emp_id":26,"first_name":"Sparsh","last_name":"A","Address":"29077 Porter Park\nMaryburgh, LA 68931","phone_number":"001-806-212-1499","isContractor":false}


Overwriting fake_emp_data.json


Now, upload the changed `fake_emp_data.json` file to the same source prefix.

In [65]:
# Overwrite the source object with the changed dataset; the key is the same
# one used for the initial upload, so the next job run reads the new contents.
!aws s3 cp fake_emp_data.json s3://{s3_artifacts_bucket}/glue/dataset/employee/fake_emp_data.json

upload: ./fake_emp_data.json to s3://wysde-assets/glue/dataset/employee/fake_emp_data.json


In [66]:
# Re-run the Glue job against the changed dataset and block until the run
# reaches a terminal state. Starting a run is asynchronous; without waiting,
# the download below could copy the table before the changes are applied.
glue_client = boto3.client("glue")
glue_job_name = "glue-cdc-job-src-to-processed"
job_run_id = glue_client.start_job_run(JobName=glue_job_name)["JobRunId"]
print(f"Started job run {job_run_id}")

# Glue reports these states while a run is still in flight; anything else
# (SUCCEEDED, FAILED, STOPPED, TIMEOUT, ERROR, EXPIRED) is terminal.
in_flight_states = ("STARTING", "RUNNING", "STOPPING", "WAITING")
while True:
    job_run = glue_client.get_job_run(JobName=glue_job_name, RunId=job_run_id)
    job_run_state = job_run["JobRun"]["JobRunState"]
    if job_run_state not in in_flight_states:
        break
    time.sleep(15)  # poll gently; the job takes on the order of minutes

# Fail loudly so a broken run is not mistaken for fresh output downstream.
if job_run_state != "SUCCEEDED":
    raise RuntimeError(f"Glue job run {job_run_id} ended in state {job_run_state}")
job_run_state

{
    "JobRunId": "jr_61b80f8ba944c0d39960c279f7711c378b29467c7ea33e8986174c96b1e9893c"
}


After you upload the changed employee dataset to Amazon S3, run the job again — either with the cell above or from the AWS Glue console — and wait for the run to finish before downloading the processed data.

In [67]:
# Discard the copy downloaded after the first run, then fetch the processed
# prefix again so the local table includes the files and log entries written
# by the second job run.
!rm -rf processed
!aws s3 cp --recursive s3://{s3_artifacts_bucket}/glue/processed processed

download: s3://wysde-assets/glue/processed/part-00000-3d180e8c-25bc-41f0-8cd3-deeff3ddc93b-c000.snappy.parquet to processed/part-00000-3d180e8c-25bc-41f0-8cd3-deeff3ddc93b-c000.snappy.parquet
download: s3://wysde-assets/glue/processed/_delta_log_$folder$ to processed/_delta_log_$folder$
download: s3://wysde-assets/glue/processed/_delta_log/00000000000000000001.json to processed/_delta_log/00000000000000000001.json
download: s3://wysde-assets/glue/processed/_delta_log/00000000000000000000.json to processed/_delta_log/00000000000000000000.json
download: s3://wysde-assets/glue/processed/part-00000-a55e8b75-859d-48e4-9947-fde786d012b9-c000.snappy.parquet to processed/part-00000-a55e8b75-859d-48e4-9947-fde786d012b9-c000.snappy.parquet
download: s3://wysde-assets/glue/processed/part-00000-a69fe2f3-035a-407e-9830-a68094c63148-c000.snappy.parquet to processed/part-00000-a69fe2f3-035a-407e-9830-a68094c63148-c000.snappy.parquet
download: s3://wysde-assets/glue/processed/_delta_log/00000000000000

In [70]:
# Reload the Delta table after the second run. `df` is rebound here, so the
# SQL queries below operate on the post-change table, not the initial load.
df = DeltaTable("processed").to_pandas()
df

Unnamed: 0,emp_id,first_name,last_name,Address,phone_number,isContractor,emp_key,start_date,end_date,isCurrent,delete_flag
0,1,Melissa,Parks,"24385 Chad Springs\nGarciabury, NH 97545",443-656-9602,True,6476bb09b97323a857dc56f6502a23f6e9c98d45ae8293...,2023-03-30,,True,False
1,2,Laura,Delgado,"6184 Douglas Square\nWandaland, NY 09750",9794790414,False,d018b725b889606b19ea28b8a192bab91f68f629d02a05...,2023-03-30,,True,False
2,3,Luis,Barnes,"479 Cross Harbor\nNorth Virginiashire, MA 61515",332-798-7199x8890,True,d60a1ba6ccbbaa9ecbf7339338c3cb2797ec3d2d0cd7ae...,2023-03-30,,True,False
3,4,Jonathan,Wilson,"8895 Laura Burgs\nAndrewview, VA 60923",2573071875,True,9a9bd70577ec1232e2ba53ebc779539874b4a9cae2755b...,2023-03-30,,True,False
4,5,Kelly,Gomez,"921 Padilla Route Apt. 747\nEast Kristinabury,...",651.491.8654,True,d8b1046115e0fbc25645d86513fbc2332dacf48e06cf57...,2023-03-30,,True,False
5,6,Robert,Smith,"79708 Taylor Streets Suite 869\nSmithland, PA ...",985-959-3869,False,be82bdc43caef8d6dea2da2dacef219346442d9c1ad86c...,2023-03-30,,True,False
6,7,Glenn,Martinez,"1233 Ramos Tunnel Apt. 823\nBlanchardfurt, ID ...",524.011.0553,True,003d6e076f8fdf6788df46a2f1ca51711ae5518758b8f8...,2023-03-30,,True,False
7,8,Teresa,Estrada,"27765 Regina Mountains\nWilliamport, NE 22032",231-333-2028x0932,True,6cc118c19ac43259264ae915121ec2d788b2655e3a63a7...,2023-03-30,2023-03-30,False,True
8,9,Karen,Spencer,"7892 Kayla Passage\nEast Erica, DC 53807",001-835-207-5623x0184,False,b736c180d82a370d420990f78a840dad933cab70489709...,2023-03-30,,True,False
9,10,Daniel,Foley,"24180 Thomas Shores Suite 761\nWeavertown, UT ...",784.467.2738x437,True,88cd6f8ef94532e207857c2e25c168a1ca7db73f5d5707...,2023-03-30,,True,False


- You will see two records for emp_id=12.
- The record for emp_id=8 that was deleted in the source as part of this run will still exist but with the following changes to the values:
    - isCurrent=false
    - end_date='2023-03-30'
    - delete_flag=true
- The new employee record will be inserted with the following values:
    - emp_id=26
    - isCurrent=true
    - end_date=NULL (or empty string)
    - delete_flag=false

## Analytics Queries

Query 1 – Retrieve a list of all the employees who left the organization in the current month (for example, March 2023).

In [76]:
# Leavers: rows flagged as deleted whose end_date falls in March 2023.
# pandasql resolves `df` from the notebook namespace and runs the statement
# with SQLite, hence STRFTIME for the year/month comparison.
ps.sqldf("SELECT * FROM df where delete_flag=true and STRFTIME('%Y/%m', end_date) ='2023/03'")

Unnamed: 0,emp_id,first_name,last_name,Address,phone_number,isContractor,emp_key,start_date,end_date,isCurrent,delete_flag
0,8,Teresa,Estrada,"27765 Regina Mountains\nWilliamport, NE 22032",231-333-2028x0932,1,6cc118c19ac43259264ae915121ec2d788b2655e3a63a7...,2023-03-30,2023-03-30,0,1


Query 2 – Retrieve a list of new employees who joined the organization in the current month (for example, March 2023).

In [78]:
# Joiners: current rows whose start_date falls in March 2023.
# NOTE(review): an updated employee's newest version is presumably also a
# current row with a fresh start_date (confirm against the Glue job), so this
# filter may count updates as new joiners. In this run every record was first
# loaded in March 2023, so the result is not affected.
ps.sqldf("SELECT * FROM df where iscurrent=true and STRFTIME('%Y/%m', start_date) ='2023/03'")

Unnamed: 0,emp_id,first_name,last_name,Address,phone_number,isContractor,emp_key,start_date,end_date,isCurrent,delete_flag
0,1,Melissa,Parks,"24385 Chad Springs\nGarciabury, NH 97545",443-656-9602,1,6476bb09b97323a857dc56f6502a23f6e9c98d45ae8293...,2023-03-30,,1,0
1,2,Laura,Delgado,"6184 Douglas Square\nWandaland, NY 09750",9794790414,0,d018b725b889606b19ea28b8a192bab91f68f629d02a05...,2023-03-30,,1,0
2,3,Luis,Barnes,"479 Cross Harbor\nNorth Virginiashire, MA 61515",332-798-7199x8890,1,d60a1ba6ccbbaa9ecbf7339338c3cb2797ec3d2d0cd7ae...,2023-03-30,,1,0
3,4,Jonathan,Wilson,"8895 Laura Burgs\nAndrewview, VA 60923",2573071875,1,9a9bd70577ec1232e2ba53ebc779539874b4a9cae2755b...,2023-03-30,,1,0
4,5,Kelly,Gomez,"921 Padilla Route Apt. 747\nEast Kristinabury,...",651.491.8654,1,d8b1046115e0fbc25645d86513fbc2332dacf48e06cf57...,2023-03-30,,1,0
5,6,Robert,Smith,"79708 Taylor Streets Suite 869\nSmithland, PA ...",985-959-3869,0,be82bdc43caef8d6dea2da2dacef219346442d9c1ad86c...,2023-03-30,,1,0
6,7,Glenn,Martinez,"1233 Ramos Tunnel Apt. 823\nBlanchardfurt, ID ...",524.011.0553,1,003d6e076f8fdf6788df46a2f1ca51711ae5518758b8f8...,2023-03-30,,1,0
7,9,Karen,Spencer,"7892 Kayla Passage\nEast Erica, DC 53807",001-835-207-5623x0184,0,b736c180d82a370d420990f78a840dad933cab70489709...,2023-03-30,,1,0
8,10,Daniel,Foley,"24180 Thomas Shores Suite 761\nWeavertown, UT ...",784.467.2738x437,1,88cd6f8ef94532e207857c2e25c168a1ca7db73f5d5707...,2023-03-30,,1,0
9,11,Amy,Stevens,"390 Bradley Shoals\nSouth Rachelville, WA 38963",790.557.7066x5704,1,5ee734b61ff801afd114367411a4921a55c5aee6a8388e...,2023-03-30,,1,0


Query 3 – Find the history of any given employee in the organization (in this case employee 18).

In [80]:
# History of one employee: every stored version of the given emp_id.
# emp_id=18 was not modified in this walkthrough, so a single current row is
# returned; emp_id=12 (updated above) is the one expected to show two versions.
ps.sqldf("SELECT * FROM df where emp_id=18")

Unnamed: 0,emp_id,first_name,last_name,Address,phone_number,isContractor,emp_key,start_date,end_date,isCurrent,delete_flag
0,18,Christopher,Noble,"3583 Patterson Unions Suite 601\nCarlosport, I...",001-667-565-5446x616,1,c974796550a76d5ff4553b4393e7508ba68365ba6b07df...,2023-03-30,,1,0
