In [1]:
#!pip install simple_salesforce

In [2]:
import json
from sqlalchemy import create_engine
from datetime import date
import pandas as pd
import pymysql
pymysql.install_as_MySQLdb()

In [3]:
# Make sure to use your own `config.py` file. Consider ensuring that these variable names are in sync
from config import sf_username, sf_password, sf_security_token
from config import remote_db_endpoint, remote_db_port
from config import remote_db_name, remote_db_user, remote_db_pwd

In [4]:
from simple_salesforce import Salesforce
sf = Salesforce(username=sf_username, password=sf_password, security_token=sf_security_token)

In [5]:
engine = create_engine(f"mysql://{remote_db_user}:{remote_db_pwd}@{remote_db_endpoint}:{remote_db_port}/{remote_db_name}")
conn = engine.connect()

## Prepare ETL for the Course data

In [16]:
staff_data_df = pd.read_sql("SELECT * FROM staff", conn)
staff_data_df.head()

Unnamed: 0,ID_Staff,EmployeeID,LastName,FirstName,MiddleName,BirthDate
0,1,184220,Wimberly,Sam,Nico,1995-07-05
1,2,130109,Sanford,Gemini,Blair,1992-04-22
2,3,160655,Williams,Dartanion,De Angelo,1993-05-21
3,4,159108,Popelka,Sarah,Nisan,1994-06-07


In [25]:
staff_data_df.rename(columns={
    'EmployeeID':'EmployeeID__c',
    'LastName':'LastName__c',
    'FirstName':'FirstName__c',
    'MiddleName':'MiddleName__c',  
    'BirthDate':'BirthDate__c',
}, inplace=True)


staff_data_df.head()

Unnamed: 0,ID_Staff,EmployeeID__c,LastName__c,FirstName__c,MiddleName__c,BirthDate__c
0,1,184220,Wimberly,Sam,Nico,1995-07-05
1,2,130109,Sanford,Gemini,Blair,1992-04-22
2,3,160655,Williams,Dartanion,De Angelo,1993-05-21
3,4,159108,Popelka,Sarah,Nisan,1994-06-07


In [27]:
staff_data_df=staff_data_df[['EmployeeID__c','LastName__c','FirstName__c', 'MiddleName__c','BirthDate__c']]
staff_data_df.head(10)

Unnamed: 0,EmployeeID__c,LastName__c,FirstName__c,MiddleName__c,BirthDate__c
0,184220,Wimberly,Sam,Nico,1995-07-05
1,130109,Sanford,Gemini,Blair,1992-04-22
2,160655,Williams,Dartanion,De Angelo,1993-05-21
3,159108,Popelka,Sarah,Nisan,1994-06-07


In [29]:
staff_data_records = staff_data_df.to_dict('records')
staff_data_records

[{'EmployeeID__c': '000184220',
  'LastName__c': 'Wimberly',
  'FirstName__c': 'Sam',
  'MiddleName__c': 'Nico',
  'BirthDate__c': '1995-07-05'},
 {'EmployeeID__c': '000130109',
  'LastName__c': 'Sanford',
  'FirstName__c': 'Gemini',
  'MiddleName__c': 'Blair',
  'BirthDate__c': '1992-04-22'},
 {'EmployeeID__c': '000160655',
  'LastName__c': 'Williams',
  'FirstName__c': 'Dartanion',
  'MiddleName__c': 'De Angelo',
  'BirthDate__c': '1993-05-21'},
 {'EmployeeID__c': '000159108',
  'LastName__c': 'Popelka',
  'FirstName__c': 'Sarah',
  'MiddleName__c': 'Nisan',
  'BirthDate__c': '1994-06-07'}]

In [32]:
for index, row in staff_data.iterrows():
    record = {
        #'ID_Staff__c': row['ID_Staff'],
        'EmployeeID__c': int(row['EmployeeID']),
        'LastName__c':row['LastName'],
        'Name':row['FirstName'],
        'MiddleName__c':row['MiddleName'],
        'BirthDate__c':row['BirthDate']
    }
    try:
        sf.Staff__c.create(record)
    except Exception as e:
       print(e)

In [33]:
staff_lookup_list = []

# The `Name` column in the primary key in Salesforce objects
data = sf.query_all_iter("SELECT EmployeeID__c, Name FROM Staff__c")
for row in data:
    rec = {
        'Staff__c': row['Name'],
        'EmployeeID__c': row['EmployeeID__c']
    }
    staff_lookup_list.append(rec)

In [34]:
staff_lookup_df = pd.DataFrame(staff_lookup_list)
staff_lookup_df

Unnamed: 0,Staff__c,EmployeeID__c
0,Gemini,130109
1,Dartanion,160655
2,Sam,184220
3,Sarah,159108


In [68]:
query = '''
    SELECT
        sa.*
        ,s.EmployeeID
    FROM
        staffassignment sa
        INNER JOIN staff s
        ON sa.ID_Staff = s.ID_Staff
'''
staffassignment_data_df = pd.read_sql(query, conn)
staffassignment_data_df.head()

Unnamed: 0,ID_StaffAssignment,ID_Staff,ID_Class,Role,StartDate,EndDate,EmployeeID
0,1,1,1,Teacher Assistant,2020-03-16,,184220
1,2,4,1,Teacher Assistant,2020-03-16,,159108
2,3,3,1,Instructor,2020-03-16,,160655


In [70]:
staffassignment_data_df.rename(columns={
    'Role':'Role__c',
    'StartDate':'Start_Date__c',
    'EndDate':'End_Date__c',
    'EmployeeID':'EmployeeID__c'
    
}, inplace=True)

staffassignment_data_df

Unnamed: 0,ID_StaffAssignment,ID_Staff,ID_Class,Role__c,Start_Date__c,End_Date__c,EmployeeID__c
0,1,1,1,Teacher Assistant,2020-03-16,,184220
1,2,4,1,Teacher Assistant,2020-03-16,,159108
2,3,3,1,Instructor,2020-03-16,,160655


In [71]:
staffassignment_data_df = pd.merge(staffassignment_data_df, staff_lookup_df, how='left')
#class_data_df.drop(columns = ['ID_Class','ID_Course','CourseName','CreditHours','BootCampCourse','Course_Code__c'], inplace=True)

staffassignment_data_df.head()

Unnamed: 0,ID_StaffAssignment,ID_Staff,ID_Class,Role__c,Start_Date__c,End_Date__c,EmployeeID__c,Staff__c
0,1,1,1,Teacher Assistant,2020-03-16,,184220,
1,2,4,1,Teacher Assistant,2020-03-16,,159108,
2,3,3,1,Instructor,2020-03-16,,160655,


In [72]:
staffassignment_data_df
staffassignment_data_df['Start_Date__c'] = pd.to_datetime(staffassignment_data_df['Start_Date__c']).dt.date
staffassignment_data_df['End_Date__c'] = pd.to_datetime(staffassignment_data_df['End_Date__c']).dt.date

staffassignment_data_df.head()

Unnamed: 0,ID_StaffAssignment,ID_Staff,ID_Class,Role__c,Start_Date__c,End_Date__c,EmployeeID__c,Staff__c
0,1,1,1,Teacher Assistant,2020-03-16,NaT,184220,
1,2,4,1,Teacher Assistant,2020-03-16,NaT,159108,
2,3,3,1,Instructor,2020-03-16,NaT,160655,


In [73]:
staffassignment_data_records = staffassignment_data_df.to_dict(orient='records')
staffassignment_data_records

[{'ID_StaffAssignment': 1,
  'ID_Staff': 1,
  'ID_Class': 1,
  'Role__c': 'Teacher Assistant',
  'Start_Date__c': datetime.date(2020, 3, 16),
  'End_Date__c': NaT,
  'EmployeeID__c': '000184220',
  'Staff__c': nan},
 {'ID_StaffAssignment': 2,
  'ID_Staff': 4,
  'ID_Class': 1,
  'Role__c': 'Teacher Assistant',
  'Start_Date__c': datetime.date(2020, 3, 16),
  'End_Date__c': NaT,
  'EmployeeID__c': '000159108',
  'Staff__c': nan},
 {'ID_StaffAssignment': 3,
  'ID_Staff': 3,
  'ID_Class': 1,
  'Role__c': 'Instructor',
  'Start_Date__c': datetime.date(2020, 3, 16),
  'End_Date__c': NaT,
  'EmployeeID__c': '000160655',
  'Staff__c': nan}]

In [76]:
for rec in staffassignment_data_records:

    record = {
        'EmployeeID__c': row['EmployeeID__c'],
        'Role__c':row['Role_c'],
        'StartDate__c':row['Start_Date__c']

    }
    
    try:
        sf.StaffAssignment__c.create(record)
    except Exception as e:
        print(e)

KeyError: 'Role_c'

In [77]:
for rec in staffassignment_data_records:
 
    record = {
        
        'Role__c': rec['Role__c'],
        'class_ID__c': rec['class_ID__c'],
        'Start_Date__c': str(rec['StartDate__c']),
        'ID_Staff__c':rec['ID_Staff__c']
    }
    
    try:
        sf.StaffAssignment__c.create(record)
    except Exception as e:
        print(e)

KeyError: 'class_ID__c'