In [None]:
#!pip install simple_salesforce

In [46]:
import json
from sqlalchemy import create_engine
from datetime import date
import pandas as pd
import pymysql
pymysql.install_as_MySQLdb()

In [47]:
# Make sure to use your own `config.py` file, and check that it defines every variable name imported below.
from config import sf_username, sf_password, sf_security_token
from config import remote_db_endpoint, remote_db_port
from config import remote_db_name, remote_db_user, remote_db_pwd

In [48]:
from simple_salesforce import Salesforce

# Open an authenticated Salesforce session with the credentials imported from
# `config.py`; the resulting client is bound to `sf`.
sf = Salesforce(
    username=sf_username,
    password=sf_password,
    security_token=sf_security_token,
)

In [49]:
# Build the MySQL connection URL from the settings imported from `config.py`
# and open a connection on the resulting engine.
# NOTE(review): the user name and password are interpolated verbatim; if either
# can contain URL-reserved characters (e.g. `@`, `:`, `/`) it presumably needs
# URL-escaping first — confirm against the values in `config.py`.
engine = create_engine(
    f"mysql://{remote_db_user}:{remote_db_pwd}"
    f"@{remote_db_endpoint}:{remote_db_port}/{remote_db_name}"
)
conn = engine.connect()

## Prepare ETL for the Course data

In [50]:
# Read every row of the MySQL `course` table into a DataFrame.
course_data_df = pd.read_sql(sql="SELECT * FROM course", con=conn)

# Preview up to the first 30 rows.
course_data_df.head(n=30)

Unnamed: 0,ID_Course,CourseCode,CourseName,CreditHours,BootCampCourse
0,1,BC-DATAVIZ,Data Visualization and Analytics,12,1
1,2,BC-WEBDEV,Full Stack Web Development,12,1
2,3,BC-UIUX,User Interface/User Experience,12,1
3,4,CIS-349,Introduction to Databases,5,0
4,5,CIS-405,Database Programming,5,0
5,6,CIS-438,Database Administration,5,0
6,7,CIS-430,Business Systems Programming I,5,0
7,8,CIS-435,Business Systems Programming II,5,0


In [51]:
# Map the MySQL column names onto the `__c` field names used when the course
# records are sent to Salesforce. `rename` returns a new frame that is re-bound
# to the same name rather than mutating the existing frame with `inplace=True`.
course_data_df = course_data_df.rename(columns={
    'CourseCode': 'coursecode__c',
    'CourseName': 'CourseName__c',
    'CreditHours': 'CreditHours__c',
    'BootCampCourse': 'BootCampCourse__c',
})
course_data_df.head()

Unnamed: 0,ID_Course,coursecode__c,CourseName__c,CreditHours__c,BootCampCourse__c
0,1,BC-DATAVIZ,Data Visualization and Analytics,12,1
1,2,BC-WEBDEV,Full Stack Web Development,12,1
2,3,BC-UIUX,User Interface/User Experience,12,1
3,4,CIS-349,Introduction to Databases,5,0
4,5,CIS-405,Database Programming,5,0


In [52]:
# Keep only the four `__c` columns; every other column (e.g. `ID_Course`) is dropped.
course_data_df = course_data_df.loc[
    :,
    [
        'coursecode__c',
        'CourseName__c',
        'CreditHours__c',
        'BootCampCourse__c',
    ],
]
course_data_df.head()

Unnamed: 0,coursecode__c,CourseName__c,CreditHours__c,BootCampCourse__c
0,BC-DATAVIZ,Data Visualization and Analytics,12,1
1,BC-WEBDEV,Full Stack Web Development,12,1
2,BC-UIUX,User Interface/User Experience,12,1
3,CIS-349,Introduction to Databases,5,0
4,CIS-405,Database Programming,5,0


In [53]:
# Convert the frame into a list with one {column: value} dictionary per row.
course_data_records = course_data_df.to_dict(orient='records')
course_data_records

[{'coursecode__c': 'BC-DATAVIZ',
  'CourseName__c': 'Data Visualization and Analytics',
  'CreditHours__c': 12,
  'BootCampCourse__c': 1},
 {'coursecode__c': 'BC-WEBDEV',
  'CourseName__c': 'Full Stack Web Development',
  'CreditHours__c': 12,
  'BootCampCourse__c': 1},
 {'coursecode__c': 'BC-UIUX',
  'CourseName__c': 'User Interface/User Experience',
  'CreditHours__c': 12,
  'BootCampCourse__c': 1},
 {'coursecode__c': 'CIS-349',
  'CourseName__c': 'Introduction to Databases',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 {'coursecode__c': 'CIS-405',
  'CourseName__c': 'Database Programming',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 {'coursecode__c': 'CIS-438',
  'CourseName__c': 'Database Administration',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 {'coursecode__c': 'CIS-430',
  'CourseName__c': 'Business Systems Programming I',
  'CreditHours__c': 5,
  'BootCampCourse__c': 0},
 {'coursecode__c': 'CIS-435',
  'CourseName__c': 'Business Systems Programming II',
 

In [54]:
# Create one Salesforce Course record per row. A failure is printed and the
# loop carries on with the next row.
for course in course_data_records:
    # Copy exactly the four fields that are sent to Salesforce.
    payload = {
        field: course[field]
        for field in (
            'coursecode__c',
            'CourseName__c',
            'CreditHours__c',
            'BootCampCourse__c',
        )
    }

    try:
        sf.Course__C.create(payload)
    except Exception as err:
        print(err)
        

## Create Course Lookup Table
You will use this later to crosswalk each course code to the primary key of its record in the `Course` table.

In [69]:
# Build one lookup entry per Salesforce `Course__c` record: its `Name` value
# (stored under the key `Course__c`) paired with its course code.
# The `Name` column is treated here as the key of each Salesforce record.
# NOTE(review): confirm that `Name`, rather than `Id`, is the value wanted.
course_lookup_list = [
    {
        'Course__c': course['Name'],
        'coursecode__c': course['coursecode__c'],
    }
    for course in sf.query_all_iter("SELECT coursecode__c, Name FROM Course__c")
]

In [70]:
# Turn the list of lookup dictionaries into a two-column DataFrame.
course_lookup_df = pd.DataFrame(data=course_lookup_list)
course_lookup_df

Unnamed: 0,Course__c,coursecode__c
0,a043h00000MRm32,CIS-349
1,a043h00000MRm3C,CIS-438
2,a043h00000MRm3H,CIS-430
3,a043h00000MRm3M,CIS-435
4,a043h00000MRm3R,BC-DATAVIZ
5,a043h00000MRm3W,BC-WEBDEV
6,a043h00000MRm3b,BC-UIUX
7,a043h00000MRm37,CIS-405


In [71]:
# Every column of `class`, plus the course code of the course each class
# belongs to (matched on `ID_Course`).
query = '''
    SELECT
        c.*
        ,co.CourseCode
    FROM
        class c
        INNER JOIN course co
        ON c.ID_Course = co.ID_Course
'''

# Run the query over the open MySQL connection and preview the first rows.
class_data_df = pd.read_sql(sql=query, con=conn)
class_data_df.head()

Unnamed: 0,ID_Class,ID_Course,Section,StartDate,EndDate,CourseCode
0,1,1,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14,BC-DATAVIZ
1,2,1,GWDC201805DATA3,2018-05-15,2018-11-08,BC-DATAVIZ
2,3,2,GWARL201905WEB3,2019-05-14,2019-11-07,BC-WEBDEV
3,4,3,GWARL201905UIUX3,2019-05-14,2019-11-07,BC-UIUX


In [72]:
# Map the MySQL column names onto the `__c` field names used for the Salesforce
# Class records. `rename` returns a new frame that is re-bound to the same name
# rather than mutating the existing frame with `inplace=True`.
class_data_df = class_data_df.rename(columns={
    'Section': 'Section__c',
    'StartDate': 'StartDate__c',
    'EndDate': 'Enddate__c',
    'CourseCode': 'coursecode__c',
})

class_data_df

Unnamed: 0,ID_Class,ID_Course,Section__c,StartDate__c,Enddate__c,coursecode__c
0,1,1,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14,BC-DATAVIZ
1,2,1,GWDC201805DATA3,2018-05-15,2018-11-08,BC-DATAVIZ
2,3,2,GWARL201905WEB3,2019-05-14,2019-11-07,BC-WEBDEV
3,4,3,GWARL201905UIUX3,2019-05-14,2019-11-07,BC-UIUX


## Join the Class DataFrame with the Course lookup table
This join is necessary to look up the foreign key into the Course table for each class.

In [73]:
# Display the class frame before the join; it carries `coursecode__c`.
class_data_df

Unnamed: 0,ID_Class,ID_Course,Section__c,StartDate__c,Enddate__c,coursecode__c
0,1,1,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14,BC-DATAVIZ
1,2,1,GWDC201805DATA3,2018-05-15,2018-11-08,BC-DATAVIZ
2,3,2,GWARL201905WEB3,2019-05-14,2019-11-07,BC-WEBDEV
3,4,3,GWARL201905UIUX3,2019-05-14,2019-11-07,BC-UIUX


In [74]:
# Display the course lookup frame, which maps `coursecode__c` to `Course__c`.
course_lookup_df

Unnamed: 0,Course__c,coursecode__c
0,a043h00000MRm32,CIS-349
1,a043h00000MRm3C,CIS-438
2,a043h00000MRm3H,CIS-430
3,a043h00000MRm3M,CIS-435
4,a043h00000MRm3R,BC-DATAVIZ
5,a043h00000MRm3W,BC-WEBDEV
6,a043h00000MRm3b,BC-UIUX
7,a043h00000MRm37,CIS-405


In [75]:
# Attach the Salesforce course key (`Course__c`) to every class row by matching
# on the course code.
# - `on=` names the join key explicitly instead of relying on whichever column
#   names the two frames happen to share.
# - Dropping any existing `Course__c` column first keeps the cell safe to re-run
#   directly after itself.
# - `validate='many_to_one'` raises a MergeError if a course code appears more
#   than once in the lookup table, instead of silently duplicating class rows.
class_data_df = pd.merge(
    class_data_df.drop(columns=['Course__c'], errors='ignore'),
    course_lookup_df,
    how='left',
    on='coursecode__c',
    validate='many_to_one',
)

class_data_df

Unnamed: 0,ID_Class,ID_Course,Section__c,StartDate__c,Enddate__c,coursecode__c,Course__c
0,1,1,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14,BC-DATAVIZ,a043h00000MRm3R
1,2,1,GWDC201805DATA3,2018-05-15,2018-11-08,BC-DATAVIZ,a043h00000MRm3R
2,3,2,GWARL201905WEB3,2019-05-14,2019-11-07,BC-WEBDEV,a043h00000MRm3W
3,4,3,GWARL201905UIUX3,2019-05-14,2019-11-07,BC-UIUX,a043h00000MRm3b


In [88]:
# Parse both date columns and keep only the calendar date of each value.
for date_col in ('StartDate__c', 'Enddate__c'):
    class_data_df[date_col] = pd.to_datetime(class_data_df[date_col]).dt.date

class_data_df.head()


TypeError: descriptor 'strftime' of 'datetime.date' object needs an argument

In [83]:
# The class insert below reads the course key under the name `Course_ID__c`.
class_data_df = class_data_df.rename(columns={'Course__c': 'Course_ID__c'})
class_data_df

Unnamed: 0,ID_Class,ID_Course,Section__c,StartDate__c,Enddate__c,coursecode__c,Course_ID__c
0,1,1,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14,BC-DATAVIZ,a043h00000MRm3R
1,2,1,GWDC201805DATA3,2018-05-15,2018-11-08,BC-DATAVIZ,a043h00000MRm3R
2,3,2,GWARL201905WEB3,2019-05-14,2019-11-07,BC-WEBDEV,a043h00000MRm3W
3,4,3,GWARL201905UIUX3,2019-05-14,2019-11-07,BC-UIUX,a043h00000MRm3b


In [84]:
# Keep only the four columns that make up a Salesforce Class record.
class_data_df = class_data_df.loc[
    :,
    [
        'Section__c',
        'StartDate__c',
        'Enddate__c',
        'Course_ID__c',
    ],
]
class_data_df

Unnamed: 0,Section__c,StartDate__c,Enddate__c,Course_ID__c
0,GWU-ARL-DATA-PT-09-0,2020-03-16,2020-03-14,a043h00000MRm3R
1,GWDC201805DATA3,2018-05-15,2018-11-08,a043h00000MRm3R
2,GWARL201905WEB3,2019-05-14,2019-11-07,a043h00000MRm3W
3,GWARL201905UIUX3,2019-05-14,2019-11-07,a043h00000MRm3b


In [85]:
# Convert the frame into a list with one {column: value} dictionary per row.
class_data_records = class_data_df.to_dict('records')
class_data_records

[{'Section__c': 'GWU-ARL-DATA-PT-09-0',
  'StartDate__c': datetime.date(2020, 3, 16),
  'Enddate__c': datetime.date(2020, 3, 14),
  'Course_ID__c': 'a043h00000MRm3R'},
 {'Section__c': 'GWDC201805DATA3',
  'StartDate__c': datetime.date(2018, 5, 15),
  'Enddate__c': datetime.date(2018, 11, 8),
  'Course_ID__c': 'a043h00000MRm3R'},
 {'Section__c': 'GWARL201905WEB3',
  'StartDate__c': datetime.date(2019, 5, 14),
  'Enddate__c': datetime.date(2019, 11, 7),
  'Course_ID__c': 'a043h00000MRm3W'},
 {'Section__c': 'GWARL201905UIUX3',
  'StartDate__c': datetime.date(2019, 5, 14),
  'Enddate__c': datetime.date(2019, 11, 7),
  'Course_ID__c': 'a043h00000MRm3b'}]

## Insert `Class` Records into Salesforce

In [86]:
# Create one Salesforce Class record per row. A failure is printed and the
# loop carries on with the next row.
for class_row in class_data_records:
    payload = dict(
        Course_ID__c=class_row['Course_ID__c'],
        Section__c=class_row['Section__c'],
        # Dates are sent as their `str()` form rather than as date objects.
        StartDate__c=str(class_row['StartDate__c']),
        Enddate__c=str(class_row['Enddate__c']),
    )

    try:
        sf.Class__C.create(payload)
    except Exception as err:
        print(err)

## Example of Deleting Records

Select the IDs of the records first and then process the results.

Ultimately, you want a list of `{'Id': ...}` dictionaries, one per record to delete.


In [None]:
# Fetch the Id of every Course__c record. `query_all` keeps following the
# result pagination until all records are retrieved, whereas plain `query`
# returns only the first batch of a large result set.
# NOTE: despite its name, `class_records` holds Course__c rows here.
class_records = sf.query_all("SELECT Id FROM Course__c")

# Shape the result as a list of {'Id': ...} dictionaries, the form that is
# handed to the bulk delete call.
recs_to_delete = [{'Id': r['Id']} for r in class_records['records']]
recs_to_delete

In [None]:
# Bulk-delete the Course__c records whose Ids were collected in `recs_to_delete`.
sf.bulk.Course__c.delete(recs_to_delete)