In [None]:
#!pip install simple_salesforce

In [1]:
import json
from datetime import date
from urllib.parse import quote

import pandas as pd
import pymysql
from sqlalchemy import create_engine

# Let PyMySQL stand in for the MySQLdb driver.
pymysql.install_as_MySQLdb()

In [3]:
# Credentials and connection settings are read from a local `config.py`; supply your own.
# The names imported below must match the variable names defined in that file.
from config import sf_username, sf_password, sf_security_token
from config import remote_db_endpoint, remote_db_port
from config import remote_db_name, remote_db_user, remote_db_pwd

In [4]:
from simple_salesforce import Salesforce

# Open the Salesforce session with the credentials imported from `config.py`.
sf = Salesforce(
    username=sf_username,
    password=sf_password,
    security_token=sf_security_token,
)

In [5]:
# Build the MySQL engine. The user name and password are percent-encoded because
# characters such as `@`, `:`, `/` or `%` in a raw credential would otherwise be
# read as URL delimiters and corrupt the connection string.
engine = create_engine(
    f"mysql://{quote(remote_db_user, safe='')}:{quote(remote_db_pwd, safe='')}"
    f"@{remote_db_endpoint}:{remote_db_port}/{remote_db_name}"
)
# Single connection reused by the `pd.read_sql` calls below.
conn = engine.connect()

## Prepare ETL for the Student data

In [6]:
# Pull every row and column of the MySQL `student` table into a DataFrame.
student_data_df = pd.read_sql(sql="SELECT * FROM student", con=conn)
student_data_df

Unnamed: 0,ID_Student,StudentID,LastName,FirstName,MiddleName,BirthDate,Gender
0,33,25004961,Moore,Heather,Alice,,F
1,34,25003514,Multak,Ilana,Cecille,,F
2,35,25005833,Murillo,Jessica,Dorothy,,F
3,36,25002589,Romanowski,Kandra,Genevieve,,F
4,37,25007185,Hoffer,Katherine,Lynnette,,F
5,38,25006014,Poocharoen,Pariya,Mariette,,F
6,39,25007528,Mantrala,Sriharitha,Patty,,F
7,40,25003778,Kelly,Carly,Raelene,,F
8,41,25003605,Sraha,Clementine,Kendall,,F
9,42,25002056,Abdulrahim,Jawaher,Angela,,F


In [6]:
# The Salesforce field names used by the insert further down carry a `__c`
# suffix, so rename the source columns to match them.
student_field_map = {
    source_col: f"{source_col}__c"
    for source_col in ('StudentID', 'LastName', 'FirstName', 'MiddleName', 'BirthDate', 'Gender')
}
student_data_df = student_data_df.rename(columns=student_field_map)
student_data_df.head()

Unnamed: 0,ID_Student,StudentID__c,LastName__c,FirstName__c,MiddleName__c,BirthDate__c,Gender__c
0,33,25004961,Moore,Heather,Alice,,F
1,34,25003514,Multak,Ilana,Cecille,,F
2,35,25005833,Murillo,Jessica,Dorothy,,F
3,36,25002589,Romanowski,Kandra,Genevieve,,F
4,37,25007185,Hoffer,Katherine,Lynnette,,F


In [7]:
# Keep only the renamed Salesforce columns (this leaves out `ID_Student`).
salesforce_student_cols = [
    'StudentID__c',
    'LastName__c',
    'FirstName__c',
    'MiddleName__c',
    'BirthDate__c',
    'Gender__c',
]
student_data_df = student_data_df.loc[:, salesforce_student_cols]
student_data_df.dtypes

StudentID__c     object
LastName__c      object
FirstName__c     object
MiddleName__c    object
BirthDate__c     object
Gender__c        object
dtype: object

In [9]:
# One dict per student row, keyed by column name.
student_data_records = student_data_df.to_dict(orient='records')
student_data_records

[{'StudentID__c': '25004961',
  'LastName__c': 'Moore',
  'FirstName__c': 'Heather',
  'MiddleName__c': 'Alice',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'StudentID__c': '25003514',
  'LastName__c': 'Multak',
  'FirstName__c': 'Ilana',
  'MiddleName__c': 'Cecille',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'StudentID__c': '25005833',
  'LastName__c': 'Murillo',
  'FirstName__c': 'Jessica',
  'MiddleName__c': 'Dorothy',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'StudentID__c': '25002589',
  'LastName__c': 'Romanowski',
  'FirstName__c': 'Kandra',
  'MiddleName__c': 'Genevieve',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'StudentID__c': '25007185',
  'LastName__c': 'Hoffer',
  'FirstName__c': 'Katherine',
  'MiddleName__c': 'Lynnette',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'StudentID__c': '25006014',
  'LastName__c': 'Poocharoen',
  'FirstName__c': 'Pariya',
  'MiddleName__c': 'Mariette',
  'BirthDate__c': None,
  'Gender__c': 'F'},
 {'StudentID__c': '2500752

In [10]:
# Fields sent to Salesforce for each student; `BirthDate__c` is not sent.
student_fields = ('StudentID__c', 'LastName__c', 'FirstName__c', 'MiddleName__c', 'Gender__c')

for student in student_data_records:
    payload = {field: student[field] for field in student_fields}

    try:
        sf.Student__c.create(payload)
    except Exception as e:
        # Report the failure and carry on with the remaining students.
        print(e)

## Create student Lookup Table


In [7]:
# The `Name` column is the primary key in Salesforce objects.
# Pair each Salesforce `Name` with the student's `StudentID__c` so rows can be matched later.
student_lookup_list = [
    {'ID_Student': sf_row['Name'], 'StudentID__c': sf_row['StudentID__c']}
    for sf_row in sf.query_all_iter("SELECT StudentID__c, Name FROM Student__c")
]

In [8]:
# Turn the list of lookup dicts into a DataFrame so it can be merged later.
student_lookup_df = pd.DataFrame(data=student_lookup_list)
student_lookup_df

Unnamed: 0,ID_Student,StudentID__c
0,a0I3h000001DfRQ,25002056
1,a0I3h000001DfRp,25006805
2,a0I3h000001DfSs,25007333
3,a0I3h000001DfSn,25006027
4,a0I3h000001DfRk,25005602
5,a0I3h000001DfRL,25003605
6,a0I3h000001DfRM,25002876
7,a0I3h000001DfS4,25005284
8,a0I3h000001DfQw,25002589
9,a0I3h000001DfR1,25007185


## Create class Lookup Table

In [13]:
# The `Name` column is the primary key in Salesforce objects.
# Pair each Salesforce `Name` with the class's `Section__c` so rows can be matched later.
class_lookup_list = [
    {'class ID': sf_row['Name'], 'Section__c': sf_row['Section__c']}
    for sf_row in sf.query_all_iter("SELECT Section__c, Name FROM class__c")
]

In [14]:
# Turn the list of lookup dicts into a DataFrame so it can be merged later.
class_lookup_df = pd.DataFrame(data=class_lookup_list)
class_lookup_df

Unnamed: 0,class ID,Section__c
0,a083h0000010ByB,GWDC201805DATA3
1,a083h0000010ByL,GWARL201905UIUX3
2,a083h0000010By6,GWU-ARL-DATA-PT-09-0
3,a083h0000010ByG,GWARL201905WEB3


## Join class participants to their class and student in SQL

In [15]:
# Every participant row plus the natural keys needed for the Salesforce lookups:
# the class `Section` and the student's `StudentID`.
query = '''
    SELECT
        cp.*
        ,c.Section
        ,s.StudentID
    FROM
        classparticipant cp
        INNER JOIN class c
        ON c.ID_class  = cp.ID_class
        INNER Join student s 
        on s.id_student = cp.id_student
'''
classparticipant_data_df = pd.read_sql(sql=query, con=conn)
classparticipant_data_df

Unnamed: 0,ID_ClassParticipant,ID_Student,ID_Class,StartDate,EndDate,Section,StudentID
0,1,33,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25004961
1,2,34,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003514
2,3,35,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25005833
3,4,62,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007334
4,5,36,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25002589
5,6,37,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007185
6,7,38,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25006014
7,8,39,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007528
8,9,40,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003778
9,10,41,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003605


In [16]:
# Rename to the `__c` names used by the lookup tables and the Salesforce insert.
participant_field_map = {
    'StartDate': 'StartDate__c',
    'EndDate': 'Enddate__c',
    'Section': 'Section__c',
    'StudentID': 'StudentID__c',
}
classparticipant_data_df = classparticipant_data_df.rename(columns=participant_field_map)

classparticipant_data_df

Unnamed: 0,ID_ClassParticipant,ID_Student,ID_Class,StartDate__c,Enddate__c,Section__c,StudentID__c
0,1,33,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25004961
1,2,34,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003514
2,3,35,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25005833
3,4,62,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007334
4,5,36,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25002589
5,6,37,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007185
6,7,38,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25006014
7,8,39,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007528
8,9,40,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003778
9,10,41,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003605


## Join the ClassParticipant DataFrame with the class and student lookup tables
These joins are necessary to look up the Salesforce foreign keys for the class and the student of each participant row.

In [17]:
# Show the class lookup table again for reference ahead of the merge.
class_lookup_df

Unnamed: 0,class ID,Section__c
0,a083h0000010ByB,GWDC201805DATA3
1,a083h0000010ByL,GWARL201905UIUX3
2,a083h0000010By6,GWU-ARL-DATA-PT-09-0
3,a083h0000010ByG,GWARL201905WEB3


In [18]:
# Show the student lookup table again for reference ahead of the merge.
student_lookup_df

Unnamed: 0,ID_Student,StudentID__c
0,a0I3h000001DfRQ,25002056
1,a0I3h000001DfRp,25006805
2,a0I3h000001DfSs,25007333
3,a0I3h000001DfSn,25006027
4,a0I3h000001DfRk,25005602
5,a0I3h000001DfRL,25003605
6,a0I3h000001DfRM,25002876
7,a0I3h000001DfS4,25005284
8,a0I3h000001DfQw,25002589
9,a0I3h000001DfR1,25007185


In [None]:
# Show the participant frame that the lookup tables are merged onto.
classparticipant_data_df

In [19]:
# Attach the Salesforce class `Name` (column `class ID`) to every participant row.
# The join key is stated explicitly: without `on=`, `pd.merge` joins on every
# column name the two frames happen to share, so an unrelated shared column
# would silently change which rows match.
# LEFT join: participants whose section has no Salesforce match are kept, with
# a missing `class ID`.
class_p_df = pd.merge(classparticipant_data_df, class_lookup_df, how='left', on='Section__c')

class_p_df

Unnamed: 0,ID_ClassParticipant,ID_Student,ID_Class,StartDate__c,Enddate__c,Section__c,StudentID__c,class ID
0,1,33,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25004961,a083h0000010By6
1,2,34,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003514,a083h0000010By6
2,3,35,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25005833,a083h0000010By6
3,4,62,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007334,a083h0000010By6
4,5,36,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25002589,a083h0000010By6
5,6,37,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007185,a083h0000010By6
6,7,38,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25006014,a083h0000010By6
7,8,39,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007528,a083h0000010By6
8,9,40,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003778,a083h0000010By6
9,10,41,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003605,a083h0000010By6


In [21]:
# Rename the lookup's key so it does not clash with the MySQL `ID_Student`
# column that is already present on the participant frame.
student_lookup_df = student_lookup_df.rename(columns={'ID_Student': 'ID_Student2'})
student_lookup_df

Unnamed: 0,ID_Student2,StudentID__c
0,a0I3h000001DfRQ,25002056
1,a0I3h000001DfRp,25006805
2,a0I3h000001DfSs,25007333
3,a0I3h000001DfSn,25006027
4,a0I3h000001DfRk,25005602
5,a0I3h000001DfRL,25003605
6,a0I3h000001DfRM,25002876
7,a0I3h000001DfS4,25005284
8,a0I3h000001DfQw,25002589
9,a0I3h000001DfR1,25007185


In [22]:
# Attach the Salesforce student `Name` (column `ID_Student2`) to every participant row.
# The join key is stated explicitly: without `on=`, `pd.merge` joins on every
# column name the two frames happen to share, so an unrelated shared column
# would silently change which rows match.
# LEFT join: participants whose student has no Salesforce match are kept, with
# a missing `ID_Student2`.
class_p_final_df = pd.merge(class_p_df, student_lookup_df, how='left', on='StudentID__c')
class_p_final_df

Unnamed: 0,ID_ClassParticipant,ID_Student,ID_Class,StartDate__c,Enddate__c,Section__c,StudentID__c,class ID,ID_Student2
0,1,33,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25004961,a083h0000010By6,a0I3h000001DfQh
1,2,34,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003514,a083h0000010By6,a0I3h000001DfQm
2,3,35,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25005833,a083h0000010By6,a0I3h000001DfQr
3,4,62,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007334,a083h0000010By6,a0I3h000001DfRW
4,5,36,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25002589,a083h0000010By6,a0I3h000001DfQw
5,6,37,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007185,a083h0000010By6,a0I3h000001DfR1
6,7,38,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25006014,a083h0000010By6,a0I3h000001DfR6
7,8,39,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25007528,a083h0000010By6,a0I3h000001DfRB
8,9,40,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003778,a083h0000010By6,a0I3h000001DfRG
9,10,41,1,2020-03-16,,GWU-ARL-DATA-PT-09-0,25003605,a083h0000010By6,a0I3h000001DfRL


In [24]:
# Remove the MySQL keys and the columns that were only needed for the joins.
cols_to_drop = [
    'ID_ClassParticipant',
    'ID_Student',
    'ID_Class',
    'Section__c',
    'StudentID__c',
]
class_p_final_df = class_p_final_df.drop(columns=cols_to_drop)

class_p_final_df

Unnamed: 0,StartDate__c,Enddate__c,class ID,ID_Student2
0,2020-03-16,,a083h0000010By6,a0I3h000001DfQh
1,2020-03-16,,a083h0000010By6,a0I3h000001DfQm
2,2020-03-16,,a083h0000010By6,a0I3h000001DfQr
3,2020-03-16,,a083h0000010By6,a0I3h000001DfRW
4,2020-03-16,,a083h0000010By6,a0I3h000001DfQw
5,2020-03-16,,a083h0000010By6,a0I3h000001DfR1
6,2020-03-16,,a083h0000010By6,a0I3h000001DfR6
7,2020-03-16,,a083h0000010By6,a0I3h000001DfRB
8,2020-03-16,,a083h0000010By6,a0I3h000001DfRG
9,2020-03-16,,a083h0000010By6,a0I3h000001DfRL


In [25]:
# The MySQL `ID_Student` column is gone, so the Salesforce key can take that name back.
class_p_final_df = class_p_final_df.rename(columns={'ID_Student2': 'ID_Student'})
class_p_final_df

Unnamed: 0,StartDate__c,Enddate__c,class ID,ID_Student
0,2020-03-16,,a083h0000010By6,a0I3h000001DfQh
1,2020-03-16,,a083h0000010By6,a0I3h000001DfQm
2,2020-03-16,,a083h0000010By6,a0I3h000001DfQr
3,2020-03-16,,a083h0000010By6,a0I3h000001DfRW
4,2020-03-16,,a083h0000010By6,a0I3h000001DfQw
5,2020-03-16,,a083h0000010By6,a0I3h000001DfR1
6,2020-03-16,,a083h0000010By6,a0I3h000001DfR6
7,2020-03-16,,a083h0000010By6,a0I3h000001DfRB
8,2020-03-16,,a083h0000010By6,a0I3h000001DfRG
9,2020-03-16,,a083h0000010By6,a0I3h000001DfRL


In [26]:
# Parse both date columns and keep only the calendar date (no time component).
class_p_final_df = class_p_final_df.assign(
    StartDate__c=lambda frame: pd.to_datetime(frame['StartDate__c']).dt.date,
    Enddate__c=lambda frame: pd.to_datetime(frame['Enddate__c']).dt.date,
)

class_p_final_df.head()


Unnamed: 0,StartDate__c,Enddate__c,class ID,ID_Student
0,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfQh
1,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfQm
2,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfQr
3,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfRW
4,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfQw


In [28]:
# Final rename to the field names used when creating `classparticipant__c` records.
salesforce_key_names = {'class ID': 'class_ID__c', 'ID_Student': 'ID_Student__c'}
class_p_final_df = class_p_final_df.rename(columns=salesforce_key_names)
class_p_final_df

Unnamed: 0,StartDate__c,Enddate__c,class_ID__c,ID_Student__c
0,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfQh
1,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfQm
2,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfQr
3,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfRW
4,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfQw
5,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfR1
6,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfR6
7,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfRB
8,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfRG
9,2020-03-16,NaT,a083h0000010By6,a0I3h000001DfRL


In [29]:
# One dict per participant row, keyed by column name.
class_p_records = class_p_final_df.to_dict('records')
class_p_records

[{'StartDate__c': datetime.date(2020, 3, 16),
  'Enddate__c': NaT,
  'class_ID__c': 'a083h0000010By6',
  'ID_Student__c': 'a0I3h000001DfQh'},
 {'StartDate__c': datetime.date(2020, 3, 16),
  'Enddate__c': NaT,
  'class_ID__c': 'a083h0000010By6',
  'ID_Student__c': 'a0I3h000001DfQm'},
 {'StartDate__c': datetime.date(2020, 3, 16),
  'Enddate__c': NaT,
  'class_ID__c': 'a083h0000010By6',
  'ID_Student__c': 'a0I3h000001DfQr'},
 {'StartDate__c': datetime.date(2020, 3, 16),
  'Enddate__c': NaT,
  'class_ID__c': 'a083h0000010By6',
  'ID_Student__c': 'a0I3h000001DfRW'},
 {'StartDate__c': datetime.date(2020, 3, 16),
  'Enddate__c': NaT,
  'class_ID__c': 'a083h0000010By6',
  'ID_Student__c': 'a0I3h000001DfQw'},
 {'StartDate__c': datetime.date(2020, 3, 16),
  'Enddate__c': NaT,
  'class_ID__c': 'a083h0000010By6',
  'ID_Student__c': 'a0I3h000001DfR1'},
 {'StartDate__c': datetime.date(2020, 3, 16),
  'Enddate__c': NaT,
  'class_ID__c': 'a083h0000010By6',
  'ID_Student__c': 'a0I3h000001DfR6'},
 {'Sta

## Insert `Class Participant` Records into Salesforce

In [30]:
# Create one Salesforce `classparticipant__c` record per participant row.
# `Enddate__c` is not sent.
for rec in class_p_records:
    # Both lookups were LEFT joins, so a row whose section or student was not
    # found in Salesforce has a missing (NaN) key here. Report and skip it
    # instead of sending a record that cannot reference its parents.
    missing_keys = [key for key in ('ID_Student__c', 'class_ID__c') if pd.isna(rec[key])]
    if missing_keys:
        print(f"Skipping record with no Salesforce match for {missing_keys}: {rec}")
        continue

    start_date = rec['StartDate__c']
    record = {
        'ID_Student__c': rec['ID_Student__c'],
        'class_ID__c': rec['class_ID__c'],
        # str() of a missing date would send the literal text 'NaT'; send null instead.
        'StartDate__c': None if pd.isna(start_date) else str(start_date),
    }

    try:
        sf.classparticipant__c.create(record)
    except Exception as e:
        # Best-effort load: say which row failed and continue with the rest.
        print(f"Failed to create classparticipant__c for {record}: {e}")