In [81]:
import pandas as pd
import numpy as np

def loader(filename, columns = None):
    filepath = 'Data/{}.csv'
    df = pd.read_csv(filepath.format(filename))
    df = df[columns] if columns != None else df
    return df

def merger(df_left, df_right, on_left, on_right, how = 'inner', drop_keys = True, suffix = ('_left', '_right')):
    merged = df_left.merge(df_right, how = how, left_on = on_left, right_on = on_right, suffixes = suffix)
    if drop_keys: merged.drop(columns=[on_left, on_right], inplace = True) 
    return merged

In [2]:
def get_UsersTeams():
    #Revisar schemas y relationships de tablas 
    meetingGroup = loader('MeetingGroup', ['id','name'])
    userMeetingGroup = loader('UserMeetingGroup', ['userId','meetingGroupId'])
    userMeetingGroup = merger(userMeetingGroup, meetingGroup, 'meetingGroupId','id')

    users = loader('User', ['id', 'firstName', 'middleName'])
    users['firstName'] = users['firstName'].fillna('')
    users['middleName'] = users['middleName'].fillna('')
    users['fullName'] = users['firstName'] + users['middleName']

    userMeetingGroup = merger(users, userMeetingGroup, 'id','userId', how='left', drop_keys= False)
    userMeetingGroup.drop(columns=['firstName', 'middleName', 'userId'], inplace=True)
    userMeetingGroup.rename(columns = {'fullName':'userName', 'name':'Team', 'id':'userId'},inplace=True)
    return userMeetingGroup

In [3]:
coaching = loader('Coaching', ['id','startTime','status','orgId','state','whoId','coachId','type','numberOfStudents','observationLength'])
coaching['startTime'] = pd.to_datetime(coaching.startTime, format='%Y-%m-%d %H:%M:%S')
coaching

Unnamed: 0,id,startTime,status,orgId,state,whoId,coachId,groupId,type,numberOfStudents,observationLength,observationType
0,4162,2022-03-07 20:57:00,delete,1000163,concluded,3807,1259,,observestudentengage,20,900.0,
1,4163,2022-03-07 21:08:56,delete,1000163,concluded,3807,3739,,observestudentengage,20,900.0,
2,4164,2022-03-07 21:09:01,delete,1000163,concluded,3808,3745,,observestudentengage,20,900.0,
3,4165,2022-03-07 21:09:22,delete,1000163,concluded,3779,3395,,observestudentengage,20,900.0,
4,4172,2022-03-08 16:33:21,delete,1000163,concluded,3807,1259,,observestudentengage,20,900.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
209,4634,2022-05-02 13:49:13,active,1000163,concluded,3800,3749,,observestudentengage,13,900.0,
210,4635,2022-05-02 13:59:56,active,1000163,concluded,3756,3748,,observestudentengage,24,900.0,
211,4637,2022-05-02 16:04:14,active,1000163,concluded,3769,3747,,observestudentengage,6,900.0,
212,4638,2022-05-02 17:18:54,active,1000163,concluded,3815,3749,,observestudentengage,13,900.0,


In [4]:
engagement = loader('CoachingEngagement',['id','name','type'])
engagement.rename(columns={'id':'engagementId','name':'engagementName','type':'engagementType'},inplace=True)
engagement 

Unnamed: 0,engagementId,engagementName,engagementType
0,1,Active,engagement
1,2,Passive,engagement
2,3,Off-Task,engagement
3,4,Down Time,engagement
4,5,Listening,learningtask
5,6,Doing/Demonstrating,learningtask
6,7,Reading,learningtask
7,8,Speaking,learningtask
8,9,Writing,learningtask
9,10,,learningtask


In [34]:
coachingData = loader('CoachingData', ['status', 'coachingId', 'engagementId','userId','studentNum'])
#add filter for status
#get observations per observationId
coachingDataStudentNum = coachingData[['coachingId','studentNum']]
coachingDataStudentNum = coachingDataStudentNum.groupby('coachingId').max('studentNum')
coachingDataStudentNum.reset_index(inplace=True)

#get observations per engagement type
coachingDataEngagement = coachingData[['coachingId','engagementId','status']].groupby(['coachingId','engagementId']).count()
coachingDataEngagement.reset_index(inplace=True)
coachingDataEngagement = merger(coachingDataEngagement, engagement, 'engagementId','engagementId')
# coachingDataEngagement = coachingDataEngagement[coachingDataEngagement.engagementType=='engagement'].drop(columns=['engagementType'])
coachingDataEngagement = coachingDataEngagement.pivot(index='coachingId', columns='engagementName',values='status')
coachingDataEngagement.reset_index(inplace=True)
coachingDataEngagement.fillna(0,inplace=True)
coachingDataEngagement

engagementName,coachingId,Active,Doing/Demonstrating,Down Time,Independent Work Session,Listening,No Grouping,None,Off-Task,Partner,Passive,Reading,Small Group,Speaking,Whole Class,Writing
0,4162,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,2.0,1.0,0.0
1,4163,7.0,3.0,0.0,0.0,3.0,0.0,1.0,1.0,0.0,4.0,0.0,5.0,4.0,7.0,1.0
2,4164,8.0,0.0,2.0,0.0,5.0,0.0,0.0,3.0,4.0,4.0,0.0,0.0,5.0,13.0,7.0
3,4165,8.0,0.0,0.0,5.0,6.0,0.0,0.0,2.0,4.0,5.0,0.0,0.0,4.0,6.0,5.0
4,4172,3.0,5.0,0.0,0.0,1.0,0.0,0.0,1.0,3.0,3.0,1.0,3.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,4634,23.0,41.0,1.0,41.0,0.0,0.0,0.0,3.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0
207,4635,31.0,30.0,0.0,30.0,5.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,5.0,0.0
208,4637,26.0,3.0,2.0,1.0,5.0,0.0,0.0,6.0,23.0,5.0,7.0,15.0,24.0,0.0,0.0
209,4638,13.0,0.0,8.0,0.0,19.0,0.0,1.0,0.0,0.0,17.0,0.0,0.0,0.0,41.0,21.0


In [35]:
coachingAdded = merger(coaching, coachingDataStudentNum, 'id', 'coachingId', how = 'left', drop_keys=False)
coachingAdded = merger(coachingAdded, coachingDataEngagement, 'id', 'coachingId', how = 'left', drop_keys=False)
coachingAdded.drop(columns= ['coachingId_left', 'coachingId_right'], inplace=True)
coachingAdded['month'] = coachingAdded['startTime'].dt.month
coachingAdded
#studentObsrvation = sum(numberOfStudents) -> sum of students being observed

Unnamed: 0,id,startTime,status,orgId,state,whoId,coachId,groupId,type,numberOfStudents,...,None,Off-Task,Partner,Passive,Reading,Small Group,Speaking,Whole Class,Writing,month
0,4162,2022-03-07 20:57:00,delete,1000163,concluded,3807,1259,,observestudentengage,20,...,0.0,0.0,2.0,1.0,0.0,0.0,2.0,1.0,0.0,3
1,4163,2022-03-07 21:08:56,delete,1000163,concluded,3807,3739,,observestudentengage,20,...,1.0,1.0,0.0,4.0,0.0,5.0,4.0,7.0,1.0,3
2,4164,2022-03-07 21:09:01,delete,1000163,concluded,3808,3745,,observestudentengage,20,...,0.0,3.0,4.0,4.0,0.0,0.0,5.0,13.0,7.0,3
3,4165,2022-03-07 21:09:22,delete,1000163,concluded,3779,3395,,observestudentengage,20,...,0.0,2.0,4.0,5.0,0.0,0.0,4.0,6.0,5.0,3
4,4172,2022-03-08 16:33:21,delete,1000163,concluded,3807,1259,,observestudentengage,20,...,0.0,1.0,3.0,3.0,1.0,3.0,0.0,1.0,0.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,4634,2022-05-02 13:49:13,active,1000163,concluded,3800,3749,,observestudentengage,13,...,0.0,3.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,5
210,4635,2022-05-02 13:59:56,active,1000163,concluded,3756,3748,,observestudentengage,24,...,0.0,2.0,0.0,2.0,0.0,0.0,0.0,5.0,0.0,5
211,4637,2022-05-02 16:04:14,active,1000163,concluded,3769,3747,,observestudentengage,6,...,0.0,6.0,23.0,5.0,7.0,15.0,24.0,0.0,0.0,5
212,4638,2022-05-02 17:18:54,active,1000163,concluded,3815,3749,,observestudentengage,13,...,1.0,0.0,0.0,17.0,0.0,0.0,0.0,41.0,21.0,5


In [15]:
coachingAdded.to_csv('Output/data.csv')

In [37]:
test = coachingAdded[coachingAdded['month']==5]
test

Unnamed: 0,id,startTime,status,orgId,state,whoId,coachId,groupId,type,numberOfStudents,...,None,Off-Task,Partner,Passive,Reading,Small Group,Speaking,Whole Class,Writing,month
209,4634,2022-05-02 13:49:13,active,1000163,concluded,3800,3749,,observestudentengage,13,...,0.0,3.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,5
210,4635,2022-05-02 13:59:56,active,1000163,concluded,3756,3748,,observestudentengage,24,...,0.0,2.0,0.0,2.0,0.0,0.0,0.0,5.0,0.0,5
211,4637,2022-05-02 16:04:14,active,1000163,concluded,3769,3747,,observestudentengage,6,...,0.0,6.0,23.0,5.0,7.0,15.0,24.0,0.0,0.0,5
212,4638,2022-05-02 17:18:54,active,1000163,concluded,3815,3749,,observestudentengage,13,...,1.0,0.0,0.0,17.0,0.0,0.0,0.0,41.0,21.0,5
213,4644,2022-05-03 13:47:20,active,1000163,concluded,3765,3749,,observestudentengage,25,...,0.0,2.0,0.0,25.0,0.0,0.0,0.0,42.0,0.0,5


In [38]:
test.sum(axis = 0)

  test.sum(axis = 0)


id                                                                      23188
status                                         activeactiveactiveactiveactive
orgId                                                                 5000815
state                           concludedconcludedconcludedconcludedconcluded
whoId                                                                   18905
coachId                                                                 18742
groupId                                                                   0.0
type                        observestudentengageobservestudentengageobserv...
numberOfStudents                                                           81
observationLength                                                      4500.0
observationType                                                           0.0
studentNum                                                              198.0
Active                                                          

In [88]:
coaching_t = loader('CoachingData')
coaching_t = coaching_t[coaching_t.type.str.contains('learningintention')]
coaching_t = coaching_t[~coaching_t.note.isna()]
coaching_t.pivot(index='coachingId', columns='type',values='note').fillna(0)
coaching_t[coaching_t.coachingId == 4635] 

type,learningintentioninstructionrelevant,learningintentionposted,learningintentionreferredteacher,learningintentionstudentsaware
coachingId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4163,0,0,1,0
4165,1,1,1,1
4190,0,0,1,0
4193,0,0,1,1
4197,0,1,1,0
4198,1,1,1,1
4200,1,0,1,1
4204,0,1,1,0
4205,0,1,0,0
4207,1,1,1,1


In [118]:
coaching_new = loader('CoachingData')
coaching_new = coaching_new[coaching_new.type.str.contains('interaction')]
coaching_new = coaching_new.pivot_table(index='coachingId', columns='type',values='count', aggfunc='sum')
coaching_new.reset_index(inplace=True)
coaching_new = coaching_new.iloc[205:]


type,coachingId,interactionacademicaffirmative,interactionacademiccorrective,interactionactivestudentresponse,interactionbehavioralaffirmative,interactionbehavioralcorrective,interactioncannotdetermine,interactionharsh
205,4634,1.0,0.0,0.0,0.0,0.0,0.0,0.0
206,4635,9.0,8.0,5.0,18.0,5.0,0.0,0.0
207,4637,0.0,1.0,0.0,0.0,0.0,0.0,0.0
208,4638,1.0,0.0,7.0,0.0,0.0,0.0,0.0
209,4644,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [128]:
behavioral = (18+5)/5
academic = (1+9+8+1+1)/5
total =  behavioral + academic
print(academic, behavioral, academic/total, behavioral/total)

4.0 4.6 0.46511627906976744 0.5348837209302325


In [125]:
affirmative = (1+9+1+18)/5
corrective = (5+8+1)/5
total2 = affirmative + corrective
print(affirmative/total2, corrective/total2)

0.6744186046511628 0.3255813953488372


In [132]:
meetinggroup = loader('MeetingGroup')
meetinggroup[meetinggroup['name'] == 'Beginning']

Unnamed: 0,id,dt,ct,status,name,orgId,groupType,color
11,433,2022-05-04 18:29:08,2022-05-04 18:29:08,active,Beginning,1000163,coachinggroup,#87CEEB


In [134]:
usermeetinggroup = loader('UserMeetingGroup')
usermeetinggroup[usermeetinggroup['meetingGroupId']==433]

Unnamed: 0,id,dt,ct,status,userId,meetingGroupId
2347,2861,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3828,433
2348,2862,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3753,433
2349,2863,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3812,433
2350,2864,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3830,433
2351,2865,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3798,433
2352,2866,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3824,433
2353,2867,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3766,433
2354,2868,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3829,433
2355,2869,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3752,433
2356,2870,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3783,433


In [138]:
users = loader('User')
users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4057 entries, 0 to 4056
Data columns (total 24 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   id                           4057 non-null   int64  
 1   dt                           4057 non-null   object 
 2   ct                           4057 non-null   object 
 3   status                       4057 non-null   object 
 4   username                     4056 non-null   object 
 5   password                     4052 non-null   object 
 6   passwordResetHash            59 non-null     object 
 7   email                        3638 non-null   object 
 8   firstName                    4057 non-null   object 
 9   middleName                   16 non-null     object 
 10  lastName                     4056 non-null   object 
 11  cellPhone                    72 non-null     object 
 12  homePhone                    10 non-null     object 
 13  birthday          

In [140]:
user_team = merger(usermeetinggroup, users, 'userId', 'id', drop_keys=False)
user_team[user_team['meetingGroupId']==433]

Unnamed: 0,id_left,dt_left,ct_left,status_left,userId,meetingGroupId,id_right,dt_right,ct_right,status_right,...,sessionLocked,hardPromptPassReset,timezone,emailNotification,oauthStateOutlook,accessTokenOutlook,refreshTokenOutlook,tokenExpiresOutlook,redirectUriOutlook,outlookToggleAuthentication
249,2891,2022-05-04 18:29:08,2022-05-04 18:29:08,active,1259,433,1259,2020-08-10 09:27:28,2019-05-20 13:56:33,active,...,0,0,,1,,,,,,0
2249,2881,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3759,433,3759,2022-03-07 13:08:01,2022-03-07 13:08:01,active,...,0,0,,0,,,,,,0
2257,2867,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3766,433,3766,2022-03-07 13:09:21,2022-03-07 13:09:21,active,...,0,0,,0,,,,,,0
2260,2869,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3752,433,3752,2022-03-07 13:06:01,2022-03-07 13:06:00,active,...,0,0,,0,,,,,,0
2372,2861,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3828,433,3828,2022-03-07 13:24:43,2022-03-07 13:24:43,active,...,0,0,,0,,,,,,0
2374,2862,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3753,433,3753,2022-03-07 13:06:35,2022-03-07 13:06:35,active,...,0,0,,0,,,,,,0
2376,2863,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3812,433,3812,2022-03-07 13:20:50,2022-03-07 13:20:50,active,...,0,0,,0,,,,,,0
2378,2864,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3830,433,3830,2022-03-07 13:25:03,2022-03-07 13:25:02,active,...,0,0,,0,,,,,,0
2380,2865,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3798,433,3798,2022-03-07 13:17:30,2022-03-07 13:17:30,active,...,0,0,,0,,,,,,0
2382,2866,2022-05-04 18:29:08,2022-05-04 18:29:08,active,3824,433,3824,2022-03-07 13:23:43,2022-03-07 13:23:43,active,...,0,0,,0,,,,,,0
