In [None]:
# Purpose:
# This notebook reads the actionLog file from DeepThought F20 condition3 (adaptive condition)
# and calculates, per student:
#  the average total tutor time
#  the average posttest time
#  the average level-end posttest time
#  the average transfer task time
#  the average training time
#
# Preprocessing:
# Remove the rows with erroneous IDs: hkaakat, sukhan
# Sort the rows by userID and actionCount
#
# Author:
# Nazia Alam

In [None]:
#imports
import pandas as pd
import numpy as np
import re
import statsmodels.api as sm
import pylab as py
import matplotlib.pyplot as plt

In [None]:
# Load the condition3 action log (actionLog_L7_3_F20.csv), keeping only the
# columns this analysis needs.
# NOTE(review): the path below is absolute and machine-specific; adjust it
# before running the notebook anywhere else.
col_list = [
    "id", "userID", "courseID", "actionCount", "currentProblem",
    "actionTime", "stepTime", "problemTime", "sessionTime",
]
action_log_path = "D:/Courses/Fall2021/CSC890/Fall2020 work/Analysis/Dataset/Condition3/actionLog_L7_3_F20.csv"
df = pd.read_csv(action_log_path, usecols=col_list)
print(df.shape)


In [None]:
# Preprocessing
# Discard every row logged under one of the erroneous IDs (hkaakat, sukhan).
erroneous_ids = ['hkaakat', 'sukhan']

# Index labels of the rows to discard
is_erroneous = df['userID'].isin(erroneous_ids)
indexNumbers = df.index[is_erroneous.to_numpy()]

# Remove those rows from df itself (in place), then report the new shape
df.drop(index=indexNumbers, inplace=True)
print(df.shape)

In [None]:
# Preprocessing
# Order the log by student (userID) and, within each student, by actionCount.
dfNew = df.sort_values(by=['userID', 'actionCount'])
print(dfNew.shape)
# head must be *called*: printing the bare attribute shows the bound method's
# repr (which embeds the whole frame) instead of the first five rows.
print(dfNew.head())


In [None]:
# Build the per-student results table: one row per distinct userID, in the
# order the IDs first appear in dfNew. totalTutorTime starts out empty and is
# filled in by a later cell.
result = pd.DataFrame(columns=['userID', 'totalTutorTime'])
student_ids = pd.unique(dfNew['userID'])
result['userID'] = student_ids
print(result.shape)
print(result)

In [None]:
# Count the log rows whose actionTime exceeds 300 seconds (5 minutes), i.e.
# the rows affected by the 5-minute cap.
# A vectorised comparison replaces the Python-level loop over .iloc; the
# resulting count is the same.
count = int((dfNew['actionTime'] > 300).sum())
print(count)

In [None]:
# Cap every actionTime at 5 minutes (300 seconds) and keep the capped value in
# a new column, updatedActionTime; values at or below the cap are unchanged.
dfNew['updatedActionTime'] = dfNew['actionTime'].clip(upper=300)
print(dfNew['updatedActionTime'])

# Save the preprocessed log (including the capped column) next to the notebook.
dfNew.to_csv("actionLog_3_F20_PreprocessedTimeAnalysis.csv")

In [None]:
# Posttest time per student: capped action time on rows whose currentProblem
# is 7 or greater; every other row contributes 0.
is_posttest = dfNew['currentProblem'] >= 7
dfNew['posttestActionTime'] = np.where(is_posttest, dfNew['updatedActionTime'], 0)

posttest_seconds_by_user = dfNew.groupby('userID')['posttestActionTime'].sum()
# Look the sums up by userID instead of assigning `.values` positionally, so
# the result no longer depends on `result` rows and groupby keys happening to
# be in the same order.
result['posttestTime'] = result['userID'].map(posttest_seconds_by_user)

In [None]:
# Pretest time per student: capped action time on rows whose currentProblem
# is 1.3 or 1.4; every other row contributes 0.
is_pretest = dfNew['currentProblem'].isin([1.3, 1.4])
dfNew['pretestActionTime'] = np.where(is_pretest, dfNew['updatedActionTime'], 0)

pretest_seconds_by_user = dfNew.groupby('userID')['pretestActionTime'].sum()
# Look the sums up by userID instead of assigning `.values` positionally, so
# the result no longer depends on `result` rows and groupby keys happening to
# be in the same order.
result['pretestTime'] = result['userID'].map(pretest_seconds_by_user)

In [None]:
# Transfer task time per student: capped action time on rows whose
# currentProblem is 7.6; every other row contributes 0.
is_transfer_task = dfNew['currentProblem'] == 7.6
dfNew['transferTaskActionTime'] = np.where(is_transfer_task, dfNew['updatedActionTime'], 0)

transfer_seconds_by_user = dfNew.groupby('userID')['transferTaskActionTime'].sum()
# Look the sums up by userID instead of assigning `.values` positionally, so
# the result no longer depends on `result` rows and groupby keys happening to
# be in the same order.
result['transferTaskTime'] = result['userID'].map(transfer_seconds_by_user)

In [None]:
# Level-end posttest time per student: capped action time on rows whose
# currentProblem is 2.8, 3.8, 4.8, 5.8 or 6.8; every other row contributes 0.
is_level_end = dfNew['currentProblem'].isin([2.8, 3.8, 4.8, 5.8, 6.8])
dfNew['levelEndPosttest'] = np.where(is_level_end, dfNew['updatedActionTime'], 0)

level_end_seconds_by_user = dfNew.groupby('userID')['levelEndPosttest'].sum()
# Look the sums up by userID instead of assigning `.values` positionally, so
# the result no longer depends on `result` rows and groupby keys happening to
# be in the same order.
result['levelEndPosttest'] = result['userID'].map(level_end_seconds_by_user)

In [None]:
# Training time per student: capped action time on rows whose currentProblem
# is at least 2 and below 7, excluding problems 2.8, 3.8, 4.8, 5.8 and 6.8;
# every other row contributes 0.
current_problem = dfNew['currentProblem']
is_training = (
    (current_problem >= 2)
    & (current_problem < 7)
    & ~current_problem.isin([2.8, 3.8, 4.8, 5.8, 6.8])
)
dfNew['trainingTime'] = np.where(is_training, dfNew['updatedActionTime'], 0)

training_seconds_by_user = dfNew.groupby('userID')['trainingTime'].sum()
# Look the sums up by userID instead of assigning `.values` positionally, so
# the result no longer depends on `result` rows and groupby keys happening to
# be in the same order.
result['trainingTime'] = result['userID'].map(training_seconds_by_user)

In [None]:
# Total tutor time per student in condition3:
#   totalTutorTime        - sum of the raw actionTime
#   updatedTotalTutorTime - sum of the capped updatedActionTime
#   totalPosttestTime     - levelEndPosttest + posttestTime
import os

# Look the per-student sums up by userID instead of assigning `.values`
# positionally, so the result no longer depends on `result` rows and groupby
# keys happening to be in the same order.
seconds_by_user = dfNew.groupby('userID')[['actionTime', 'updatedActionTime']].sum()
result['totalTutorTime'] = result['userID'].map(seconds_by_user['actionTime'])
result['updatedTotalTutorTime'] = result['userID'].map(seconds_by_user['updatedActionTime'])
result['totalPosttestTime'] = result['levelEndPosttest'] + result['posttestTime']
print(result.head())

# to_csv does not create missing directories; make sure the output folder
# exists so a fresh run does not fail on the write.
os.makedirs("Result", exist_ok=True)
result.to_csv("Result/actionLog_3_F20_ResultTimeAnalysis.csv")

In [None]:
# Summary statistics of the per-student times, converted from seconds to minutes.
# Only the time statistics (mean, std, min, quartiles, max) are divided by 60;
# the 'count' row is a number of students, not a duration, so it is left as is.
summary = result.describe()
is_time_stat = summary.index != 'count'
summary.loc[is_time_stat] = summary.loc[is_time_stat] / 60
print(summary)

In [None]:
def _print_mean_sd(seconds, sd_label="in minute SD"):
    """Print the mean and SD of a per-student time column and return them.

    Prints three lines: the mean in seconds, the mean in minutes, and the SD
    in minutes (prefixed with `sd_label`).

    Parameters
    ----------
    seconds : one time column of `result`, in seconds.
    sd_label : text printed in front of the SD value.

    Returns
    -------
    tuple (mean, sd), both in seconds.

    Note: np.std defaults to ddof=0 (population SD), whereas
    DataFrame.describe() reports the sample SD (ddof=1), so the SDs printed
    here differ slightly from the ones in a describe() table.
    """
    mean_seconds = np.mean(seconds)
    sd_seconds = np.std(seconds)
    print(mean_seconds)
    print("in minute", mean_seconds / 60)
    print(sd_label, sd_seconds / 60)
    return mean_seconds, sd_seconds


# Average total tutor time per student in condition3, from the raw (uncapped) actionTime
avgTutorTime = np.mean(result['totalTutorTime'])
print(avgTutorTime)
print("in minute", avgTutorTime / 60)

# Everything below uses the capped (updated) actionTime.

# Average and SD of total tutor time per student
print("total tutor time")
updatedAvgTutorTime, updatedSDTutorTime = _print_mean_sd(
    result['updatedTotalTutorTime'], sd_label="in minute sd"
)

# Average and SD of posttest time per student
print("final posttest")
avgPosttestTime, SDPosttestTime = _print_mean_sd(result['posttestTime'])

# Average and SD of level end posttest time per student
print("level end")
avgLevelEndPosttest, SDLevelEndPosttest = _print_mean_sd(result['levelEndPosttest'])

# Average and SD of total posttest time (levelEndPosttest + posttestTime) per student
print("total posttest")
avgTotalPosttestTime, SDTotalPosttestTime = _print_mean_sd(result['totalPosttestTime'])

# Average and SD of transfer task time per student
print("transfer task")
avgTransferPosttest, SDTransferPosttest = _print_mean_sd(result['transferTaskTime'])

# Average and SD of training time per student
print("training")
avgTrainingTime, SDTrainingTime = _print_mean_sd(result['trainingTime'])

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Distribution of the per-student level-end posttest time (in seconds),
# normalised to a density, so its shape can be inspected visually.
fig, ax = plt.subplots()
ax.hist(result['levelEndPosttest'], density=True, bins=15)
# Label the figure so it can be read on its own.
ax.set(
    title="Level-end posttest time per student",
    xlabel="levelEndPosttest (seconds)",
    ylabel="Density",
)
# Show the figure explicitly instead of leaking the (counts, bins, patches) repr.
plt.show()



In [None]:
# Import the stats submodule explicitly: a bare `import scipy` does not
# guarantee that `scipy.stats` is loaded, so the call below would only work if
# some other import had already pulled it in.
import scipy.stats
import matplotlib.pyplot

# Normal probability (Q-Q) plot of the per-student level-end posttest time,
# drawn through the pyplot interface.
scipy.stats.probplot(result['levelEndPosttest'], dist="norm", plot=matplotlib.pyplot)


In [None]:
# Normal probability (Q-Q) plot of the per-student total posttest time,
# drawn through the pyplot interface.
total_posttest_seconds = result['totalPosttestTime']
scipy.stats.probplot(total_posttest_seconds, dist="norm", plot=matplotlib.pyplot)