In [None]:
# Purpose:
# This program uses the actionLog file from DeepThought F20 condition3 (adaptive condition)
# Calculates the mean and standard deviation, per student, of:
#  the # of steps for total tutor
#  the # of training steps for total tutor
#  the # of level end steps
#  the # of posttest steps
#  the # of total posttest steps (posttest + level end)
#  the # of transfer task steps (problem 7.6)
#
# Preprocessing:
# Remove the rows with erroneous IDs: hkaakat, sukhan
# Sort the rows based on userId, and actionCount
#
# Author:
# Nazia Alam

In [None]:
#imports
import os
import re

import numpy as np
import pandas as pd

In [None]:
# Load the condition-3 action log (actionLog_L7_3_F20.csv).
# Only the columns named in col_list are read from the CSV.
col_list = [
    "id",
    "userID",
    "courseID",
    "actionCount",
    "currentProblem",
    "stepCount",
    "stepPreState",
    "stepPostState",
    "actionCode",
    "currentProblemType",
]
df = pd.read_csv(
    "D:/Courses/Fall2021/CSC890/Fall2020 work/Analysis/Dataset/Condition3/actionLog_L7_3_F20.csv",
    usecols=col_list,
)
# Quick sanity check: dimensions and the first few rows
print(df.shape)
print(df.head())


In [None]:
# Preprocessing
# Discard every row logged under the erroneous IDs: hkaakat, sukhan

# Index labels of the rows whose userID is one of the erroneous IDs
indexNumbers = df[df['userID'].isin(['hkaakat', 'sukhan'])].index

# Remove those rows from df in place, then report the remaining shape
df.drop(index=indexNumbers, inplace=True)
print(df.shape)

In [None]:
# Preprocessing
# Sort the rows by userID and, within each userID, by actionCount.
# The result is a new frame (dfNew); df itself is left unsorted.
dfNew = df.sort_values(by=['userID','actionCount'])
print(dfNew.shape)
# head must be *called*: printing the bare attribute shows the bound-method
# repr instead of the first rows.
print(dfNew.head())

In [None]:
# Create the student list
# result holds one row per distinct userID, in order of first appearance in
# dfNew; the totalSteps column is created here but not filled yet.
student_ids = dfNew['userID'].unique()
result = pd.DataFrame(columns=['userID', 'totalSteps'])
result['userID'] = student_ids
print(result.shape)
print(result)

In [None]:
# Prepare dfNew for step counting:
# add a stateChange column initialised to 0, then replace the index with a
# fresh 0..n-1 one (the previous index is kept as a regular column).
dfNew = dfNew.assign(stateChange=0).reset_index()
print(dfNew.head())

In [None]:
# Find the number of steps per student
# A row is flagged as a step (stateChange = 1) when ALL of the following hold:
#   * the next row has the same userID and the same currentProblem,
#   * this row's currentProblemType is "PS",
#   * stepPostState differs between this row and the next row,
#   * neither of the two stepPostState values is missing.
# Every other row gets stateChange = 0.
#
# The flags are computed with vectorised comparisons against the next row
# (shift(-1)) and written with one column assignment. The earlier row-by-row
# `dfNew['stateChange'].iloc[i] = 1` was chained assignment, which pandas does
# not guarantee will write into dfNew (it does not under Copy-on-Write).
dfNew = dfNew.astype({"stepPreState": str, "stepPostState": str})

post_state = dfNew['stepPostState']
next_post_state = post_state.shift(-1)

# The str cast turns a missing value into the literal "nan"; also treat a real
# missing value as missing in case the cast preserves it.
has_state = post_state.notna() & (post_state != "nan")
next_has_state = has_state.shift(-1, fill_value=False)

# The last row has no successor: shift(-1) yields a missing value there, so
# these equality checks are False and the last row is never flagged.
same_student = dfNew['userID'] == dfNew['userID'].shift(-1)
same_problem = dfNew['currentProblem'] == dfNew['currentProblem'].shift(-1)
is_ps_row = dfNew['currentProblemType'] == "PS"

state_changed = (post_state != next_post_state) & has_state & next_has_state

dfNew['stateChange'] = (
    same_student & same_problem & is_ps_row & state_changed
).astype('int64')

In [None]:

# Save the sorted log, including the stateChange column, to the working directory
dfNew.to_csv(path_or_buf="actionLog_3_F20_PreprocessedStepAnalysis.csv")

In [None]:
# Find the number of steps per student
# Sum the stateChange flags per userID, then look each student's total up by
# userID. Matching by key (instead of copying the group sums by position)
# keeps every total on the right row of result regardless of row order.
result['totalSteps'] = result['userID'].map(
    dfNew.groupby('userID')['stateChange'].sum()
)
print(result.head())

In [None]:
# Find the number of training steps per student
# A row counts towards training when 2 <= currentProblem < 7 and the problem
# is not one of 2.8, 3.8, 4.8, 5.8, 6.8 (those are counted as level end steps).
# Such rows keep their stateChange flag; all other rows contribute 0.
training_problem = dfNew['currentProblem']
in_training_range = (training_problem >= 2) & (training_problem < 7)
is_level_end = training_problem.isin([2.8, 3.8, 4.8, 5.8, 6.8])
dfNew['trainingSteps'] = np.where(in_training_range & ~is_level_end, dfNew['stateChange'], 0)

# Match the per-student sums to result by userID rather than by position
result['trainingSteps'] = result['userID'].map(
    dfNew.groupby('userID')['trainingSteps'].sum()
)
print(result.head())

In [None]:
# Find the number of level end steps per student
# Rows whose currentProblem is 2.8, 3.8, 4.8, 5.8 or 6.8 keep their
# stateChange flag; all other rows contribute 0.
on_level_end_problem = dfNew['currentProblem'].isin([2.8, 3.8, 4.8, 5.8, 6.8])
dfNew['levelEndSteps'] = np.where(on_level_end_problem, dfNew['stateChange'], 0)

# Match the per-student sums to result by userID rather than by position
result['levelEndSteps'] = result['userID'].map(
    dfNew.groupby('userID')['levelEndSteps'].sum()
)
print(result.head())

In [None]:
# Find the number of posttest steps per student
# Rows with currentProblem >= 7 keep their stateChange flag; all other rows
# contribute 0.
dfNew['posttestSteps'] = np.where(dfNew['currentProblem'] >= 7, dfNew['stateChange'], 0)

# Match the per-student sums to result by userID rather than by position
result['posttestSteps'] = result['userID'].map(
    dfNew.groupby('userID')['posttestSteps'].sum()
)
print(result.head())

In [None]:
# Find the number of steps per student in transfer task
# Rows with currentProblem == 7.6 keep their stateChange flag; all other rows
# contribute 0.
dfNew['posttestStepsT'] = np.where(dfNew['currentProblem'] == 7.6, dfNew['stateChange'], 0)

# Match the per-student sums to result by userID rather than by position
result['posttestStepsT'] = result['userID'].map(
    dfNew.groupby('userID')['posttestStepsT'].sum()
)
print(result.head())

In [None]:
# Save the per-student step counts.
# Create the output folder first: to_csv does not create missing directories
# and fails if "Result" is absent.
os.makedirs("Result", exist_ok=True)
result.to_csv("Result/actionLog_3_F20_ResultStepAnalysis.csv")

In [None]:
def _report_mean_sd(label, values):
    """Print `label`, then the mean and the standard deviation of `values`.

    Both statistics are computed with np.mean / np.std using their default
    arguments. Returns the pair (mean, standard deviation).
    """
    print(label)
    mean_value = np.mean(values)
    print(mean_value)
    sd_value = np.std(values)
    print(sd_value)
    return mean_value, sd_value


# Average # of steps per student
avgTotalSteps, SDTotalSteps = _report_mean_sd("total steps", result['totalSteps'])

# Average # of training steps per student
avgTrainingSteps, SDTrainingSteps = _report_mean_sd("training steps", result['trainingSteps'])

# Average # of level end steps per student
avgLevelEndSteps, SDLevelEndSteps = _report_mean_sd("level end steps", result['levelEndSteps'])

# Average # of posttest steps per student
avgPosttestSteps, SDPosttestSteps = _report_mean_sd("posttest steps", result['posttestSteps'])

# Average # of total posttest steps per student (posttest + level end)
avgTotalPosttestSteps, SDTotalPosttestSteps = _report_mean_sd(
    "total posttest steps",
    result['posttestSteps'] + result['levelEndSteps'],
)

# Average # of steps per student in transfer task
avgPosttestStepsT, SDPosttestStepsT = _report_mean_sd("transfer task", result['posttestStepsT'])