In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Read Data

In [27]:
def read_data(file_path):
    '''Read Excel File'''
    
    df = pd.read_excel(file_path, engine = 'openpyxl')

    return df

In [28]:
df = read_data("data/cleaned_data.xlsx")
df.head()

Unnamed: 0,last_modified_time,exam_start_time,extra_time_mins,incident_time_mins,candidate_id,max_question_score,question_number,question_title,question_duration_seconds,auto_score_per_question,candidate_response_code,candidate_response_text,total_score
0,2024-12-11 15:57:52,2024-12-11 14:00:02,0,0,17107,2.4,1.1,Tallsystemer,85,2.4,simpleChoice_1368096164456,Heksadesimale tall,71.61
1,2024-12-11 15:57:52,2024-12-11 14:00:02,0,0,17107,2.4,1.1,Tallsystemer,85,2.4,simpleChoice_IA172399520449584402464-f255-46e6...,Oktale tall,71.61
2,2024-12-11 15:57:52,2024-12-11 14:00:02,0,0,17107,2.4,1.1,Tallsystemer,85,2.4,simpleChoice_IA17239952044953d28e157-ac4c-49db...,Titallsystemet,71.61
3,2024-12-11 15:57:52,2024-12-11 14:00:02,0,0,17107,2.4,1.1,Tallsystemer,85,2.4,simpleChoice_IA1723995204495960f601e-73d5-48f8...,Heksadesimale tall,71.61
4,2024-12-11 15:57:52,2024-12-11 14:00:02,0,0,17107,2.4,1.1,Tallsystemer,85,2.4,simpleChoice_IA17239952636882cb13f25-bd73-4352...,Titallsystemet,71.61


In [29]:
def prep_total_time_data(df):
    '''Extract relevant data for total time analysis and create new df'''
   
    exam_time_df = df.groupby('candidate_id', as_index=False).first()[
    ['candidate_id', 'exam_start_time', 'last_modified_time', 'extra_time_mins', 'incident_time_mins', 'total_score']
    ]

    return exam_time_df

In [36]:
def prep_time_per_task(df):
    '''Extract time relevant data for each student per question and create new df'''
       
    task_time_df = df.groupby('candidate_id', as_index=False).first()[
    ['candidate_id', 'question_number', 'question_title', 'question_duration_seconds', 'incident_time_mins', 'auto_score_per_question', 'max_question_score']
    ]
		
    return task_time_df

In [37]:
task_time_df = prep_time_per_task(df)
task_time_df.head()

Unnamed: 0,candidate_id,question_number,question_title,question_duration_seconds,incident_time_mins,auto_score_per_question,max_question_score
0,17104,1.1,Tallsystemer,295,0,2.4,2.4
1,17105,1.1,Tallsystemer,255,0,2.1,2.4
2,17106,1.1,Tallsystemer,129,0,0.9,2.4
3,17107,1.1,Tallsystemer,85,0,2.4,2.4
4,17112,1.1,Tallsystemer,272,0,1.2,2.4


In [38]:
def add_time_usage_stats(df, max_minutes = 210):
    """
    Adds time usage columns to the DataFrame (df):
      - Brukt_tid: total time used in minutes
      - Tid_igjen: remaining time in minutes
      - Prosent_brukt: percent of allowed time used
    """
    df = df.copy() 
    
    # Calculate how much time was used (in minutes), rounded to 2 decimal places
    df["Total_time_used_mins"] = round((df['last_modified_time'] - df['exam_start_time']).dt.total_seconds() / 60 ,2)

    # Calculate how much time was left (maximum allowed minus used), rounded to 2 decimals 
    df["Remaining_time_mins"] = round(max_minutes - df["Total_time_used_mins"], 2)

    # Calculate percentage of allowed time used, rounded to 2 decimals
    df["%Time_used"] = round((df["Total_time_used_mins"] / max_minutes) * 100, 2)
    
    return df

## Execution

In [42]:
# Read our data file
df = read_data("data/cleaned_data.xlsx")
df.head()

Unnamed: 0,last_modified_time,exam_start_time,extra_time_mins,incident_time_mins,candidate_id,max_question_score,question_number,question_title,question_duration_seconds,auto_score_per_question,candidate_response_code,candidate_response_text,total_score
0,2024-12-11 15:57:52,2024-12-11 14:00:02,0,0,17107,2.4,1.1,Tallsystemer,85,2.4,simpleChoice_1368096164456,Heksadesimale tall,71.61
1,2024-12-11 15:57:52,2024-12-11 14:00:02,0,0,17107,2.4,1.1,Tallsystemer,85,2.4,simpleChoice_IA172399520449584402464-f255-46e6...,Oktale tall,71.61
2,2024-12-11 15:57:52,2024-12-11 14:00:02,0,0,17107,2.4,1.1,Tallsystemer,85,2.4,simpleChoice_IA17239952044953d28e157-ac4c-49db...,Titallsystemet,71.61
3,2024-12-11 15:57:52,2024-12-11 14:00:02,0,0,17107,2.4,1.1,Tallsystemer,85,2.4,simpleChoice_IA1723995204495960f601e-73d5-48f8...,Heksadesimale tall,71.61
4,2024-12-11 15:57:52,2024-12-11 14:00:02,0,0,17107,2.4,1.1,Tallsystemer,85,2.4,simpleChoice_IA17239952636882cb13f25-bd73-4352...,Titallsystemet,71.61


In [40]:
# Extracting exam total time data
exam_time_df = prep_total_time_data(df)
exam_time_df.head()

Unnamed: 0,candidate_id,exam_start_time,last_modified_time,extra_time_mins,incident_time_mins,total_score
0,17104,2024-12-11 14:00:02,2024-12-11 17:55:47,0,0,85.84
1,17105,2024-12-11 14:00:01,2024-12-11 17:48:51,0,0,65.08
2,17106,2024-12-11 14:00:02,2024-12-11 17:46:31,0,0,35.64
3,17107,2024-12-11 14:00:02,2024-12-11 15:57:52,0,0,71.61
4,17112,2024-12-11 14:00:12,2024-12-11 18:00:05,0,0,36.95


In [39]:
# Calculate total time used, remain, and percentage of time used per student
exam_time_df = add_time_usage_stats(exam_time_df)
exam_time_df.head()

Unnamed: 0,candidate_id,exam_start_time,last_modified_time,extra_time_mins,incident_time_mins,total_score,Total_time_used_mins,Remaining_time_mins,%Time_used
0,17104,2024-12-11 14:00:02,2024-12-11 17:55:47,0,0,85.84,235.75,-25.75,112.26
1,17105,2024-12-11 14:00:01,2024-12-11 17:48:51,0,0,65.08,228.83,-18.83,108.97
2,17106,2024-12-11 14:00:02,2024-12-11 17:46:31,0,0,35.64,226.48,-16.48,107.85
3,17107,2024-12-11 14:00:02,2024-12-11 15:57:52,0,0,71.61,117.83,92.17,56.11
4,17112,2024-12-11 14:00:12,2024-12-11 18:00:05,0,0,36.95,239.88,-29.88,114.23
