In [63]:
import re
import os
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import matplotlib.pyplot as plt

In [64]:
# Result directories of the simulation runs analysed in this notebook,
# one entry per run.
results_dirs = [
    '../thesis-university-impl/results/simulation_2023-05-17 15:58:22',
    '../thesis-university-impl/results/simulation_2023-05-17 16:50:54',
    '../thesis-university-impl/results/simulation_2023-05-17 17:02:55',
    '../thesis-university-impl/results/simulation_2023-05-17 17:26:54',
    '../thesis-university-impl/results/simulation_2023-05-17 17:30:26',
]

In [65]:
# Names of the artefacts inside a single results directory. Each value starts
# with '/' because it is appended to a results directory by plain string
# concatenation (e.g. `results + metadata_path`), not via os.path.join.
needs_dir = '/needs'
metadata_path = '/meta.csv'
calendar_path = '/calendar.txt'
positions_path = '/positions.txt'
traces_path = '/traces.txt'

In [66]:
# Action names that can appear in a behavioural trace. Every name gets its own
# duration column in the per-run metadata frames.
actions = [
    "Inactive",
    "Walking",
    "PlayingGame",
    "Eating",
    "DrinkingCoffee",
    "Studying",
    "Working",
    "WaitingLecture",
    "AttendingLecture",
    "GivingLecture",
]

In [67]:
# Lecture names used as keys of the attendance counters.
# "European Law" has no " Lecture" suffix; the recorded attendance output uses
# the same spelling, so the string is kept as is.
lectures = [
    "Computer Logic Lecture",
    "Data Structures and Algorithms Lecture",
    "Software Engineering Lecture",
    "Microbiology Lecture",
    "Pharmacology Lecture",
    "Cell Biology and Biochemistry Lecture",
    "Introduction to Law Lecture",
    "European Law",
    "Foundations and Applications of Economics Lecture",
    "Business Statistics Lecture",
]


In [68]:
def sample_time_to_real(sample_time, time_step):
    """Convert a sample index into elapsed time.

    The sample index is multiplied by the step length and the product is
    divided by 3600, so the result is in hours when `time_step` is given in
    seconds per sample.
    """
    per_hour = 60 * 60
    elapsed = time_step * sample_time
    return elapsed / per_hour

In [69]:
def read_csv_files(directory):
    """Load every ``.csv`` file found directly inside `directory`.

    Parameters
    ----------
    directory : str
        Path of the directory to scan (not recursive).

    Returns
    -------
    list of pandas.DataFrame
        One frame per CSV file, ordered by file name.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the order of
    # the returned frames reproducible across machines and runs.
    csv_names = sorted(name for name in os.listdir(directory) if name.endswith('.csv'))
    return [pd.read_csv(os.path.join(directory, name)) for name in csv_names]

In [70]:
def parse_behavioral_traces(file_path):
    """Parse a traces file into one list of action records per line.

    Every line is scanned for tokens of the form ``[Name(comp1,comp2)]N``.
    Each token becomes a dict with the keys ``name``, ``components`` (the text
    between the parentheses split on ','; empty list when there is none),
    ``duration`` (N as int) and the inclusive ``start``/``end`` sample indices,
    where the first action of a line starts at 0 and each action starts right
    after the previous one ends.

    Returns a list with one entry per line of the file (lines without any
    token yield an empty list).
    """
    token = re.compile(r'\[([\w\d]+)\(([\w\d\s.,]*)\)](\d+)')
    parsed = []

    with open(file_path, 'r') as handle:
        for raw_line in handle:
            agent_trace = []
            offset = 0  # first sample index of the next action

            for name, arguments, length in token.findall(raw_line.strip()):
                span = int(length)
                agent_trace.append({
                    'name': name,
                    'components': arguments.split(',') if arguments else [],
                    'duration': span,
                    'start': offset,
                    'end': offset + span - 1,
                })
                offset += span

            parsed.append(agent_trace)

    return parsed

In [71]:
def extract_task_names(calendar_file):
    """Return the task names listed on each line of a calendar file.

    A line is split on ','; for every non-empty piece the text after the first
    ': ' separator (up to the next ': ', if any) is taken as the task name.
    The result has one list per line of the file, in file order.
    """
    with open(calendar_file, 'r') as handle:
        return [
            [entry.split(': ')[1] for entry in raw_line.strip().split(',') if entry]
            for raw_line in handle
        ]

In [108]:
def check_attended_lectures(traces, tasks):
    """Collect, per agent, the scheduled lectures that were waited for / attended.

    `traces[i]` is the list of action records of agent i and `tasks[i]` the
    task names scheduled for that agent. For every 'WaitingLecture' or
    'AttendingLecture' action the first component is taken as the lecture name
    and kept only if it appears in the agent's tasks.

    Returns ``(waited, attended)``: two lists with one list of lecture names
    per agent, in trace order (a lecture appears once per matching action).
    """
    waited, attended = [], []

    for agent_idx, agent_trace in enumerate(traces):
        agent_waited, agent_attended = [], []

        for step in agent_trace:
            if step['name'] == 'WaitingLecture':
                bucket = agent_waited
            elif step['name'] == 'AttendingLecture':
                bucket = agent_attended
            else:
                continue

            lecture = step['components'][0]
            if lecture in tasks[agent_idx]:
                bucket.append(lecture)

        waited.append(agent_waited)
        attended.append(agent_attended)

    return waited, attended

In [109]:
def calculate_lecture_attendance(scheduled_lectures, waited_lectures, attended_lectures, all_lectures):
    """Count, per lecture, how many agents had it scheduled, waited for it and attended it.

    The three input lists are walked in parallel (one entry per agent). For
    every lecture in an agent's schedule the scheduled counter is incremented;
    the waited / attended counters are incremented when the lecture also
    appears in that agent's waited / attended list.

    Returns three dicts keyed by the names in `all_lectures`:
    ``(scheduled_counts, waited_counts, attended_counts)``.
    Raises KeyError if a scheduled lecture is not in `all_lectures`.
    """
    scheduled_counts = dict.fromkeys(all_lectures, 0)
    waited_counts = dict.fromkeys(all_lectures, 0)
    attended_counts = dict.fromkeys(all_lectures, 0)

    per_agent = zip(scheduled_lectures, waited_lectures, attended_lectures)
    for scheduled, waited, attended in per_agent:
        for lecture in scheduled:
            scheduled_counts[lecture] += 1
            if lecture in waited:
                waited_counts[lecture] += 1
            if lecture in attended:
                attended_counts[lecture] += 1

    return scheduled_counts, waited_counts, attended_counts

In [89]:
# Read data: for every run, load its metadata CSV and parse its traces file.
# `metadata[k]` and `traces[k]` both belong to `results_dirs[k]`.
metadata = []
traces = []

for results in results_dirs:
    # Paths are built by string concatenation; the *_path constants start with '/'.
    meta = pd.read_csv(results + metadata_path)
    tcs = parse_behavioral_traces(results + traces_path)
    
    metadata.append(meta)
    traces.append(tcs)

In [90]:
# Per run, a working copy of the metadata with one zero-initialised duration
# column per action plus two summary columns. The frames loaded from disk in
# `metadata` are left untouched.
new_metadata = []

for meta in metadata:
    new_meta = meta.copy()
    for action in actions:
        new_meta[action] = 0
    new_meta['total_time_active'] = 0
    # Plain column assignment. The previous `new_meta.at['sequence_length'] = 0`
    # passed a single key to `.at`, which expects a (row, column) pair, so it did
    # not create this column (depending on the pandas version it raises or
    # appends a spurious row labelled 'sequence_length').
    new_meta['sequence_length'] = 0
    new_metadata.append(new_meta)

In [91]:
# Calculate, per agent, the total duration of each action, the total active
# time (everything except "Inactive") and the number of actions in the trace.
#
# Totals are accumulated in a local dict and then *assigned*, instead of being
# added onto the DataFrame with `+=`, so re-running this cell yields the same
# values rather than double-counting.
for tcs, meta in zip(traces, new_metadata):
    # Row label i of the metadata frame is used as the index of the agent's trace.
    for i, agent_trace in enumerate(tcs):
        durations = {}
        for action in agent_trace:
            action_name = action['name']
            duration = action['duration']
            if action_name not in meta.columns:
                # Keep failing loudly on unknown action names instead of
                # silently creating a new column.
                raise KeyError(action_name)
            durations[action_name] = durations.get(action_name, 0) + duration

        for action_name, duration in durations.items():
            meta.at[i, action_name] = duration
        meta.at[i, 'total_time_active'] = sum(
            total for name, total in durations.items() if name != "Inactive"
        )
        meta.at[i, 'sequence_length'] = len(agent_trace)

In [92]:
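# NOTE: stale draft. `new_metadata` is a list of DataFrames (one per run), so the
# filters below cannot be applied to it directly; the student filter is applied
# per run inside the attendance loop further down.
# students_metadata = new_metadata[new_metadata['type'] == 'student'].copy().drop(['workaholic', "Working", "GivingLecture"], axis=1)
# admin_metadata = new_metadata[new_metadata['type'] == 'admin'].copy().drop(['driven', "Studying", "AttendingLecture"], axis=1)
# professors_metadata = new_metadata[new_metadata['type'] == 'professor'].copy().drop(['driven', "Studying", "AttendingLecture"], axis=1)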

In [110]:
def calculate_attendance(expected, actual):
    """Return ``actual / expected`` as a percentage for every shared key.

    Parameters
    ----------
    expected : dict
        Key -> expected count (the denominator).
    actual : dict
        Key -> observed count (the numerator).

    Returns
    -------
    dict
        Key -> percentage (float). Keys missing from `actual` are skipped, and
        so are keys whose expected count is 0, because the ratio is undefined
        there (this used to raise ZeroDivisionError).
    """
    percentages = {}
    for key, expected_count in expected.items():
        if key not in actual or expected_count == 0:
            continue
        percentages[key] = (actual[key] / expected_count) * 100
    return percentages

In [111]:
def calculate_average(dictionary):
    """Return the mean of the strictly positive values of `dictionary`.

    Values that are not > 0 are left out of both the sum and the count.
    When there is no positive value at all (including an empty dict) the
    function returns 0.0 instead of raising ZeroDivisionError.
    """
    positive = [value for value in dictionary.values() if value > 0]
    if not positive:
        return 0.0
    return sum(positive) / len(positive)

In [116]:
# Average lecture attendance per run: one entry per directory in results_dirs.
percentages = []
for result, meta, tcs in zip(results_dirs, new_metadata, traces):
    # Student rows of this run, without the three columns dropped below.
    # The frame from the final iteration stays in the namespace after the loop.
    students_metadata = meta[meta['type'] == 'student'].copy().drop(['workaholic', "Working", "GivingLecture"], axis=1)

    # Scheduled task names per agent, and which of them were waited for / attended.
    agent_tasks = extract_task_names(result + calendar_path)
    waited, attended = check_attended_lectures(tcs, agent_tasks)
    
#     students_metadata['#lectures_scheduled'] = [len(l) for l in agent_tasks[:600]]
#     students_metadata['#lectures_waited'] = [len(l) for l in waited[:600]]
#     students_metadata['#lectures_attended'] = [len(l) for l in attended[:600]]
    
    # NOTE(review): 600 is hard-coded; presumably the number of student agents,
    # assumed to be the first 600 entries of each list. Confirm against meta.csv.
    # `wait` (the waited-for counts) is not used afterwards.
    expected, wait, actual = calculate_lecture_attendance(agent_tasks[:600], waited[:600], attended[:600], lectures)
    
    
    # Attendance percentage per lecture for this run.
    total = calculate_attendance(expected, actual)
    
    print(total)
    
    # calculate_average only averages values > 0, so lectures at 0% are left out.
    p = calculate_average(total)
    percentages.append(p)

percentages

{'Computer Logic Lecture': 84.21052631578947, 'Data Structures and Algorithms Lecture': 82.27848101265823, 'Software Engineering Lecture': 0.0, 'Microbiology Lecture': 0.0, 'Pharmacology Lecture': 80.0, 'Cell Biology and Biochemistry Lecture': 84.61538461538461, 'Introduction to Law Lecture': 82.17821782178217, 'European Law': 80.76923076923077, 'Foundations and Applications of Economics Lecture': 80.61224489795919, 'Business Statistics Lecture': 83.50515463917526}
{'Computer Logic Lecture': 85.5072463768116, 'Data Structures and Algorithms Lecture': 80.82191780821918, 'Software Engineering Lecture': 0.0, 'Microbiology Lecture': 0.0, 'Pharmacology Lecture': 86.81318681318682, 'Cell Biology and Biochemistry Lecture': 0.0, 'Introduction to Law Lecture': 0.0, 'European Law': 68.83116883116884, 'Foundations and Applications of Economics Lecture': 83.78378378378379, 'Business Statistics Lecture': 78.88888888888889}
{'Computer Logic Lecture': 84.05797101449275, 'Data Structures and Algorithm

[82.27115500899745,
 80.77436541700985,
 80.60123471753194,
 84.45031872183141,
 85.32550477109261]

In [115]:
# Mean of the per-run averages collected in `percentages`.
np.mean(percentages)

82.68451572729265

In [113]:
# `expected` and `actual` are not defined in this cell: they are the values left
# behind by the last iteration of the attendance loop, so this covers the final
# run only.
total = calculate_attendance(expected, actual)
total

{'Computer Logic Lecture': 89.58333333333334,
 'Data Structures and Algorithms Lecture': 88.23529411764706,
 'Software Engineering Lecture': 83.13253012048193,
 'Microbiology Lecture': 0.0,
 'Pharmacology Lecture': 86.41975308641975,
 'Cell Biology and Biochemistry Lecture': 85.71428571428571,
 'Introduction to Law Lecture': 83.49514563106796,
 'European Law': 74.4186046511628,
 'Foundations and Applications of Economics Lecture': 90.9090909090909,
 'Business Statistics Lecture': 86.02150537634408}

In [29]:
# Average attendance over the lectures in `total`.
# NOTE(review): this cell's execution count (29) is lower than that of the
# function definitions above, so the recorded output may be stale; re-run to refresh.
calculate_average(total)

78.79826873404204

In [30]:
# Share of students who attended all / none / all-but-one of their scheduled lectures.
# `students_metadata`, `agent_tasks` and `attended` are the values left behind by
# the last iteration of the attendance loop, so this summarises the final run only.
df = students_metadata

# The two count columns are not created by any active code (only by commented-out
# lines), so the lookups below raised KeyError on a fresh kernel. Derive them here
# when they are missing. Row label i is used as the agent index, the same
# convention the duration cell uses for `meta.at[i, ...]` and `tcs[i]`.
if '#lectures_scheduled' not in df.columns:
    df['#lectures_scheduled'] = [len(agent_tasks[i]) for i in df.index]
if '#lectures_attended' not in df.columns:
    df['#lectures_attended'] = [len(attended[i]) for i in df.index]

total_students = len(df)

attended_all = len(df[(df['#lectures_scheduled'] > 0) & (df['#lectures_scheduled'] == df['#lectures_attended'])])
percentage_attended_all = (attended_all / total_students) * 100

# Also counts students who had no lecture scheduled at all.
attended_none = len(df[df['#lectures_attended'] == 0])
percentage_attended_none = (attended_none / total_students) * 100

# Despite the name, this counts students who missed exactly one scheduled
# lecture and attended at least one.
attended_one_or_two = len(df[((df['#lectures_scheduled'] - df['#lectures_attended']) == 1) & (df['#lectures_attended'] > 0)])
percentage_attended_one_or_two = (attended_one_or_two / total_students) * 100

print("Percentage of students attended all their scheduled lectures: {:.2f}%".format(percentage_attended_all))
print("Percentage of students attended none of their scheduled lectures: {:.2f}%".format(percentage_attended_none))
print("Percentage of students who missed only one of their scheduled lectures: {:.2f}%".format(percentage_attended_one_or_two))


Percentage of students attended all their scheduled lectures: 63.00%
Percentage of students attended none of their scheduled lectures: 25.00%
Percentage of students who missed only one of their scheduled lectures: 11.83%
