In [None]:
Estimating Diligence with CAE

## Setup

Testing the CAA (Canonical Autocorrelation Analysis) library.

In [1]:
import sys
# Add the relative "../lib" directory to the module search path. Being a
# relative entry, it is resolved against the kernel's working directory
# (presumably this is where the project packages imported below live — confirm).
sys.path.append("../lib")

In [2]:
import math
import random
import uuid
import os
import copy
import itertools
from collections.abc import Iterable
from datetime import datetime as dt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.optimize import Bounds
from scipy.optimize import minimize
from scipy import optimize

from sklearn.cluster import DBSCAN

import statsmodels.api as sm


In [3]:
import logging

# Root logging configuration for the notebook. INFO is the working level;
# pass logging.DEBUG or logging.WARNING instead for more or less verbosity.
logging.basicConfig(level=logging.INFO)

# Notebook-level logger used by the cells that follow.
logger = logging.getLogger("main")

In [4]:
# Set the root logger threshold explicitly
# (alternatives: logging.DEBUG, logging.WARNING).
logging.getLogger().setLevel(logging.INFO)
# Emit one message per level to check which levels are actually displayed.
logger.debug("Test debug")
logger.info("Test info")
logger.warning("Test warning")

INFO:main:Test info


In [5]:
from tutor.domain import Domain
from tutor.curriculum_factory import CurriculumFactory
from tutor.simple_curriculum import SimpleCurriculum
from tutor.tutor import SimpleTutor
from tutor.action import Attempt, HintRequest

from learner.selfeff_learner import SelfEfficacyLearner
from learner.modular_learner import ModularLearner
from learner.binary_skill_cog import BinarySkillCognition
from learner.decider import *

from simulate.modlearner_simulation import ModLearnerSimulation
from simulate.simulation import SimulationBatch
from simulate.self_eff_simulation import SelfEffSimulation
from simulate.modlearner_simulation import ModLearnerSimulation

from log_db import mongo
from log_db.curriculum_mapper import DB_Curriculum_Mapper
from log_db.learner_mapper import DBLearnerMapper

from analytics.batch import *
from analytics.student_stats import StudentStatCalc
from analytics.featurization import *
from analytics.cae import *

In [6]:
from CanonicalAutocorrelationAnalysis.model.caa import CAAComputation
from CanonicalAutocorrelationAnalysis.model.caaObject import *
from CanonicalAutocorrelationAnalysis.model.utils import l1Norm, l2Norm, r2Compute

In [7]:
# Project base directory = parent of the directory the notebook runs in.
cwd = os.path.abspath(os.curdir)
base_dir = os.path.abspath(os.path.join(cwd, os.pardir))
logger.debug("Base directory for the project:\n%s", base_dir)

In [8]:
# Output location: a uniquely named path under <base_dir>/test/data.
data_out = "sim-%s" % str(uuid.uuid4())
data_path = os.path.join(base_dir, "test", "data", data_out)
logger.info("Writing simulation results to directory: %s" % data_path)

# Setup connection to database: look up the connection settings by name.
db_name = "motivsim"
db_params = mongo.get_db_params(db_name)

# The connection settings carry a 'pswd' entry. Mask a non-empty password
# before logging so credentials never end up in the notebook's saved output.
# An empty password logs exactly as before.
loggable_params = (
    {key: ("***" if key == "pswd" and val else val) for key, val in db_params.items()}
    if isinstance(db_params, dict)
    else db_params
)
logger.info("got db params: %s" % str(loggable_params))

# Open the data utility and keep a direct handle on its database object.
db_util = mongo.Data_Utility(data_path, db_params)
db = db_util.db

INFO:analytics.cae:Writing simulation results to directory: /rdata/Sandbox/MotivSim/test/data/sim-00664f3a-5bf6-4d52-b06c-f329cf0d7f1e
INFO:analytics.cae:got db params: {'settingId': 'motivsim', 'url': 'localhost', 'port': '27017', 'name': 'motivsim', 'user': '', 'pswd': ''}


In [9]:
# Toggle for wiping the database before a new simulation run.
clear_db = False
if clear_db:
    logger.info("Clearing database before starting new simulation")
    # NOTE(review): the actual clear call below is commented out, so setting
    # clear_db = True only logs the message above and deletes nothing.
    #db_util.clear_db()
else:
    logger.info("Skipping Clearing database")

INFO:analytics.cae:Skipping Clearing database


## 1. Simulate Data

In [10]:
# Size of the test batch, and the description used both to label the batch
# and to look it up again in the simbatches collection.
num_students = 20
sim_batch_desc = "Test BIRT Batch"

In [11]:
def gen_test_curric(db, db_params):
    """Generate a test domain and curriculum and write both to the database.

    Args:
        db: Database handle exposing the ``domains`` and ``kcs`` collections.
        db_params: Connection parameters handed to ``DB_Curriculum_Mapper``.

    Returns:
        Tuple ``(domain, curric)`` as produced by
        ``CurriculumFactory.gen_curriculum``.
    """
    # Both dicts are forwarded unchanged to the curriculum factory.
    domain_params = dict(
        m_l0=0.45,
        sd_l0=0.155,
        m_t=0.25,
        sd_t=0.13,   # alt: 0.03
        m_s=0.155,
        sd_s=0.055,
        m_g=0.15,    # alt: 0.6
        sd_g=0.105,
    )
    curric_params = dict(
        num_units=2,
        mean_sections=4,
        stdev_sections=2,
        mean_unit_kcs=22,
        stdev_unit_kcs=23,
        section_kcs_lambda=6,
        mean_steps=10,
        stdev_steps=4,
        mean_prob_kcs=6,
        stdev_prob_kcs=3,
        num_practice=100,
    )

    domain, curric = CurriculumFactory.gen_curriculum(domain_params, curric_params)

    # Persist in order: the domain, the entries of domain.kcs, the curriculum.
    db.domains.insert_one(domain.to_dict())
    kc_docs = [kc.__dict__ for kc in domain.kcs]
    db.kcs.insert_many(kc_docs)
    DB_Curriculum_Mapper(db_params).write_to_db(curric)

    return domain, curric

In [12]:
def gen_students(num_students, domain, curric, persist=True):
    """Create ``num_students`` new learners for ``domain``.

    Each learner is a ``ModularLearner`` given its own
    ``BinarySkillCognition`` and a ``DiligentDecider`` wrapping a new
    ``EVDecider``.

    Args:
        num_students: Number of learners to build.
        domain: Domain passed to both the cognition model and the learner.
        curric: Accepted but not used by this function.
        persist: Accepted but not used by this function.

    Returns:
        List of the newly built learners, in creation order.
    """
    # Arguments evaluate left to right, so each learner's cognition is built
    # first, then the EVDecider, then the DiligentDecider around it.
    return [
        ModularLearner(
            domain,
            BinarySkillCognition(domain),
            DiligentDecider(EVDecider()),
        )
        for _ in range(num_students)
    ]


def sim_students(db, num_students, domain, curric, batch_desc=None):
    """Generate, persist, and simulate a batch of students.

    Args:
        db: Database handle exposing the ``students``, ``finalsimstudents``
            and ``simbatches`` collections.
        num_students: Number of students to generate via ``gen_students``.
        domain: Domain handed to student generation and to each simulation.
        curric: Curriculum handed to student generation and to each simulation.
        batch_desc: Description for the new ``SimulationBatch``. When omitted,
            the module-level ``sim_batch_desc`` is used, as before.

    Returns:
        Tuple ``(batch, students)``: the populated batch and the list of
        students after their simulations have run.
    """
    if batch_desc is None:
        # Backward-compatible default: fall back to the notebook-level setting.
        batch_desc = sim_batch_desc

    students = gen_students(num_students, domain, curric)
    if students:
        logger.info(f"Persisting {len(students)} initialized students to db")
        db.students.insert_many([stu.to_dict() for stu in students])
    else:
        # insert_many rejects an empty document list, so skip it explicitly.
        logger.warning("No students generated; skipping insert of initialized students")

    # Init simulation batch
    batch = SimulationBatch(batch_desc)

    # Simulate Students; each simulation is added to the batch before it runs.
    for i, stu in enumerate(students):
        logger.info("Simulating student #%i" % i)
        sim = ModLearnerSimulation(domain, curric, stu)
        batch.add_sim(sim)
        sim.run()

    if students:
        logger.info("Inserting %i simulated students to db" % len(students))
        result = db.finalsimstudents.insert_many([stu.to_dict() for stu in students])
        logger.info("Db insert success: %s" % result.acknowledged)

    logger.info("Inserting simulation batch to db")
    result = db.simbatches.insert_one(batch.to_dict())
    logger.info("Db insert success: %s" % result.acknowledged)

    return batch, students

In [13]:
# Reuse a stored batch with this description when one exists; otherwise
# generate a curriculum and simulate a new batch.
# NOTE: the reuse branch does not define `domain` or `curric`.
simbatch = db.simbatches.find_one({"desc": sim_batch_desc})
if simbatch is not None:
    logger.info(f"Found simulation batch: {str(simbatch)}")
    lmapper = DBLearnerMapper(db)
    students = [lmapper.get_modlearner_from_db(sid) for sid in simbatch['student_ids']]
    batch = SimulationBatch.from_dict(simbatch)
    logger.info(f"Recovered {len(students)} students from batch with id: {batch._id}")
else:
    logger.info("Generating new simulation. None found in db")

    # Generate simulated data for the test
    domain, curric = gen_test_curric(db, db_params)
    batch, students = sim_students(db, num_students, domain, curric)
    logger.info(f"Simulated {len(students)} in batch with id: {batch._id}")


INFO:analytics.cae:Found simulation batch: {'_id': '95273d3e-f9b2-4edc-8ed9-117c62dea350', 'run_time': datetime.datetime(2020, 12, 20, 22, 59, 19, 77000), 'desc': 'Test BIRT Batch', 'student_ids': ['f97f9d07-5048-4434-81f2-18bc23331c32', '367f9ea2-9c2a-46aa-b747-172f00cc994f', '5bc27c57-33e7-4479-84d0-744c3e38096c', 'e486a130-8382-4a24-86d0-e26081651938', 'fb149dff-426a-4d1f-9cc5-2b4ea8d2178d', '8ff912ee-f2a2-4224-970f-1d29cf213f44', 'fd5d46fb-8bb0-480b-8fa1-4b66c3d71db8', '3384137f-26f4-477b-83b8-cead72b10646', '6d1e7c67-7127-45e3-88f4-462446be123a', '18b22ae5-534d-4616-bb88-07847a3e9d1b', 'a3badde6-1754-4360-84a0-042e4ba64350', '40709376-ccdf-44dc-868a-ed6f06bf9461', 'a88c1ff5-9e6b-4194-9b1e-edbf78f93260', 'fa5deb56-4126-4a97-9708-7e7c52f0e8d7', '5e4b9b36-52ec-4af5-862d-2de1aeaeaccf', 'a72add58-1751-46fb-b70b-1c1a5512aa03', '809b51a7-069a-47be-adac-a93dfa8f100d', '8d2fbcdf-60f3-4ea8-9a0a-ed16566ebc06', 'f852d44e-a2db-42f0-ac14-06830becb6ae', '88504e70-a0b4-4eec-b6b2-f563623bb071']}
I

## 1. Simulating learners

In [10]:
# Number of learners to create for each learner type simulated below.
num_students = 200

In [11]:
def simulate_students(domain, curric, students, batch):
    """Run one simulation per student and register each one with ``batch``.

    Args:
        domain: Domain passed to every ``ModLearnerSimulation``.
        curric: Curriculum passed to every ``ModLearnerSimulation``.
        students: Iterable of learners to simulate, processed in order.
        batch: Object whose ``add_sim`` receives each simulation.

    Returns:
        None.
    """
    for idx, learner in enumerate(students):
        logger.info("Simulating student #%i" % idx)
        simulation = ModLearnerSimulation(domain, curric, learner)
        # The simulation is added to the batch before it is run.
        batch.add_sim(simulation)
        simulation.run()

### Generate Curriculum

In [12]:
# Inputs for CurriculumFactory.gen_curriculum.
# Domain-level parameters: m_* / sd_* pairs (presumably mean and standard
# deviation of per-skill initial-knowledge, transition, slip and guess
# probabilities — confirm against the factory).
domain_params = dict(
    m_l0=0.45,
    sd_l0=0.155,
    m_t=0.25,
    sd_t=0.13,   # alt: 0.03
    m_s=0.155,
    sd_s=0.055,
    m_g=0.15,    # alt: 0.6
    sd_g=0.105,
)

# Curriculum-shape parameters: unit count plus size settings for sections,
# KCs, steps and practice problems.
curric_params = dict(
    num_units=5,
    mean_sections=4,
    stdev_sections=2,
    mean_unit_kcs=22,
    stdev_unit_kcs=23,
    section_kcs_lambda=6,
    mean_steps=10,
    stdev_steps=4,
    mean_prob_kcs=6,
    stdev_prob_kcs=3,
    num_practice=100,
)

In [13]:
# Build the domain and curriculum from the two parameter dicts.
domain, curric = CurriculumFactory.gen_curriculum(domain_params, curric_params)
# Persist the domain document, then one document per entry of domain.kcs.
db.domains.insert_one(domain.to_dict())
# kc.__dict__ is each object's live attribute dict, handed to the driver as-is.
db.kcs.insert_many([kc.__dict__ for kc in domain.kcs])
# Write the curriculum through a mapper configured with db_params.
curric_util = DB_Curriculum_Mapper(db_params)
curric_util.write_to_db(curric)

INFO:tutor.cogtutor_curriculum:Generated 5 units with with a total of 134 kcs
INFO:log_db.curriculum_mapper:Writing curriculum with id, c1a7b9d2-5c80-48c1-9963-4f038d39e8d3, to db
INFO:log_db.curriculum_mapper:Writing 3875 problem to db
INFO:log_db.curriculum_mapper:Writing 15806 steps to db


### Simple Diligent students

In [14]:
# Learner type 1: a DiligentDecider wrapping an EVDecider.
# Each learner is written to db.students right after it is created.
stus_1 = []
for i in range(num_students):
    cog = BinarySkillCognition(domain)
    ev_decider = EVDecider()
    decider = DiligentDecider(ev_decider)
    stu = ModularLearner(domain, cog, decider)
    stus_1.append(stu)
    logger.debug("inserting new student to db: %s", str(stu.to_dict()))
    db.students.insert_one(stu.to_dict())

In [15]:
# Start a new simulation batch labelled for the simple diligent learners.
batch = SimulationBatch("Simple diligent students")

In [16]:
# Run a simulation for every learner in stus_1, adding each one to the batch.
simulate_students(domain, curric, stus_1, batch)

INFO:learner.decider:Simulating student #0
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:Ending simulation. Logging out of active session
INFO:learner.decider:Simulating student #1
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:Ending simulation. Logging out of active session
INFO:learner.decider:Simulating student #2
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:Ending simulation. Logging out of active session
INFO:learner.decider:Simulating student #3
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:E

In [17]:
# Store the simulated learners (presumably their post-simulation state) in
# finalsimstudents, then store the batch record itself.
logger.info("Inserting %i simulated students to db", len(stus_1))
result = db.finalsimstudents.insert_many([stu.to_dict() for stu in stus_1])
logger.info("Db insert success: %s", result.acknowledged)

logger.info("Inserting simulation batch to db")
result = db.simbatches.insert_one(batch.to_dict())
logger.info("Db insert success: %s", result.acknowledged)

INFO:learner.decider:Inserting 200 simulated students to db
INFO:learner.decider:Db insert success: True
INFO:learner.decider:Inserting simulation batch to db
INFO:learner.decider:Db insert success: True


### Diligent Students with variable values

In [18]:
# Learner type 2: a DiligentDecider wrapping a RandValDecider.
# Each learner is written to db.students right after it is created.
stus_2 = []
for i in range(num_students):
    cog = BinarySkillCognition(domain)
    ev_decider = RandValDecider()
    decider = DiligentDecider(ev_decider)
    stu = ModularLearner(domain, cog, decider)
    stus_2.append(stu)
    logger.debug("inserting new student to db: %s", str(stu.to_dict()))
    db.students.insert_one(stu.to_dict())

In [19]:
# Start a new simulation batch labelled for the variable-value learners.
batch = SimulationBatch("Diligent Students with variable values")

In [20]:
# Run a simulation for every learner in stus_2, adding each one to the batch.
simulate_students(domain, curric, stus_2, batch)

INFO:learner.decider:Simulating student #0
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:Ending simulation. Logging out of active session
INFO:learner.decider:Simulating student #1
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:Ending simulation. Logging out of active session
INFO:learner.decider:Simulating student #2
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:Ending simulation. Logging out of active session
INFO:learner.decider:Simulating student #3
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:E

In [21]:
# Store the simulated learners (presumably their post-simulation state) in
# finalsimstudents, then store the batch record itself.
logger.info("Inserting %i simulated students to db", len(stus_2))
result = db.finalsimstudents.insert_many([stu.to_dict() for stu in stus_2])
logger.info("Db insert success: %s", result.acknowledged)

logger.info("Inserting simulation batch to db")
result = db.simbatches.insert_one(batch.to_dict())
logger.info("Db insert success: %s", result.acknowledged)

INFO:learner.decider:Inserting 200 simulated students to db
INFO:learner.decider:Db insert success: True
INFO:learner.decider:Inserting simulation batch to db
INFO:learner.decider:Db insert success: True


### Diligent Students with domain-level self-efficacy

In [22]:
# Learner type 3: a DiligentDecider wrapping a DomainSelfEffDecider.
# Each learner is written to db.students right after it is created.
stus_3 = []
for i in range(num_students):
    cog = BinarySkillCognition(domain)
    ev_decider = DomainSelfEffDecider()
    decider = DiligentDecider(ev_decider)
    stu = ModularLearner(domain, cog, decider)
    stus_3.append(stu)
    logger.debug("inserting new student to db: %s", str(stu.to_dict()))
    db.students.insert_one(stu.to_dict())

In [23]:
# Start a new simulation batch labelled for the domain-level self-efficacy learners.
batch = SimulationBatch("Diligent Students with domain-level self-efficacy")

In [24]:
# Run a simulation for every learner in stus_3, adding each one to the batch.
simulate_students(domain, curric, stus_3, batch)

INFO:learner.decider:Simulating student #0
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:Ending simulation. Logging out of active session
INFO:learner.decider:Simulating student #1
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:Ending simulation. Logging out of active session
INFO:learner.decider:Simulating student #2
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:Ending simulation. Logging out of active session
INFO:learner.decider:Simulating student #3
INFO:simulate.simulation:Starting simulation. Logging student into new session
INFO:tutor.tutor:Completed last unit. No more units in curriculum
INFO:simulate.simulation:E