## Setup

In [3]:
import sys
sys.path.append("../lib")

In [4]:
import math
import random
import uuid
import os
import copy
from collections.abc import Iterable
from datetime import datetime as dt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import Bounds
from scipy.optimize import minimize
from scipy import optimize

import statsmodels.api as sm

from scipy.stats import pearsonr


In [5]:
import logging

#logging.basicConfig(level=logging.DEBUG)
logging.basicConfig(level=logging.INFO)
#logging.basicConfig(level=logging.WARNING)

logger = logging.getLogger("main")

In [6]:
#logging.getLogger().setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.INFO)
#logging.getLogger().setLevel(logging.WARNING)
logger.debug("Test debug")
logger.info("Test info")
logger.warning("Test warning")

INFO:main:Test info


In [7]:
from tutor.domain import Domain
from tutor.curriculum_factory import CurriculumFactory
from tutor.simple_curriculum import SimpleCurriculum
from tutor.tutor import SimpleTutor
from tutor.action import Attempt, HintRequest

from learner.selfeff_learner import SelfEfficacyLearner
from learner.modular_learner import ModularLearner
from learner.binary_skill_cog import BinarySkillCognition
from learner.decider import *

from simulate.modlearner_simulation import ModLearnerSimulation
from simulate.simulation import SimulationBatch

from analytics.student_stats import StudentStatCalc

from log_db import mongo
from log_db.curriculum_mapper import DB_Curriculum_Mapper

In [8]:
# Get path to current project directory
cwd = os.path.abspath(".")
base_dir = os.path.abspath(os.path.join(cwd, os.pardir))
logger.debug("Base directory for the project:\n%s" % base_dir)

In [9]:
# Setup connection to database
data_out = "sim-%s" % str(uuid.uuid4())
data_path = os.path.join(base_dir,"test", "data", data_out)
logger.info("Writing simulation results to directory: %s" % data_path)
db_name = "motivsim"
db_params  = mongo.get_db_params(db_name)
logger.info("got db params: %s" % str(db_params))
db_util = mongo.Data_Utility(data_path, db_params)
db = db_util.db

INFO:learner.decider:Writing simulation results to directory: /rdata/Sandbox/MotivSim/test/data/sim-770ad4bf-8ad8-4d3a-9f37-faf7f256163e
INFO:learner.decider:got db params: {'settingId': 'motivsim', 'url': 'localhost', 'port': '27017', 'name': 'motivsim', 'user': '', 'pswd': ''}


In [10]:
# Test db connection
db_util.peak()

INFO:log_db.mongo:collection name, kcs, has 54782 documents
INFO:log_db.mongo:collection name, simbatches, has 0 documents
INFO:log_db.mongo:collection name, domains, has 4 documents
INFO:log_db.mongo:collection name, students, has 0 documents
INFO:log_db.mongo:collection name, units, has 2222 documents
INFO:log_db.mongo:collection name, decisions, has 0 documents
INFO:log_db.mongo:collection name, curriculums, has 4 documents
INFO:log_db.mongo:collection name, steps, has 17687097 documents
INFO:log_db.mongo:collection name, finalsimstudents, has 0 documents
INFO:log_db.mongo:collection name, problems, has 5854901 documents
INFO:log_db.mongo:collection name, actions, has 0 documents
INFO:log_db.mongo:collection name, sections, has 9097 documents
INFO:log_db.mongo:collection name, tutor_events, has 0 documents


In [9]:
logger.info("Clearing database before starting new simulation")
#db_util.clear_db()

INFO:learner.decider:Clearing database before starting new simulation
INFO:log_db.mongo:Clearing all documents from collection kcs
INFO:log_db.mongo:Clearing all documents from collection simbatches
INFO:log_db.mongo:Clearing all documents from collection domains
INFO:log_db.mongo:Clearing all documents from collection students
INFO:log_db.mongo:Clearing all documents from collection units
INFO:log_db.mongo:Clearing all documents from collection decisions
INFO:log_db.mongo:Clearing all documents from collection curriculums
INFO:log_db.mongo:Clearing all documents from collection steps
INFO:log_db.mongo:Clearing all documents from collection finalsimstudents
INFO:log_db.mongo:Clearing all documents from collection problems
INFO:log_db.mongo:Clearing all documents from collection actions
INFO:log_db.mongo:Clearing all documents from collection sections
INFO:log_db.mongo:Clearing all documents from collection tutor_events


## 1. Simulating learners

In [9]:
num_students = 100

In [10]:
def simulate_students(domain, curric, students, batch):    
    for i, stu in enumerate(students):
        logger.info("Simulating student #%i" % i)
        sim = ModLearnerSimulation(domain, curric, stu)
        batch.add_sim(sim)
        sim.run()

### Generate Curriculum

In [49]:
domain_params = {'m_l0': 0.45,
                 'sd_l0': 0.155,
                 'm_t': 0.25,
                 'sd_t': 0.13,#0.03,
                 'm_s': 0.155,
                 'sd_s': 0.055,
                 'm_g': 0.15,#0.6,
                 'sd_g': 0.105
}
curric_params = {'num_units': 2000,
                 'mean_sections': 4,
                 'stdev_sections': 2,
                 'mean_unit_kcs': 22,
                 'stdev_unit_kcs': 23,
                 'section_kcs_lambda': 6,
                 'mean_steps': 10,
                 'stdev_steps': 4,
                 'mean_prob_kcs': 6,
                 'stdev_prob_kcs': 3,
                 'num_practice': 400
}

In [None]:
domain, curric = CurriculumFactory.gen_curriculum(domain_params, curric_params)
db.domains.insert_one(domain.to_dict())
db.kcs.insert_many([kc.__dict__ for kc in domain.kcs])
curric_util = DB_Curriculum_Mapper(db_params)
curric_util.write_to_db(curric)

INFO:tutor.cogtutor_curriculum:Generated 2000 units with with a total of 49514 kcs
INFO:log_db.curriculum_mapper:Writing curriculum with id, ffc30713-7a00-4278-94a2-77a5f658e7cc, to db
INFO:log_db.curriculum_mapper:Writing 5290491 problem to db
INFO:log_db.curriculum_mapper:Writing 21457710 steps to db


## Test

In [11]:
results = db.steps.find(batch_size=10)


In [12]:
doc = results.next()
print(doc)

{'_id': 'a6f4e1db-c736-4833-a31e-010929722901', 'domain_id': '27a5878e-7b67-4f48-b62f-60ad51e3e44d', 'curric_id': '8e8e1448-8d65-49dc-a5bc-dd80e7d19489', 'unit_id': 'e227e022-6f39-4bac-851f-bf264f6319cc', 'section_id': '578d68fd-1b8a-4585-91ce-014413f6fbfd', 'prob_id': '32db457c-90d4-4754-b90f-89703a018857', 'kcs': ['7e1bb892-f299-4510-8101-d0b2a325fbe5'], 'hints_avail': 3, 'm_time': 11, 'sd_time': 3}


In [9]:
db_col = db.steps
batch_size=30000
fields = ["unit_id", "_id"]

In [10]:
d = db_col.find()
i = 0
subset = []
frames = []
start = dt.now()
loop_start = dt.now()
for doc in d:
    subset.append(doc)
    if len(subset) == batch_size:
        frames.append(pd.DataFrame(subset).loc[:, fields])
        logger.debug(f"number of subset frames: {len(frames)}")
        subset = []  
        loop_end = dt.now()
        loop_time = (loop_end - loop_start).total_seconds()
        logger.debug(f"Loop time: {loop_time}")
        loop_start = dt.now()

all_data = pd.concat(frames, axis=0)
end = dt.now()
time = (end - start).total_seconds()
logger.info(f"Loop time: {time}")
logger.info(f"shape of data: {all_data.shape}")


INFO:learner.decider:Loop time: 167.755521
INFO:learner.decider:shape of data: (17670000, 2)


In [1]:
start = dt.now()
d = pd.DataFrame(db_col.find()).loc[:,fields]
end = dt.now()
time = (end - start).total_seconds()
logger.info(f"Total time: {time}")
logger.info(f"data shape: {d.shape}")

NameError: name 'dt' is not defined

In [22]:
for result in results:
    print(result)
#    d = pd.Series(result)
 #   print(d.shape)

{'_id': '5fd18634-ff86-47e1-b091-a84d70b4a572', 'domain_id': '27a5878e-7b67-4f48-b62f-60ad51e3e44d', 'pl0': 0.22425012062275157, 'pt': 0.3255077059387803, 'ps': 0.1941964595395631, 'pg': 0.006928364686691141, 'm_time': 16.331502857292428, 'sd_time': 4.082875714323107}
{'_id': '2a2ae82c-37e8-40ea-9a21-e23308716d0f', 'domain_id': '27a5878e-7b67-4f48-b62f-60ad51e3e44d', 'pl0': 0.27815690491235945, 'pt': 0.32137828519841105, 'ps': 0.23157394083439503, 'pg': 0.1458800326947557, 'm_time': 8.074861128313358, 'sd_time': 2.0187152820783396}
{'_id': 'cddbccfe-f0ea-4e39-8732-a1884f98467b', 'domain_id': '27a5878e-7b67-4f48-b62f-60ad51e3e44d', 'pl0': 0.5705319160573025, 'pt': 0.5409362301415263, 'ps': 0.15228669908943124, 'pg': 0.20201680991929477, 'm_time': 12.406695482070008, 'sd_time': 3.101673870517502}
{'_id': '1cbee65c-746b-4f13-937c-83fc1b2eb1c1', 'domain_id': '27a5878e-7b67-4f48-b62f-60ad51e3e44d', 'pl0': 0.56554436003508, 'pt': 0.01, 'ps': 0.17303112202351784, 'pg': 0.09116299274217804, 'm