In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the domain hierarchy and forward-fill missing cells so each row carries
# the most recent non-missing value above it in every column.
# NOTE(review): presumably the CSV leaves repeated parent domains blank — confirm.
# `.ffill()` replaces `fillna(method='ffill', inplace=True)`: the `method=`
# argument is deprecated in recent pandas, and chaining avoids in-place mutation
# so re-running this cell always starts from the file contents.
domain_df = pd.read_csv('domain.csv').ffill()
domain_df.head()

Unnamed: 0,Domain1,Domain2,Domain3
0,다항식,다항식의 연산,다항식의 정리
1,다항식,다항식의 연산,"다항식의 덧셈, 뺄셈"
2,다항식,다항식의 연산,"다항식의 곱셈, 나눗셈"
3,다항식,다항식의 연산,곱셈공식
4,다항식,인수분해,인수분해의 기본 공식


In [3]:
class Treenode:
    """One node of the domain tree.

    Attributes are plain public fields:
      name   -- label of the node (normally a domain name string).
      level  -- depth marker supplied by the caller; DomainTree.construct_tree
                uses -1 for the 'Head' root and 0/1/2 for the three domain levels.
      parent -- the parent Treenode, or None for the root.
      child  -- list of child Treenodes, filled in by the caller.
      aux    -- auxiliary per-node value, initialised to 0.
    """

    def __init__(self, name, parent, level):
        self.name = name
        self.level = level
        self.parent = parent
        self.child = []  # a fresh list per node; never shared between nodes
        self.aux = 0

    def __repr__(self):
        # Format (rather than concatenate) the name so that non-string labels,
        # e.g. a numeric domain or a NaN cell, do not raise TypeError.
        return 'level={} {}'.format(self.level, self.name)
    
    
class DomainTree:
    """Three-level domain hierarchy (Domain1 > Domain2 > Domain3).

    ``self.tree`` is a synthetic 'Head' root (level -1). Its children are the
    Domain1 nodes (level 0), whose children are Domain2 nodes (level 1), whose
    children are the Domain3 leaves (level 2).
    """

    def __init__(self, df):
        """Build the tree from ``df`` (columns Domain1, Domain2, Domain3)."""
        self.tree = self.construct_tree(df)

    @staticmethod
    def construct_tree(df):
        """Return the 'Head' root of a tree built from ``df``.

        Values appear in order of first occurrence. Rows whose Domain1 or
        Domain2 is missing are skipped: such keys can never match an equality
        filter, so they would only produce childless nodes that make
        ``gen_random_domain`` fail.
        """
        root = Treenode('Head', None, level=-1)
        for d1 in df['Domain1'].dropna().unique().tolist():
            in_d1 = df['Domain1'] == d1
            d1_node = Treenode(d1, root, level=0)
            root.child.append(d1_node)
            for d2 in df.loc[in_d1, 'Domain2'].dropna().unique().tolist():
                # Combine both conditions into one full-length mask instead of
                # chaining two boolean selections with differently sized keys.
                in_d2 = in_d1 & (df['Domain2'] == d2)
                d2_node = Treenode(d2, d1_node, level=1)
                d1_node.child.append(d2_node)
                for d3 in df.loc[in_d2, 'Domain3'].unique().tolist():
                    d2_node.child.append(Treenode(d3, d2_node, level=2))
        return root

    def _iter_leaves(self):
        """Yield (d1_node, d2_node, d3_node) for every level-2 node, in tree order."""
        for d1_node in self.tree.child:
            for d2_node in d1_node.child:
                for d3_node in d2_node.child:
                    yield d1_node, d2_node, d3_node

    @staticmethod
    def _pick_child(node):
        """Return a uniformly random child of ``node`` (one ``np.random.randint`` draw)."""
        if not node.child:
            raise ValueError('cannot sample a domain: node %r has no children' % (node.name,))
        return node.child[np.random.randint(len(node.child))]

    def gen_random_domain(self):
        """Return a random ``(domain1, domain2, domain3)`` tuple of node names.

        Each level is drawn uniformly among the children of the node chosen at
        the level above, using the global ``np.random`` state.

        Raises:
            ValueError: if the tree, or the chosen branch, has no children.
        """
        d1_node = self._pick_child(self.tree)
        d2_node = self._pick_child(d1_node)
        d3_node = self._pick_child(d2_node)
        return (d1_node.name, d2_node.name, d3_node.name)

    def traverse(self, func):
        '''
        Apply func to every level=2 node.
        '''
        for _, _, leaf in self._iter_leaves():
            func(leaf)

    def set_aux(self, value):
        '''
        Store value in the aux field of every level=2 node.
        '''
        def _assign(node):
            node.aux = value
        self.traverse(_assign)

    def set_aux_if(self, value, func):
        '''
        Store value in the aux field of every level=2 node for which
        func(node) returns a truthy result.
        '''
        def _assign_if(node):
            if func(node):
                node.aux = value
        self.traverse(_assign_if)

    def to_df(self):
        """Return one row per level-2 node with columns domain1, domain2, domain3, aux."""
        columns = ['domain1', 'domain2', 'domain3', 'aux']
        records = {column: [] for column in columns}
        for d1_node, d2_node, d3_node in self._iter_leaves():
            records['domain1'].append(d1_node.name)
            records['domain2'].append(d2_node.name)
            records['domain3'].append(d3_node.name)
            records['aux'].append(d3_node.aux)
        return pd.DataFrame(records, columns=columns)
                    

In [4]:
class Problem:
    """A problem record: code, list of domain tuples, difficulty, error and answer.

    ``depth`` is fixed at 3 and ``complexity`` is the number of entries in
    ``domain``.
    """

    def __init__(self, pcode, domain, difficulty, error, answer):
        self.pcode = pcode
        self.domain = domain
        self.difficulty = difficulty
        self.error = error
        self.answer = answer

        self.depth = 3
        self.complexity = len(domain)

    def __repr__(self):
        # Put each domain tuple on its own tab-indented line.
        domain_text = str(self.domain).replace('),', '),\n\t')
        lines = [
            '{pcode=' + str(self.pcode),
            'domain=' + domain_text,
            'depth=' + str(self.depth),
            'complexity=' + str(self.complexity),
            'difficulty=' + str(self.difficulty),
            'error=' + str(self.error),
            'answer=' + str(self.answer) + '}',
        ]
        return '\n'.join(lines) + '\n'

    @classmethod
    def gen_random_problem(cls, dtree, pcode=0):
        """Create a random problem whose domains are sampled from ``dtree``.

        Draws, in this order, from the global ``np.random`` state: a roll in
        [0, 100) that selects the complexity (below 80 -> 1, below 90 -> 2,
        otherwise 3), one ``dtree.gen_random_domain()`` per unit of
        complexity, a difficulty, and an answer in 1..4. ``error`` is set to 0.
        """
        roll = np.random.randint(0, 100)
        complexity = 1 if roll < 80 else (2 if roll < 90 else 3)

        domain_list = [dtree.gen_random_domain() for _ in range(complexity)]

        # The error value is unknown, so difficulty stands in for the
        # wrong-answer rate; it is assumed to rise with complexity.
        # Each pair is (low, high) with the upper bound exclusive.
        difficulty_bounds = {1: (5, 66), 2: (20, 76), 3: (40, 86)}
        low, high = difficulty_bounds[complexity]
        difficulty = np.random.randint(low, high)

        answer = np.random.randint(1, 5)

        return cls(pcode, domain_list, difficulty, 0, answer)


In [5]:
domainTree = DomainTree(domain_df)
NUM_OF_PROBLEMS = 100

# Column order of the summary table; every name is also a Problem attribute.
problem_columns = ['pcode', 'domain', 'depth', 'complexity', 'difficulty', 'error', 'answer']

# Generate the problems one after another, using the loop index as pcode.
problem_list = [Problem.gen_random_problem(domainTree, pcode)
                for pcode in range(NUM_OF_PROBLEMS)]

# Column-oriented view of the same problems (one list per attribute).
problem_dict = {column: [getattr(problem, column) for problem in problem_list]
                for column in problem_columns}

problem_df = pd.DataFrame(problem_dict, columns=problem_columns)
problem_df.head()

Unnamed: 0,pcode,domain,depth,complexity,difficulty,error,answer
0,0,"[(방정식과 부등식, 일차, 이차방정식, 일차방정식의 해법), (집합과 명제, 집합...",3,3,59,0,1
1,1,"[(집합과 명제, 명제의 증명, 절대부등식의 활용)]",3,1,55,0,4
2,2,"[(다항식, 항등식과 미정계수, 다항식의 나눗셈과 항등식)]",3,1,38,0,1
3,3,"[(실수와 허수, 실수, 실수), (경우의 수, 순열과 조합, 순열), (집합과 명...",3,3,80,0,4
4,4,"[(경우의 수, 경우의 수, 경우의 수)]",3,1,29,0,1


In [7]:
class PSAgent:
    '''
    Problem-solving agent (design notes from the original author):
    1. per-domain correct rate
    2. self-report concept
    3. take difficulty into account

    ``p_domain`` holds one row per level-2 domain with the columns
    domain1, domain2, domain3, correct, report1, report2, report3.
    '''

    # Rate columns, in the order expected by ``set_p_domain``'s ``values``.
    _RATE_COLUMNS = ('correct', 'report1', 'report2', 'report3')

    def __init__(self, df=None):
        '''
        df: domain table passed to DomainTree; when omitted, the notebook-level
            ``domain_df`` is used (the original behaviour).
        '''
        if df is None:
            df = domain_df
        table = DomainTree(df).to_df().rename(columns={'aux': 'correct'})
        # Rates are fractional. Start the column as float so that assigning
        # e.g. 0.1 later does not rely on implicit int -> float upcasting,
        # which recent pandas deprecates.
        table['correct'] = table['correct'].astype(float)
        for column in self._RATE_COLUMNS[1:]:
            table[column] = table['correct'].copy()
        self.p_domain = table

        # Per the original note: drop in correct rate for each +1 of complexity.
        # Not used by any code visible in this class.
        self.complexity_weak = 0.9

    def set_p_domain(self, domain1_name: str, values: tuple):
        '''
        domain1_name: e.g. '다항식', type=str
        values:  e.g. (0.8, 0.4, 0.5, 0.3), type=tuple (or list)

        For every row whose domain1 contains domain1_name as a literal
        substring, set correct, report1, report2 and report3 to values[0..3].
        Extra entries in values are ignored.

        Raises IndexError if values has fewer than four entries; the table is
        left untouched in that case.
        '''
        # Read all four values before writing anything so that a short
        # sequence cannot leave the table partially updated.
        rates = [values[i] for i in range(len(self._RATE_COLUMNS))]
        # regex=False: match the name literally even if it contains regex
        # metacharacters. na=False: rows with a missing domain1 never match.
        filtered_rows = self.p_domain['domain1'].str.contains(
            domain1_name, regex=False, na=False)
        for column, rate in zip(self._RATE_COLUMNS, rates):
            self.p_domain.loc[filtered_rows, column] = rate

    def solve(self, problem=None):
        '''
        Placeholder: not implemented yet, always returns None.

        The original definition lacked ``self``, so ``agent.solve(problem)``
        raised TypeError. ``problem`` defaults to None so a bare
        ``agent.solve()`` keeps working.
        '''
        return None
        
    

In [8]:
# Create an agent and assign the four rates
# (correct, report1, report2, report3) to every row whose domain1 matches.
agent = PSAgent()
agent.set_p_domain(domain1_name='다항식', values=(0.1, 0.2, 0.3, 0.4))
agent.p_domain

Unnamed: 0,domain1,domain2,domain3,correct,report1,report2,report3
0,다항식,다항식의 연산,다항식의 정리,0.1,0.2,0.3,0.4
1,다항식,다항식의 연산,"다항식의 덧셈, 뺄셈",0.1,0.2,0.3,0.4
2,다항식,다항식의 연산,"다항식의 곱셈, 나눗셈",0.1,0.2,0.3,0.4
3,다항식,다항식의 연산,곱셈공식,0.1,0.2,0.3,0.4
4,다항식,인수분해,인수분해의 기본 공식,0.1,0.2,0.3,0.4
5,다항식,인수분해,기본 공식의 활용,0.1,0.2,0.3,0.4
6,다항식,인수분해,다항식의 최대공약수와 최소공배수,0.1,0.2,0.3,0.4
7,다항식,항등식과 미정계수,항등식의 성질과 미정계수법,0.1,0.2,0.3,0.4
8,다항식,항등식과 미정계수,다항식의 나눗셈과 항등식,0.1,0.2,0.3,0.4
9,다항식,나머지 정리,나머지 정리,0.1,0.2,0.3,0.4
