In [1]:
%load_ext autoreload
%autoreload 2
from crew.models import *
from crew.util import *
from crew.analysis import *
import pandas as pd
import numpy as np
import scipy as sp
import scipy.stats

In [2]:
from crew.forms import * 

In [10]:
def show_form_errors(form):
    """Print a form's validation errors: the whole collection, then one line per field.

    Args:
        form: An object exposing ``errors``, a ``fields`` mapping keyed by
            field name, and ``form[name]`` bound-field lookup (the interface
            Django forms provide).
    """
    print(form.errors)
    for name in form.fields:
        # Per-field errors are exposed on the bound field (``form[name]``);
        # the unbound field objects stored in ``form.fields`` have no
        # ``errors`` attribute.
        print(name, form[name].errors)

In [3]:
# Bind an AnalysisSegForm to a hand-written payload, validate it, and
# display the cleaned values as the cell result.
seg_payload = {
    'semester': 1,
    'exam': 1,
    'category': 'L',
    'grade': 1,
    'school_props': ['BB W', 'BB Y', 'BX W', 'BX Y'],
    'show_total': 1,
    'show_subjects': [1, 3],
}
form = AnalysisSegForm(seg_payload)
form.is_valid()  # validation has to run before cleaned_data is read
form.cleaned_data

{'category': 'L',
 'exam': <Exam: 一考>,
 'grade': 1,
 'school_props': [['BB', 'W'], ['BB', 'Y'], ['BX', 'W'], ['BX', 'Y']],
 'semester': <Semester: 2015上>,
 'show_subjects': [<Subject: 语文>, <Subject: 物理>],
 'show_total': True}

In [14]:
# Bind an AnalysisAvgForm to a hand-written payload, print whether it
# validates, then feed it to AverageAnalysis and keep the resulting frame.
avg_payload = {
    'semester': 1,
    'exam': 1,
    'category': 'L',
    'grade': 1,
    'school_props': ['BB W', 'BB Y', 'BX W', 'BX Y'],
    'show_subjects': [1, 3],
}
form = AnalysisAvgForm(avg_payload)
print(form.is_valid())
form.cleaned_data  # bare mid-cell expression: evaluated, but not displayed
aa = AverageAnalysis(form)
res_df = aa.get_df()

True


In [57]:
# Bind an AnalysisAvgCmpForm (note the extra semester_cmp / exam_cmp
# fields), print its validity and cleaned values, then run
# AverageCmpAnalysis, whose get_df() result is unpacked into two names.
avg_cmp_payload = {
    'semester': 1,
    'exam': 1,
    'category': 'L',
    'grade': 1,
    'school_props': ['BB W', 'BB Y', 'BX W', 'BX Y'],
    'show_subjects': [1, 2, 3],
    'semester_cmp': 1,
    'exam_cmp': 2,
}
form = AnalysisAvgCmpForm(avg_cmp_payload)
print(form.is_valid())
print(form.cleaned_data)
aa = AverageCmpAnalysis(form)
res_df, res_df_cmp = aa.get_df()

True
{'semester_cmp': <Semester: 2015上>, 'exam_cmp': <Exam: 二考>, 'school_props': [['BB', 'W'], ['BB', 'Y'], ['BX', 'W'], ['BX', 'Y']], 'grade': 1, 'category': 'L', 'semester': <Semester: 2015上>, 'exam': <Exam: 一考>, 'show_subjects': [<Subject: 语文>, <Subject: 数学>, <Subject: 物理>]}
语文
数学
物理
总分
语文
数学
物理
总分


In [11]:
def get_subject_pk(name):
    """Return the ``pk`` of the ``Subject`` fetched with ``Subject.objects.get(name=name)``."""
    subject = Subject.objects.get(name=name)
    return subject.pk
def get_level_forms():
    """Build the three bound forms passed to the level analysis.

    Returns:
        tuple: ``(form, rank_form, score_formset)`` — an ``AnalysisLevelForm``,
        a ``LevelRankForm`` and a ``LevelSubjectScoreFormSet``, each bound to
        hard-coded sample data.
    """
    form = AnalysisLevelForm({
        'semester': 1,
        'exam': 1,
        'category': 'L',
        'grade': 1,
        'school_props': ['BB W', 'BB Y', 'BX W', 'BX Y'],
        'show_subjects': [1, 2, 3],
    })
    # NOTE(review): the saved output of this notebook shows ``use_rank``
    # cleaned to the string 'False', which is truthy — confirm how the
    # analysis interprets it.
    rank_form = LevelRankForm({
        'level_a_rank': 50, 'level_b_rank': 120, 'use_rank': False,
    })
    # One row per sub-form: (subject name, level A score, level B score, pk).
    # The total-score row uses -1 in place of a Subject pk.
    score_rows = [
        ('语文', 120, 100, get_subject_pk('语文')),
        ('数学', 130, 110, get_subject_pk('数学')),
        ('物理', 60, 40, get_subject_pk('物理')),
        ('总分', 300, 200, -1),
    ]
    # The management-form counts are derived from the rows. They used to be
    # hard-coded to '3' while four sub-forms were supplied, so the formset
    # never instantiated the last (total score) sub-form.
    form_count = str(len(score_rows))
    score_data = {
        'form-TOTAL_FORMS': form_count,
        'form-INITIAL_FORMS': form_count,
        'form-MAX_NUM_FORMS': form_count,
    }
    for idx, (subject_name, level_a_score, level_b_score, subject_pk) in enumerate(score_rows):
        prefix = 'form-%d-' % idx
        score_data[prefix + 'level_a_score'] = level_a_score
        score_data[prefix + 'level_b_score'] = level_b_score
        score_data[prefix + 'subject_name'] = subject_name
        score_data[prefix + 'subject_pk'] = subject_pk
    score_formset = LevelSubjectScoreFormSet(score_data)
    return form, rank_form, score_formset
# Build the three level-analysis forms, print each one's validity on a
# single line, then show what the rank form cleaned its input to.
form, rank_form, score_formset = get_level_forms()
print(*(bound.is_valid() for bound in (form, rank_form, score_formset)))
print(rank_form.cleaned_data)

True True True
{'level_b_rank': 120, 'use_rank': 'False', 'level_a_rank': 50}


In [4]:
# Run LevelAnalysis on the current `form`, `rank_form` and `score_formset`
# and display the list returned by get_df_list().
aa = LevelAnalysis(form, rank_form, score_formset)
aa.get_df_list()

[     school grade  class_idx  (level_a_count, 总分)  (level_a_ratio, 总分)  \
 0  博学(0657)  高一年级          1                 19.0             0.301587   
 1  博学(0657)  高一年级          2                 10.0             0.294118   
 2  博白(0646)  高一年级          1                  6.0             0.285714   
 3  博白(0646)  高一年级          2                  7.0             0.269231   
 4  博白(0646)  高一年级          3                  9.0             0.169811   
 
    (level_b_count, 总分)  (level_b_ratio, 总分)  (max_score, 总分)  (mean, 总分)  \
 0                 43.0             0.682540            335.0  252.888889   
 1                 20.0             0.588235            328.0  244.470588   
 2                 13.0             0.619048            327.0  246.952381   
 3                 16.0             0.615385            320.0  249.230769   
 4                 32.0             0.603774            312.0  241.641509   
 
    (mean_diff, 总分)  (total_count, 总分)  (valid_count, 总分)  (level_a_rank, 总分)  \
 0 

In [7]:
# Keep both result lists of the current analysis object `aa` for inspection.
df_list = aa.get_df_list()
agg_list = aa.get_agg_list()

In [10]:
# Show the first aggregate entry as a plain dict.
agg_list[0].to_dict()

{('level_a_count', '总分'): 51.0,
 ('level_a_ratio', '总分'): 0.25888324873096447,
 ('level_b_count', '总分'): 124.0,
 ('level_b_ratio', '总分'): 0.62944162436548223,
 ('max_score', '总分'): 335.0,
 ('mean', '总分'): 247.29441624365481,
 ('mean_diff', '总分'): 247.29441624365481,
 ('total_count', '总分'): 199.0,
 ('valid_count', '总分'): 197.0}

In [21]:
# Replace the columns of `d` with (level 0, level 1) tuples built from its
# two column levels.
# NOTE(review): `d` is not defined in any visible cell — confirm where it comes from.
d.columns = list(zip(d.columns.get_level_values(0), d.columns.get_level_values(1)))

In [22]:
# Display the columns of `d` to inspect the tuple labels.
d.columns

Index([         ('school', ''),           ('grade', ''),
             ('class_idx', ''), ('level_a_count', '物理'),
       ('level_a_ratio', '物理'), ('level_b_count', '物理'),
       ('level_b_ratio', '物理'),     ('max_score', '物理'),
                ('mean', '物理'),     ('mean_diff', '物理'),
         ('total_count', '物理'),   ('valid_count', '物理'),
        ('level_a_rank', '物理'),  ('level_b_rank', '物理'),
                    ('物理', '')],
      dtype='object')

In [17]:
# GROUP_KEYS is used as the groupby key further down in this cell.
from crew.analysis.util import GROUP_KEYS
# Work on a copy; the per-subject columns added below are assigned on `df`.
# NOTE(review): `record_df` is not defined in any visible cell — confirm where it comes from.
df = record_df.copy()



def group_stat(df, subject, mean):
    """Summarise one group of rows for a single subject.

    Args:
        df: Frame for one group, read through the two-level column keys
            ``('score', subject)``, ``('is_level_a', subject)`` and
            ``('is_level_b', subject)``.
        subject: Second-level column key selecting the subject.
        mean: Reference mean that the group mean is compared against.

    Returns:
        pandas.Series keyed by ``(statistic name, subject)`` holding the
        valid (non-null score) count, the row count, the group mean and its
        difference from ``mean``, the maximum score, and the count and ratio
        of level A and level B rows.
    """
    score_series = df['score', subject]
    group_mean = score_series.mean()
    level_a_count = df['is_level_a', subject].sum()
    level_b_count = df['is_level_b', subject].sum()
    # count() skips missing scores, so the ratios are relative to rows that
    # actually have a score rather than to len(df).
    valid_count = score_series.count()
    return pd.Series({
        ('valid_count', subject): valid_count,
        ('total_count', subject): len(df),
        ('mean', subject): group_mean,
        ('mean_diff', subject): group_mean - mean,
        ('max', subject): score_series.max(),
        ('level_a_count', subject): level_a_count,
        ('level_a_ratio', subject): level_a_count / valid_count,
        ('level_b_count', subject): level_b_count,
        # This ratio was previously computed from level_a_count.
        ('level_b_ratio', subject): level_b_count / valid_count,
    })

# Build one grouped statistics frame per displayed subject, plus the total
# score ('总分'), and collect them in subject_df_list.
subject_df_list = []
for subject in [s.name for s in aa.show_subjects] + ['总分']:
    df['rank', subject] = rank_series(df['score', subject])
    if aa.settings['use_rank']:
        # NOTE(review): `>=` marks rows whose rank value is at or above the
        # threshold; if rank 1 is the best score this looks inverted —
        # confirm against rank_series before relying on it.
        df['is_level_a', subject] = df['rank', subject] >= aa.settings['level_a_rank']
        df['is_level_b', subject] = df['rank', subject] >= aa.settings['level_b_rank']
    else:
        df['is_level_a', subject] = df['score', subject] >= aa.settings[subject]['level_a_score']
        df['is_level_b', subject] = df['score', subject] >= aa.settings[subject]['level_b_score']
    # TODO: df['level', subject] was left as an assignment with no right-hand
    # side (a SyntaxError that kept the whole cell from running); define the
    # level column here once its rule is decided.
    mean = df['score', subject].mean()
    subject_df = df.groupby(GROUP_KEYS).apply(group_stat, subject=subject, mean=mean)
    # Keep every per-subject result; previously the frame was computed and
    # then dropped, leaving subject_df_list empty.
    subject_df_list.append(subject_df)


In [20]:
# Element-wise AND of "rank below the level A threshold" and "rank at or
# above the level B threshold" for the current `subject`.
# NOTE(review): if the thresholds are the 50 / 120 entered in the rank form,
# no rank can be both < 50 and >= 120, which would explain the all-False
# result — the two comparisons may be swapped; confirm the intended band.
np.logical_and(df['rank', subject]<aa.settings['level_a_rank'], df['rank', subject]>=aa.settings['level_b_rank'])

1      False
2      False
3      False
4      False
5      False
6      False
7      False
8      False
9      False
10     False
11     False
12     False
13     False
14     False
15     False
16     False
17     False
18     False
19     False
20     False
21     False
22     False
23     False
24     False
25     False
26     False
27     False
28     False
29     False
30     False
       ...  
170    False
171    False
172    False
173    False
174    False
175    False
176    False
177    False
178    False
179    False
180    False
181    False
182    False
183    False
184    False
185    False
186    False
187    False
188    False
189    False
190    False
191    False
192    False
193    False
194    False
195    False
196    False
197    False
198    False
199    False
Name: (rank, 总分), dtype: bool

[<Subject: 语文>, <Subject: 数学>, <Subject: 物理>]

In [37]:
# Display the columns of teacher_df.
# NOTE(review): `teacher_df` is not defined in any visible cell — confirm where it comes from.
teacher_df.columns

Index(['数学', '班主任', '语文'], dtype='object', name='subject')

In [7]:
# Re-run the analysis on the current `aa`.
# NOTE(review): the saved output of this cell is KeyError: '物理'; the
# teacher_df columns shown earlier have no '物理' entry, which may be
# related — confirm before sharing the notebook.
aa.get_df_list()

KeyError: '物理'