Here we extract some useful rules from the Student evaluations dataset.

In [4]:
data_file = "../data_sets/StudentEvaluations.csv"

import pandas as pd
import numpy as np
df = pd.read_csv(data_file)

We will convert all attributes except age to categorical.

In [5]:
# Class size to bins
bins = [0, 15, 30, 50, 100, np.inf]
names = [ '<15', '15-30',  '30-50', '50-100', '100+']

df['cls_students'] = pd.cut(df['cls_students'], bins, labels=names)

# Beauty average to bins
bins = [0, 3, 6, 9, np.inf]
names = [ '<3', '3-6',  '6-9', '9+']

df['bty_avg'] = pd.cut(df['bty_avg'], bins, labels=names)

# Professor score to bins
bins = [0, 2, 3, 4, 4.5, np.inf]
names = [ '<2', '2-3',  '3-4', '4-4.5', '4.5+']

df['prof_eval'] = pd.cut(df['prof_eval'], bins, labels=names)

# Course score to class label
bins = [0, 2, 3, 4, 4.5, np.inf]
names = [ 'bad', 'fair',  'average', 'good', 'excellent']

df['course_eval'] = pd.cut(df['course_eval'], bins, labels=names)

If we are not using the rules for classification, we can process each class label separately.

In [6]:
columns_list = df.columns.to_numpy().tolist()

class_labels = df[columns_list[-1]].unique()

print(class_labels)

[good, average, fair, excellent]
Categories (4, object): [fair < average < good < excellent]


In [9]:
from rule_learner import *
from operator import attrgetter

for class_label in class_labels:    
    rules = learn_rules(columns_list, df, [class_label], 30, 0.6)
    print()
    print("--- rules for class %s ---" % (class_label))

    # sort rules by accuracy descending
    rules.sort(key=attrgetter('accuracy', 'coverage'), reverse=True)
    for rule in rules[:20]:
        print(rule)

--- rules for class good ---
If [prof_eval=4-4.5, age>=54, cls_level=upper, language=english, ethnicity=not minority] then good. Coverage:32, accuracy: 0.75
If [prof_eval=4-4.5, cls_level=lower, age<50, ethnicity=not minority, language=english] then good. Coverage:31, accuracy: 0.6451612903225806
If [prof_eval=4-4.5, cls_students=15-30, language=english, age>=37] then good. Coverage:31, accuracy: 0.6451612903225806
--- rules for class average ---
If [prof_eval=3-4, cls_students=50-100, age<62, language=english] then average. Coverage:31, accuracy: 0.967741935483871
If [prof_eval=3-4, rank=tenured, cls_students=15-30, age>=40] then average. Coverage:30, accuracy: 0.9666666666666667
If [prof_eval=3-4, gender=female, age<57, language=english] then average. Coverage:32, accuracy: 0.9375
If [prof_eval=3-4, bty_avg=3-6, ethnicity=not minority, age<63] then average. Coverage:32, accuracy: 0.9375
If [prof_eval=3-4, age>=29] then average. Coverage:32, accuracy: 0.75
If [prof_eval=4-4.5, cls_stu