# Rule Based Classifier

In [39]:
import numpy as np
import re

In [40]:

def is_number(s):
    try:
        complex(s) # for int, long, float and complex
    except ValueError:
        return False

    return True
def extract_operator(q,operands):
    for operand in operands:
        q=q.replace(operand,"")
    return q

def numericalize_table(records):
    records = records.copy()
    str_to_num ={}
    number=0
    for i in range(len(records[0])):
        for ele in set(records[:,i]):
            if not is_number(ele):
                str_to_num[ele] = number
                number+=1

    for i,row in enumerate(records) :
        for j, col in enumerate(row) :
            if not is_number(col):
                records[i][j] = int(str_to_num[col])
    records=records.astype(float)
    
    return records,str_to_num


def select_row(col_idx, col_val,operator,records):
    selected_records=None
    if operator == "==":
        selected_records = records[records[:,col_idx] == col_val]
    elif operator == "<=":
        selected_records = records[records[:,col_idx] <= col_val]
    elif operator == "<":
        selected_records = records[records[:,col_idx] < col_val]
    elif operator == ">":
        selected_records = records[records[:,col_idx] > col_val]
    elif operator == ">=":
        selected_records = records[records[:,col_idx] >= col_val]
    return selected_records

def organize_query(q,cols,str_to_num):
    q= q.replace(" ", "")
    operands = re.split("[\=\<\<=\>\>=]+", q)
    operator = extract_operator(q,operands)
    col_idx = cols.index(operands[0])
    col_val = operands[1]
    if not is_number(col_val):
        col_val = str_to_num[col_val]
    col_val = float(col_val)
    return col_idx,col_val,operator

def rule_base_classifier(cols,records,query):
    records = records.copy()
    str_to_num = {}
    number = 0

    records, str_to_num =numericalize_table(records)

    for_coverage = records.copy()
    for_accuracy = records.copy()
    
    for q in query[:-1]:
        col_idx, col_val, operator = organize_query(q,cols,str_to_num)
        for_coverage = select_row(organize_query(q,cols,str_to_num),for_coverage)
        
    for q in query:
        col_idx, col_val, operator = organize_query(q,cols,str_to_num)
        for_accuracy = select_row(col_idx, col_val,operator,for_accuracy)
    
    
    #print(for_coverage)
    #print(for_accuracy)
    print(query)
    print("Coverage = {}/{} = {}".format(len(for_coverage),len(records),round(len(for_coverage)/len(records),3)))       
    if len(for_coverage) :
        print("Accuracy = {}/{} = {}".format(len(for_accuracy),len(for_coverage),round(len(for_accuracy)/len(for_coverage),3)))
    else :
        print("Accuracy = 0/0")
    print("\n")
            

# Usage

# Exercise-9 Case

<img src="images/rule_based.png" />

In [41]:

Cols = ["Outlook","Temp","Humidity","Windy", "Play"]
Records = np.array([
    ["sunny",85,85,"false","No"],
    ["sunny",80,90,"true","No"],
    ["overcast",83,78,"false","Yes"],
    ["rain",70,96,"false","Yes"],
    ["rain",68,80,"false","Yes"],
    ["rain",65,70,"true","No"],
    ["overcast",64,65,"true","Yes"],
    ["sunny",72,95,"false","No"],
    ["sunny",69,70,"false","Yes"],
    ["rain",75,80,"false","Yes"],
    ["sunny",75,70,"true","Yes"],
    ["overcast",72,90,"true","Yes"],
    ["overcast",81,75,"false","Yes"],
    ["rain",71,80,"true","No"],
])




<img src="images/rule_based_sol.png" />

In [42]:
#important notice
#case sensitive

#table's r4 and r5 seem wrong
#for r4 there should be three records
#["rain",70,96,"false","Yes"],
#["rain",68,80,"false","Yes"],
#["rain",75,80,"false","Yes"],
#for r5 none


Query = ["Outlook==sunny","Humidity<=75","Play==Yes"]
rule_base_classifier(Cols,Records,Query)

Query = ["Outlook==sunny","Humidity>75","Play==No"]
rule_base_classifier(Cols,Records,Query)

Query = ["Outlook==overcast","Play==Yes"]
rule_base_classifier(Cols,Records,Query)

Query = ["Outlook==rain","Windy==false","Temp<=75","Play==Yes"]
rule_base_classifier(Cols,Records,Query)

Query = ["Outlook==rain","Windy==false","Temp>75","Play==No"]
rule_base_classifier(Cols,Records,Query)

Query = ["Outlook==rain","Windy==true","Play==No"]
rule_base_classifier(Cols,Records,Query)


TypeError: select_row() missing 2 required positional arguments: 'operator' and 'records'

# Trial Exam Case

<img src="images/rule_based_exam.png" />

In [None]:

Cols = ["Age","Income","Gender","Class"]
Records = np.array([
    [22,20,"male","Yes"],
    [33,70,"female","No"],
    [42,100,"male","Yes"],
    [28,75,"male","Yes"],
    [31,90,"female","No"],
    [17,5,"male","No"],
    [54,120,"female","No"],
    [40,200,"female","Yes"],
    [25,60,"male","Yes"],
    [23,55,"male","No"],
])

Query = ["Age<=40","Gender==male","Class==Yes"]
rule_base_classifier(Cols,Records,Query)

