# Data processing

In [1]:
import pandas as pd

In [2]:
# Show up to 100 rows before pandas truncates a displayed frame.
# The fully qualified key is required: on pandas >= 1.4 the bare 'max_rows'
# pattern also matches 'styler.render.max_rows' and raises OptionError.
pd.set_option('display.max_rows', 100)

In [3]:
# Download the mushroom records. header=None stops pandas from taking column
# names out of the file, so they are supplied here in column order.
df = pd.read_csv(
    'https://query.data.world/s/x7o2hyszg2n6sm3uupohr3xgurgmfe',
    names=[
        # label
        'class',
        # cap
        'cap_shape', 'cap_surface', 'cap_color',
        'bruises', 'odor',
        # gill
        'gill_attachment', 'gill_spacing', 'gill_size', 'gill_color',
        # stalk
        'stalk_shape', 'stalk_root',
        'stalk_surface_above_ring', 'stalk_surface_below_ring',
        'stalk_color_above_ring', 'stalk_color_below_ring',
        # veil and ring
        'veil_type', 'veil_color',
        'ring_number', 'ring_type',
        # remaining attributes
        'spore_print_color', 'population', 'habitat',
    ],
    header=None,
)

In [4]:
# Lookup tables that translate the single-letter codes used in the raw file
# into readable values. One table per column, listed in column order; the
# numbered tables follow the attribute numbering of the original comments.

# Target label.
class_mapping = dict(p='poisonous', e='edible')

# 1. cap-shape
cap_shape_mapping = dict(
    b='bell', c='conical', x='convex', f='flat', k='knobbed', s='sunken',
)

# 2. cap-surface
cap_surface_mapping = dict(f='fibrous', g='grooves', y='scaly', s='smooth')

# 3. cap-color
cap_color_mapping = dict(
    n='brown', b='buff', c='cinnamon', g='gray', r='green',
    p='pink', u='purple', e='red', w='white', y='yellow',
)

# 4. bruises? -- decoded to a boolean rather than a label
bruises_mapping = dict(t=True, f=False)

# 5. odor
odor_mapping = dict(
    a='almond', l='anise', c='creosote', y='fishy', f='foul',
    m='musty', n='none', p='pungent', s='spicy',
)

# 6. gill-attachment
gill_attachment_mapping = dict(
    a='attached', d='descending', f='free', n='notched',
)

# 7. gill-spacing
gill_spacing_mapping = dict(c='close', w='crowded', d='distant')

# 8. gill-size
gill_size_mapping = dict(b='broad', n='narrow')

# 9. gill-color
gill_color_mapping = dict(
    k='black', n='brown', b='buff', h='chocolate', g='gray', r='green',
    o='orange', p='pink', u='purple', e='red', w='white', y='yellow',
)

# 10. stalk-shape
stalk_shape_mapping = dict(e='enlarging', t='tapering')

# 11. stalk-root -- '?' is the code for a missing value; it is not a valid
# keyword name, so this table stays a literal.
stalk_root_mapping = {
    'b': 'bulbous', 'c': 'club', 'u': 'cup', 'e': 'equal',
    'z': 'rhizomorphs', 'r': 'rooted', '?': 'missing',
}

# 12. stalk-surface-above-ring
stalk_surface_above_ring_mapping = dict(
    f='fibrous', y='scaly', k='silky', s='smooth',
)

# 13. stalk-surface-below-ring (same codes as above the ring, separate dict)
stalk_surface_below_ring_mapping = dict(
    f='fibrous', y='scaly', k='silky', s='smooth',
)

# 14. stalk-color-above-ring
stalk_color_above_ring_mapping = dict(
    n='brown', b='buff', c='cinnamon', g='gray', o='orange',
    p='pink', e='red', w='white', y='yellow',
)

# 15. stalk-color-below-ring (same codes as above the ring, separate dict)
stalk_color_below_ring_mapping = dict(
    n='brown', b='buff', c='cinnamon', g='gray', o='orange',
    p='pink', e='red', w='white', y='yellow',
)

# 16. veil-type
veil_type_mapping = dict(p='partial', u='universal')

# 17. veil-color
veil_color_mapping = dict(n='brown', o='orange', w='white', y='yellow')

# 18. ring-number -- decoded to an integer count
ring_number_mapping = dict(n=0, o=1, t=2)

# 19. ring-type
ring_type_mapping = dict(
    c='cobwebby', e='evanescent', f='flaring', l='large',
    n='none', p='pendant', s='sheathing', z='zone',
)

# 20. spore-print-color
spore_print_color_mapping = dict(
    k='black', n='brown', b='buff', h='chocolate', r='green',
    o='orange', u='purple', w='white', y='yellow',
)

# 21. population
population_mapping = dict(
    a='abundant', c='clustered', n='numerous',
    s='scattered', v='several', y='solitary',
)

# 22. habitat
habitat_mapping = dict(
    g='grasses', l='leaves', m='meadows', p='paths',
    u='urban', w='waste', d='woods',
)

# Decode every column from its single-letter code to a readable value.
# A single column -> mapping table replaces 23 hand-copied assignments, so a
# column can no longer be paired with the wrong mapping by a stray edit.
column_mappings = {
    'class': class_mapping,
    'cap_shape': cap_shape_mapping,
    'cap_surface': cap_surface_mapping,
    'cap_color': cap_color_mapping,
    'bruises': bruises_mapping,
    'odor': odor_mapping,
    'gill_attachment': gill_attachment_mapping,
    'gill_spacing': gill_spacing_mapping,
    'gill_size': gill_size_mapping,
    'gill_color': gill_color_mapping,
    'stalk_shape': stalk_shape_mapping,
    'stalk_root': stalk_root_mapping,
    'stalk_surface_above_ring': stalk_surface_above_ring_mapping,
    'stalk_surface_below_ring': stalk_surface_below_ring_mapping,
    'stalk_color_above_ring': stalk_color_above_ring_mapping,
    'stalk_color_below_ring': stalk_color_below_ring_mapping,
    'veil_type': veil_type_mapping,
    'veil_color': veil_color_mapping,
    'ring_number': ring_number_mapping,
    'ring_type': ring_type_mapping,
    'spore_print_color': spore_print_color_mapping,
    'population': population_mapping,
    'habitat': habitat_mapping,
}

for column, mapping in column_mappings.items():
    # Values that are already decoded pass through unchanged, so re-running
    # this cell no longer fails with KeyError on the decoded frame. A value
    # that is neither a known code nor a decoded value still raises KeyError,
    # exactly as the original per-column lookups did.
    decoded_values = set(mapping.values())
    df[column] = [
        value if value in decoded_values else mapping[value]
        for value in df[column]
    ]

df

Unnamed: 0,class,cap_shape,cap_surface,cap_color,bruises,odor,gill_attachment,gill_spacing,gill_size,gill_color,...,stalk_surface_below_ring,stalk_color_above_ring,stalk_color_below_ring,veil_type,veil_color,ring_number,ring_type,spore_print_color,population,habitat
0,poisonous,convex,smooth,brown,True,pungent,free,close,narrow,black,...,smooth,white,white,partial,white,1,pendant,black,scattered,urban
1,edible,convex,smooth,yellow,True,almond,free,close,broad,black,...,smooth,white,white,partial,white,1,pendant,brown,numerous,grasses
2,edible,bell,smooth,white,True,anise,free,close,broad,brown,...,smooth,white,white,partial,white,1,pendant,brown,numerous,meadows
3,poisonous,convex,scaly,white,True,pungent,free,close,narrow,brown,...,smooth,white,white,partial,white,1,pendant,black,scattered,urban
4,edible,convex,smooth,gray,False,none,free,crowded,broad,black,...,smooth,white,white,partial,white,1,evanescent,brown,abundant,grasses
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,edible,knobbed,smooth,brown,False,none,attached,close,broad,yellow,...,smooth,orange,orange,partial,orange,1,pendant,buff,clustered,leaves
8120,edible,convex,smooth,brown,False,none,attached,close,broad,yellow,...,smooth,orange,orange,partial,brown,1,pendant,buff,several,leaves
8121,edible,flat,smooth,brown,False,none,attached,close,broad,brown,...,smooth,orange,orange,partial,orange,1,pendant,buff,clustered,leaves
8122,poisonous,knobbed,scaly,brown,False,fishy,free,close,narrow,buff,...,silky,white,white,partial,white,1,evanescent,white,several,leaves


# Conditional probability calculation

In [5]:
def get_conditional_probability(df, event, assumptions, normalize=False):
    """Tabulate the values of ``event`` within each group of ``assumptions``.

    Args:
        df: DataFrame with one observation per row.
        event: Name of the column whose values are tallied.
        assumptions: Column name, or list of column names, to condition on.
        normalize: With the default ``False`` the result holds raw counts per
            (assumptions, event) combination, which is what this function
            has always returned. Pass ``True`` to divide each count by its
            group total and obtain P(event | assumptions).

    Returns:
        A Series indexed by the assumption column(s) followed by ``event``.
    """
    grouped_df = df.groupby(assumptions)[event]
    return grouped_df.value_counts(normalize=normalize)

In [6]:
# Tally stalk shapes within every (class, odor) combination.
get_conditional_probability(
    df,
    event='stalk_shape',
    assumptions=['class', 'odor'],
)

class      odor      stalk_shape
edible     almond    enlarging       352
                     tapering         48
           anise     enlarging       352
                     tapering         48
           none      tapering       2496
                     enlarging       912
poisonous  creosote  enlarging       192
           fishy     tapering        576
           foul      enlarging      1296
                     tapering        864
           musty     enlarging        36
           none      enlarging       120
           pungent   enlarging       256
           spicy     tapering        576
Name: stalk_shape, dtype: int64

In [7]:
# Show the rows whose odor is 'none'.
df.loc[df['odor'].eq('none')]

Unnamed: 0,class,cap_shape,cap_surface,cap_color,bruises,odor,gill_attachment,gill_spacing,gill_size,gill_color,...,stalk_surface_below_ring,stalk_color_above_ring,stalk_color_below_ring,veil_type,veil_color,ring_number,ring_type,spore_print_color,population,habitat
4,edible,convex,smooth,gray,False,none,free,crowded,broad,black,...,smooth,white,white,partial,white,1,evanescent,brown,abundant,grasses
14,edible,convex,fibrous,brown,False,none,free,crowded,broad,brown,...,fibrous,white,white,partial,white,1,evanescent,black,abundant,grasses
15,edible,sunken,fibrous,gray,False,none,free,close,narrow,black,...,smooth,white,white,partial,white,1,pendant,brown,solitary,urban
16,edible,flat,fibrous,white,False,none,free,crowded,broad,black,...,smooth,white,white,partial,white,1,evanescent,brown,abundant,grasses
28,edible,flat,fibrous,brown,False,none,free,close,narrow,black,...,smooth,white,white,partial,white,1,pendant,black,solitary,urban
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8115,edible,convex,smooth,brown,False,none,attached,close,broad,yellow,...,smooth,orange,orange,partial,orange,1,pendant,orange,several,leaves
8119,edible,knobbed,smooth,brown,False,none,attached,close,broad,yellow,...,smooth,orange,orange,partial,orange,1,pendant,buff,clustered,leaves
8120,edible,convex,smooth,brown,False,none,attached,close,broad,yellow,...,smooth,orange,orange,partial,brown,1,pendant,buff,several,leaves
8121,edible,flat,smooth,brown,False,none,attached,close,broad,brown,...,smooth,orange,orange,partial,orange,1,pendant,buff,clustered,leaves


In [8]:
# Keep the rows with odor 'none', then show the poisonous ones among them.
none_odor_df = df.loc[df['odor'].eq('none')]
none_odor_df.loc[none_odor_df['class'].eq('poisonous')]

Unnamed: 0,class,cap_shape,cap_surface,cap_color,bruises,odor,gill_attachment,gill_spacing,gill_size,gill_color,...,stalk_surface_below_ring,stalk_color_above_ring,stalk_color_below_ring,veil_type,veil_color,ring_number,ring_type,spore_print_color,population,habitat
4106,poisonous,bell,smooth,buff,True,none,free,close,broad,gray,...,smooth,white,white,partial,white,2,pendant,green,several,meadows
4331,poisonous,knobbed,scaly,brown,False,none,free,close,narrow,white,...,scaly,white,brown,partial,white,1,evanescent,white,several,woods
4364,poisonous,bell,scaly,white,True,none,free,crowded,narrow,white,...,smooth,white,white,partial,white,1,pendant,white,clustered,leaves
4494,poisonous,flat,fibrous,yellow,False,none,free,close,narrow,white,...,scaly,white,yellow,partial,white,1,evanescent,white,several,woods
4549,poisonous,flat,smooth,white,True,none,free,close,broad,gray,...,smooth,white,white,partial,white,2,pendant,green,several,meadows
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7401,poisonous,conical,scaly,yellow,False,none,free,crowded,narrow,yellow,...,scaly,yellow,yellow,partial,yellow,1,evanescent,white,clustered,leaves
7483,poisonous,flat,scaly,yellow,False,none,free,crowded,narrow,yellow,...,scaly,yellow,yellow,partial,yellow,1,evanescent,white,clustered,leaves
7600,poisonous,bell,scaly,yellow,False,none,free,crowded,narrow,white,...,scaly,yellow,yellow,partial,yellow,1,evanescent,white,clustered,leaves
7706,poisonous,conical,scaly,yellow,False,none,free,crowded,narrow,white,...,scaly,yellow,yellow,partial,yellow,1,evanescent,white,clustered,leaves


# Network validation

In [18]:
import pysmile
import pysmile_license

In [19]:
# Create the pysmile Network object; the model file is read into it below and
# the validation loop queries it.
net = pysmile.Network()

In [66]:
# Load the network definition from 'network_v2.xdsl'. The path is relative, so
# it is resolved against the kernel's current working directory.
net.read_file('network_v2.xdsl')

In [None]:
# Minimum belief in a class before the validation loop commits to a
# prediction; rows where neither class exceeds it are counted as inconclusive.
# NOTE(review): how 0.99941 was chosen is not recorded in this notebook —
# document the derivation.
threshold = 0.99941

In [93]:
# Tallies of rows whose confident prediction matched / contradicted the label.
correct_count = 0
incorrect_count = 0

for _, row in df.iterrows():
    category = ''
    # Every column except 'class' is set as evidence on the node of the same
    # name; 'class' is the label and is held back for scoring.
    for key, value in row.items():
        if key != 'class':
            net.set_evidence(key, value)
        else:
            category = value
    net.update_beliefs()

    # NOTE(review): assumes the 'Class' node reports its two outcomes in the
    # order (edible, poisonous) — confirm against the network file.
    edible, poisonous = net.get_node_value('Class')
    if edible > threshold:
        prediction = 'edible'
    elif poisonous > threshold:
        prediction = 'poisonous'
    else:
        # Neither belief clears the threshold: leave the row undecided.
        prediction = 'unknown'

    if prediction == category:
        correct_count += 1
    elif prediction != 'unknown':
        # Confident but wrong: print the row's details for inspection.
        print(category, edible, poisonous, row['odor'])
        incorrect_count += 1

    # Remove this row's evidence before the next row is processed.
    net.clear_all_evidence()

In [99]:
# Report the three outcomes; a row is inconclusive when it was counted as
# neither correct nor incorrect.
print(
    f'Correct: {correct_count}',
    f'Incorrect: {incorrect_count}',
    f'Inconclusive: {len(df) - correct_count - incorrect_count}',
    sep='\n',
)

Correct: 8085
Incorrect: 0
Inconclusive: 39


In [100]:
# Share of all rows predicted correctly. The denominator is every row in df,
# so inconclusive rows lower this figure just like wrong predictions would.
print(f'Accuracy: {correct_count / len(df)}')

Accuracy: 0.9951994091580503
