# Probability map function

In [1]:
# create a class for a probability map - if you are new to Python just evaluate and skip this cell
# This was coded by Jenny Harlow

import copy
class ProbyMap(object):                # class definition
    '''Probability map class.

    Stores a mapping from each outcome of a finite sample space to its
    probability and offers event-probability lookup (P), a printable
    representation, and exponentiation (** n) to build the product map of
    n repetitions.

    Invalid input is reported by printing a message rather than raising;
    in that case the map stays empty.
    '''

    def __init__(self, sspace, probs): # constructor
        '''Build the map from a sample space and matching probabilities.

        sspace is an iterable of distinct, hashable outcomes.
        probs is an iterable of probabilities, one per outcome, in the same
        order as sspace, summing to 1 (within 1e-10).

        On a failed check a message is printed and the map is left empty.
        '''
        self.__probmap = {}  # default probmap is empty
        # Explicit checks (rather than assert statements) so that validation
        # still runs when Python is started with -O.
        try:
            outcomes = list(sspace)    # materialise once; also accepts generators/dict views
            valid = len(set(outcomes)) == len(outcomes)   # outcomes must be unique
            if valid:
                prob_list = list(probs)
                valid = (abs(sum(prob_list) - 1) < 1e-10       # probs sum to 1
                         and len(prob_list) == len(outcomes))  # one proby per outcome
        except TypeError as diag: # if there any problems with types
            print (str(diag))
            return

        if not valid:
            print ("Check sample space and probabilities")
            return

        self.__probmap = dict(zip(outcomes, prob_list))    # map from sspace to probs

    def P(self, events):
        '''Return the probability of an event or set of events.

        events is an iterable of distinct outcomes from the sample space
        (a string is treated as an iterable of its characters).

        Returns the sum of the mapped probabilities. If events contains
        duplicates or outcomes outside the sample space, or cannot be
        processed, a message is printed and 0 is returned.'''

        retvalue = 0
        try:
            events_list = list(events)   # materialise once; also accepts generators
            events_set = set(events_list)
            if (len(events_set) != len(events_list)               # no repeated events
                    or not events_set.issubset(self.__probmap)):  # events subset of sample space
                print ("Check your events")
                return retvalue

            for ev in events_list:     # add each mapped probability to the return value
                retvalue += self.__probmap[ev]

        except TypeError as diag:
            print (str(diag))

        return retvalue

    def __str__(self):                            # redefine printable string rep
        'Printable representation of the object, probabilities to 3 decimals.'
        entries = ', '.join(str(key) + ': ' + "%.3f" % prob
                            for key, prob in self.__probmap.items())
        return '{' + entries + '}'

    __repr__ = __str__

    def get_probmap(self):                        # get a deep copy of the proby map
        'Return a deep copy of the internal dictionary.'
        return copy.deepcopy(self.__probmap)      # getter cannot alter object's map

    probmap = property(get_probmap)               # allow read access via .probmap

    def get_ref_probmap(self):                    # get a reference to the real probmap
        'Return the internal dictionary itself; changes to it affect this object.'
        return self.__probmap                     # getter can alter the object's map

    ref_probmap = property(get_ref_probmap)       # allow access via .ref_probmap

    @staticmethod
    def dictExp(big_map, small_map):
        '''Internal helper function for __pow__(...).

        Takes two proby map dictionaries and returns their product map:
        each key is str(big key) + ' ' + str(small key) and each value is
        the product of the two mapped probabilities.'''
        new_bl = {}
        for sle in small_map:
            for ble in big_map:
                new_key = str(ble) + ' ' + str (sle)
                new_bl[new_key] = big_map[ble]*small_map[sle]
        return new_bl

    def __pow__(self, x):
        '''Return the probability map exponentiated to the integer power x.

        The result is a new ProbyMap over space-joined sequences of x
        outcomes. For x <= 1 the result is a copy of this map. A non-integer
        x prints a message and returns None.'''
        # The original tested against the undefined name `Integer`, which
        # raised NameError on every call; numbers.Integral covers int and any
        # other type registered as an integer.
        from numbers import Integral
        if not isinstance(x, Integral):
            print ("cannot raise to non-integer power")
            return None

        pmap = copy.deepcopy(self.__probmap) # copy the probability map dictionary
        new_pmap = dict(pmap)                # running product, starts as power 1
        for i in range(x-1):
            new_pmap = self.dictExp(new_pmap, pmap)

        return ProbyMap(new_pmap.keys(), new_pmap.values())

#### Example 4: Experiments, outcomes, sample spaces, events, and the probability of events

Let's go back to the well-mixed fruit bowl experiment.  The fruit bowl contains:

- 2 oranges
- 3 apples
- 1 lemon

The experiment is to take one piece of fruit from the bowl and the outcome is the type of fruit we get. 

The sample space is $\Omega = \{orange, apple, lemon\}$

We can use a Python list to represent this sample space (a list is a bit easier to use than a set, but using a list means that we are responsible for making sure that each element in it is unique).

In [4]:
samplespace= ['orange', 'apple', 'lemon']

In [5]:
probabilities= [2/6, 3/6, 1/6]

In [6]:
probmapfruit= ProbyMap(sspace=samplespace, probs= probabilities)
probmapfruit

{orange: 0.333, apple: 0.500, lemon: 0.167}

In [10]:
probmapfruit.P(['apple'])

0.5

In [11]:
probmapfruit.P(['apple', 'lemon'])

0.6666666666666666

In [12]:
def powerset(samplespace):
    '''Return every subset of samplespace as a list of tuples.

    Subsets are ordered by size (empty tuple first) and, within a size,
    in the order itertools.combinations produces them.'''
    from itertools import combinations
    outcomes = list(samplespace)
    all_subsets = []
    for size in range(len(outcomes) + 1):
        # all subsets with exactly `size` outcomes
        all_subsets.extend(combinations(outcomes, size))
    return all_subsets

In [13]:
subset= powerset(samplespace)
subset

[(),
 ('orange',),
 ('apple',),
 ('lemon',),
 ('orange', 'apple'),
 ('orange', 'lemon'),
 ('apple', 'lemon'),
 ('orange', 'apple', 'lemon')]

In [14]:
for i in subset:
    print(probmapfruit.P(i))

0
0.3333333333333333
0.5
0.16666666666666666
0.8333333333333333
0.5
0.6666666666666666
0.9999999999999999


In [15]:
# Sample space: the 26 upper-case letters A..Z, one outcome per letter.
alphaspace = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Probability of each letter, written as a count per 1000 in the same A..Z order
# (the counts add up to 1000, so the probabilities sum to 1).
alphaRelFreqs = [count/1000 for count in (
    73, 9, 30, 44, 130, 28, 16, 35, 74,       # A-I
    2, 3, 35, 25, 78, 74, 27, 3, 77, 63,      # J-S
    93, 27, 13, 16, 5, 19, 1,                 # T-Z
)]

In [17]:
probofalpha= ProbyMap(sspace=alphaspace, probs= alphaRelFreqs)
probofalpha

{A: 0.073, B: 0.009, C: 0.030, D: 0.044, E: 0.130, F: 0.028, G: 0.016, H: 0.035, I: 0.074, J: 0.002, K: 0.003, L: 0.035, M: 0.025, N: 0.078, O: 0.074, P: 0.027, Q: 0.003, R: 0.077, S: 0.063, T: 0.093, U: 0.027, V: 0.013, W: 0.016, X: 0.005, Y: 0.019, Z: 0.001}

In [18]:
vowels = ['A', 'E', 'I', 'O', 'U']

In [19]:
probofalpha.P(vowels)

0.37800000000000006

In [26]:
cat= 'CAT'
cat

'CAT'

In [22]:
cats= list(cat)

In [23]:
cats

['C', 'A', 'T']

In [27]:
probofalpha.P(cat)

0.196

In [34]:
def load_sms(path='D:/Uppsala Materials/data/spam.csv'):
    '''Load the SMS CSV file as a list of (text, label) pairs.

    path is the CSV file to read; it defaults to the location this notebook
    was originally run with, so pass your own path on another machine.

    The first row is skipped as a header. In every following row, column 0
    must be 'ham' or 'spam' and column 1 is the message text. Labels are
    encoded as ham -> 0, spam -> 1. Blank rows are skipped and an empty
    file yields an empty list.

    Raises KeyError if a row has a label other than 'ham' or 'spam'.'''
    import csv
    hamspam = {'ham': 0, 'spam': 1}
    # newline='' lets the csv module handle line endings itself, so newlines
    # embedded in quoted fields are read correctly.
    with open(path, mode='r', encoding='latin-1', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)   # skip the header row (tolerates an empty file)
        return [(row[1], hamspam[row[0]]) for row in reader if row]
    

In [38]:
sms_data= load_sms()
sms_data[:2]

[('Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...',
  0),
 ('Ok lar... Joking wif u oni...', 0)]

In [59]:
sms_data[1]

('Ok lar... Joking wif u oni...', 0)

In [39]:
samplespaceofSMS= [0, 1]

In [41]:
probofspam= sum(sms[1] for sms in sms_data)/ len(sms_data)
probofspam

0.13406317300789664

In [43]:
probofham = 1- probofspam
probofham

0.8659368269921034

In [44]:
probSpace = [probofham, probofspam]
probOFSms= ProbyMap(sspace=samplespaceofSMS, probs=probSpace)
probOFSms

{0: 0.866, 1: 0.134}

In [45]:
def freeintext(txt):
    '''Indicator for the substring 'free' in txt.

    Returns 1 when the lower-case substring 'free' occurs anywhere in txt
    and 0 otherwise. The match is case-sensitive, so 'FREE' or 'Free' alone
    do not count.'''
    return 1 if 'free' in txt else 0


In [46]:
freeintext('samin is free')

1

In [58]:
freein_SMS = sum(freeintext(txt) for txt, label in sms_data)
freein_SMS



122

In [61]:
proboffree= freein_SMS/len(sms_data)
proboffree

0.02189519023689878

In [62]:
## YouTry
# Create a probability map with the sample space and probabilities
# for the above example.

In [63]:
sampleforfree= (0, 1)
probspaceFree= (1-proboffree, proboffree)

In [64]:
proboffreetxt= ProbyMap(sspace=sampleforfree, probs=probspaceFree)
proboffreetxt

{0: 0.978, 1: 0.022}

In [65]:
# How do we handle a message that is both spam and contains 'free'?
# Multiply the two 0/1 indicators: the product is 1 only when both hold.

In [75]:
# X: spam indicator per message; Y: 'free'-in-text indicator per message.
X = [label for _text, label in sms_data]
Y = [freeintext(text) for text, _label in sms_data]
# The product of the two indicators is 1 only when both are 1.
xandy = [is_spam * has_free for is_spam, has_free in zip(X, Y)]
# Fraction of all messages for which both indicators are 1.
probcon = sum(xandy) / len(sms_data)
probcon

0.01094759511844939

In [76]:
# Ratio of the joint relative frequency (label 1 and 'free' in text) to the
# relative frequency of 'free' in text: the empirical conditional probability
# of label 1 given that the text contains 'free'.
probSMSfREE = probcon/ proboffree 
probSMSfREE

0.5