## Assignment 5 

### Shashank Siripragada Group 6

In [309]:
class ProbDist(dict):
    """A Probability Distribution; an {outcome: probability} mapping.

    The constructor accepts the same arguments as ``dict.update`` (a mapping
    or iterable of pairs and/or keyword arguments) whose values are
    non-negative weights, and rescales them in place so they sum to 1.

    Raises:
        AssertionError: if any weight is negative (or not comparable as >= 0).
        ZeroDivisionError: if the distribution is non-empty but all weights are 0.
    """
    def __init__(self, mapping=(), **kwargs):
        self.update(mapping, **kwargs)
        # Validate BEFORE normalizing: dividing by a negative total would flip
        # the signs and let an all-negative input slip through unnoticed.
        # An explicit raise (rather than `assert`) keeps the check under -O.
        for outcome, weight in self.items():
            if not weight >= 0:
                raise AssertionError(
                    "negative weight %r for outcome %r" % (weight, outcome))
        # Make probabilities sum to 1.0
        total = sum(self.values())
        for outcome in self:
            self[outcome] = self[outcome] / total


def p(event, space):
    """The probability of an event, given a sample space.

    event: a collection of outcomes, or a predicate that is true of outcomes in the event.
    space: a set of equiprobable outcomes, or a ProbDist of {outcome: probability} pairs.

    Returns the summed probability of the event's outcomes when space is a
    ProbDist, otherwise the exact Fraction |event & space| / |space|.
    Outcomes of the event that are not in the space contribute nothing in
    either case.
    """
    # Imported locally so this function does not depend on an import made in
    # some other notebook cell.
    from fractions import Fraction

    # A predicate event is first turned into the concrete collection of
    # outcomes from `space` that satisfy it.
    if is_predicate(event):
        event = such_that(event, space)

    if isinstance(space, ProbDist):
        # Outcomes carry different weights, so add up the weight of each
        # outcome of the space that belongs to the event. Iterating over the
        # space (not the event) skips event outcomes the space does not
        # contain, mirroring the `event & space` intersection below.
        return sum(space[o] for o in space if o in event)
    # Equiprobable outcomes: favourable count over total count.
    return Fraction(len(event & space), len(space))

# An event counts as a predicate when it is callable; p() uses this to tell
# predicate events apart from explicit collections of outcomes.
is_predicate = callable

def such_that(predicate, space):
    """The outcomes in the sample space for which the predicate is true.

    For a ProbDist, the matching {outcome: probability} pairs are returned as
    a new ProbDist (which renormalizes them); for any other collection, the
    matching outcomes are returned as a set.
    """
    if not isinstance(space, ProbDist):
        return {outcome for outcome in space if predicate(outcome)}
    kept = {}
    for outcome in space:
        if predicate(outcome):
            kept[outcome] = space[outcome]
    return ProbDist(kept)

### Question 1)

#### Part 1: What is the Probability Distribution for each F1 driver to win the Italy Grand Prix?

Let IGP, RGP, and TGP denote the Italian, Russian, and Turkish Grand Prix, respectively.

Now, to calculate the probability distribution for each F1 driver, we take the drivers' standings and pass them to our ***ProbDist*** class, which normalizes them into probabilities.

In [310]:
# Unnormalized weight per driver for the Italian GP (the drivers' standings);
# ProbDist rescales the weights so they sum to 1.
IGP = ProbDist(
    MV = 287.5,
    LH = 275.5,
    VB = 185,
    SP = 150,
    LN = 149,
    CL = 128,
    CS = 122.5,
    DR = 105,
    PG = 74,
    FA = 58,
    EO = 46,
    SV = 36,
    LS = 26,
    YT = 20,
    GR = 16,
    NL = 7,
    KR = 6,
    AG = 1,
    MS = 0,
    NM = 0)

print(IGP)

{'MV': 0.16986706056129985, 'LH': 0.16277695716395865, 'VB': 0.10930576070901034, 'SP': 0.08862629246676514, 'LN': 0.0880354505169867, 'CL': 0.07562776957163958, 'CS': 0.0723781388478582, 'DR': 0.0620384047267356, 'PG': 0.04372230428360414, 'FA': 0.03426883308714919, 'EO': 0.027178729689807977, 'SV': 0.021270310192023634, 'LS': 0.01536189069423929, 'YT': 0.011816838995568686, 'GR': 0.009453471196454948, 'NL': 0.00413589364844904, 'KR': 0.0035450516986706058, 'AG': 0.0005908419497784342, 'MS': 0.0, 'NM': 0.0}


#### Let's also calculate the RGP and TGP probability distributions for later use.

In [311]:
# Weights for the Russian GP: the same values as IGP, plus an extra driver
# entry RK with weight 0.
RGP = ProbDist(
    MV = 287.5,
    LH = 275.5,
    VB = 185,
    SP = 150,
    LN = 149,
    CL = 128,
    CS = 122.5,
    DR = 105,
    PG = 74,
    FA = 58,
    EO = 46,
    SV = 36,
    LS = 26,
    YT = 20,
    GR = 16,
    NL = 7,
    KR = 6,
    AG = 1,
    MS = 0,
    RK = 0,
    NM = 0)

print(RGP)

{'MV': 0.16986706056129985, 'LH': 0.16277695716395865, 'VB': 0.10930576070901034, 'SP': 0.08862629246676514, 'LN': 0.0880354505169867, 'CL': 0.07562776957163958, 'CS': 0.0723781388478582, 'DR': 0.0620384047267356, 'PG': 0.04372230428360414, 'FA': 0.03426883308714919, 'EO': 0.027178729689807977, 'SV': 0.021270310192023634, 'LS': 0.01536189069423929, 'YT': 0.011816838995568686, 'GR': 0.009453471196454948, 'NL': 0.00413589364844904, 'KR': 0.0035450516986706058, 'AG': 0.0005908419497784342, 'MS': 0.0, 'RK': 0.0, 'NM': 0.0}


In [312]:
# Weights for the third race (TGP); the entries are identical to RGP's.
TGP = ProbDist(
    MV = 287.5,
    LH = 275.5,
    VB = 185,
    SP = 150,
    LN = 149,
    CL = 128,
    CS = 122.5,
    DR = 105,
    PG = 74,
    FA = 58,
    EO = 46,
    SV = 36,
    LS = 26,
    YT = 20,
    GR = 16,
    NL = 7,
    KR = 6,
    AG = 1,
    MS = 0,
    RK = 0,
    NM = 0)

print(TGP)

{'MV': 0.16986706056129985, 'LH': 0.16277695716395865, 'VB': 0.10930576070901034, 'SP': 0.08862629246676514, 'LN': 0.0880354505169867, 'CL': 0.07562776957163958, 'CS': 0.0723781388478582, 'DR': 0.0620384047267356, 'PG': 0.04372230428360414, 'FA': 0.03426883308714919, 'EO': 0.027178729689807977, 'SV': 0.021270310192023634, 'LS': 0.01536189069423929, 'YT': 0.011816838995568686, 'GR': 0.009453471196454948, 'NL': 0.00413589364844904, 'KR': 0.0035450516986706058, 'AG': 0.0005908419497784342, 'MS': 0.0, 'RK': 0.0, 'NM': 0.0}


#### We also obtain joint probability distribution on both IGP and RGP for later use.

In [313]:
def joint(A, B, sep=' '):
    """Joint distribution of two independent probability distributions.

    Every pair (a, b) of outcomes becomes one entry keyed a + sep + b whose
    weight is A[a] * B[b]; the result is wrapped in a ProbDist.
    """
    products = {}
    for a in A:
        for b in B:
            products[a + sep + b] = A[a] * B[b]
    return ProbDist(products)

# Joint distribution over (IGP winner, RGP winner) pairs, keyed "a b".
JPD = joint(IGP, RGP, ' ')
JPD

{'MV MV': 0.028854818263736336,
 'MV LH': 0.027650443240554302,
 'MV VB': 0.018567448274056427,
 'MV SP': 0.01505468778977548,
 'MV LN': 0.014954323204510312,
 'MV CL': 0.012846666913941743,
 'MV CS': 0.012294661694983309,
 'MV DR': 0.010538281452842836,
 'MV PG': 0.007426979309622571,
 'MV FA': 0.005821145945379853,
 'MV EO': 0.004616770922197815,
 'MV SV': 0.003613125069546115,
 'MV LS': 0.0026094792168944165,
 'MV YT': 0.0020072917053033973,
 'MV GR': 0.0016058333642427179,
 'MV NL': 0.0007025520968561891,
 'MV KR': 0.0006021875115910192,
 'MV AG': 0.00010036458526516987,
 'MV MS': 0.0,
 'MV RK': 0.0,
 'MV NM': 0.0,
 'LH MV': 0.027650443240554302,
 'LH LH': 0.026496337783557256,
 'LH VB': 0.01779245912870451,
 'LH SP': 0.014426318212463112,
 'LH LN': 0.014330142757713358,
 'LH CL': 0.012310458207968522,
 'LH CS': 0.011781493206844875,
 'LH DR': 0.01009842274872418,
 'LH PG': 0.007116983651481803,
 'LH FA': 0.005578176375485738,
 'LH EO': 0.004424070918488688,
 'LH SV': 0.00346231637

#### Part 2: What is the Probability Distribution for each F1 driver to win ***both*** the Italy and the Russia Grand Prix? 

For this we obtain the joint distribution on IGP, RGP where the same driver wins both the races (a==b), where ***a*** is the driver for first race and ***b*** is the driver for second race.

In [314]:
def joint_both_IGP_RGP(A, B, sep=' '):
    """Distribution over outcomes in which the same driver wins under both A and B.

    For every outcome a present in both distributions, the entry a + sep + a
    gets weight A[a] * B[a]. Because the result is a ProbDist, the weights are
    renormalized to sum to 1: each value is the probability that driver a won
    both races *given that* one driver won both, not the raw product.
    """
    # A direct membership test replaces the full scan of B for a matching key.
    return ProbDist({a + sep + a: A[a] * B[a]
                     for a in A if a in B})

# Same-driver-wins-both distribution for IGP and RGP (renormalized by ProbDist).
JPD_both_IGP_RGP = joint_both_IGP_RGP(IGP, RGP, ' ')
JPD_both_IGP_RGP

{'MV MV': 0.2815576642870342,
 'LH LH': 0.25854423723314296,
 'VB VB': 0.11658296934864268,
 'SP SP': 0.07664329613862557,
 'LN LN': 0.07562479189216118,
 'CL CL': 0.055809945063788495,
 'CS CS': 0.051116820563566666,
 'DR DR': 0.03755521510792653,
 'PG PG': 0.01865327509578283,
 'FA FA': 0.011459024364903843,
 'EO EO': 0.007207876205748077,
 'SV SV': 0.004414653857584833,
 'LS LS': 0.0023027052528760395,
 'YT YT': 0.0013625474869088992,
 'GR GR': 0.0008720303916216952,
 'NL NL': 0.00016691206714634015,
 'KR KR': 0.00012262927382180092,
 'AG AG': 3.406368717272247e-06,
 'MS MS': 0.0,
 'NM NM': 0.0}

#### Part 3: What is the probability for Red Bull Racing to win ***both*** races? 

The Red Bull team consists of ***MV*** & ***SP***, so we only consider the possible permutations of these two drivers in our joint distribution.

In [315]:
def redbull_IGP_RGP(outcome):
    """True when the outcome contains a pair of Red Bull winners (MV/SP in either order, or the same driver twice)."""
    winning_pairs = ('MV SP', 'SP MV', 'MV MV', 'SP SP')
    return any(pair in outcome for pair in winning_pairs)

In [316]:
# Probability that both race winners are Red Bull drivers, measured over JPD.
p(redbull_IGP_RGP, JPD)

0.0668188135596919

#### Part 4: What is the probability for Red Bull Racing to win ***at least*** one race?

Either ***MV*** or ***SP*** can win.

In [317]:
def redbull_IGP_RGP(outcome):
    """True when a Red Bull driver code ('MV' or 'SP') appears anywhere in the outcome.

    NOTE: this rebinds the name used earlier for the "wins both races" predicate.
    """
    return any(code in outcome for code in ('MV', 'SP'))

In [318]:
# Probability that at least one of the two winners is a Red Bull driver
# (uses the predicate as redefined in the previous cell).
p(redbull_IGP_RGP, JPD)

0.45016789249643885

#### Part 5: What is the probability for Red Bull Racing to win ***all three*** races? 

To calculate this, we first calculate the joint distribution of all the three races. 

Next, we simply count the occurrences of ***MV*** or ***SP*** in each outcome and check that the count equals 3 (either driver may win any of the three races).

In [319]:
def joint_all(A, B, C, sep=' '):
    """Joint distribution of three independent probability distributions.

    Every triple (a, b, c) becomes one entry keyed a + sep + b + sep + c with
    weight A[a] * B[b] * C[c]; the result is wrapped in a ProbDist.
    """
    products = {}
    for a in A:
        for b in B:
            for c in C:
                products[a + sep + b + sep + c] = A[a] * B[b] * C[c]
    return ProbDist(products)

# Joint distribution over the winners of all three races, keyed "a b c".
JPD_all = joint_all(IGP, RGP, TGP, ' ')
JPD_all

{'MV MV MV': 0.004901483161491395,
 'MV MV LH': 0.004696899516490016,
 'MV MV VB': 0.003153997860437941,
 'MV MV SP': 0.002557295562517249,
 'MV MV LN': 0.002540246925433801,
 'MV MV CL': 0.0021822255466813862,
 'MV MV CS': 0.0020884580427224203,
 'MV MV DR': 0.0017901068937620746,
 'MV MV PG': 0.0012615991441751763,
 'MV MV FA': 0.0009888209508400032,
 'MV MV EO': 0.0007842373058386232,
 'MV MV SV': 0.0006137509350041398,
 'MV MV LS': 0.00044326456416965654,
 'MV MV YT': 0.0003409727416689666,
 'MV MV GR': 0.0002727781933351733,
 'MV MV NL': 0.00011934045958413832,
 'MV MV KR': 0.00010229182250069,
 'MV MV AG': 1.704863708344833e-05,
 'MV MV MS': 0.0,
 'MV MV RK': 0.0,
 'MV MV NM': 0.0,
 'MV LH MV': 0.004696899516490016,
 'MV LH LH': 0.0045008550149321725,
 'MV LH VB': 0.003022352732350097,
 'MV LH SP': 0.002450556269473051,
 'MV LH LN': 0.0024342192276765647,
 'MV LH CL': 0.0020911413499503373,
 'MV LH CS': 0.002001287620069659,
 'MV LH DR': 0.001715389388631136,
 'MV LH PG': 0.00120

In [320]:
def redbull_IGP_RGP_TGP(outcome):
    """True when exactly three of the whitespace-separated winners are Red Bull drivers (MV or SP)."""
    winners = outcome.split()
    red_bull_wins = sum(1 for winner in winners if winner in ('MV', 'SP'))
    return red_bull_wins == 3

# Probability that all three winners are Red Bull drivers, measured over JPD_all.
p(redbull_IGP_RGP_TGP, JPD_all)

0.017272219162401868

### Question 2) If Red Bull Racing wins the first race, what is the probability that Red Bull Racing wins the next one? If Red Bull Racing wins at least one of these two races, what is the probability Red Bull Racing wins both races? How about Mercedes, McLaren, and Ferrari?



### Redbull

#### If Redbull Racing wins the first race, what is the probability that Redbull Racing wins the next one. 

We restrict the joint distribution to the outcomes where Red Bull wins the first race, and use that as the condition for computing the probability that Red Bull also wins the second race.

In [321]:
def redbull_1st_win(outcome):
    """True when the outcome begins with a Red Bull driver code ('MV' or 'SP')."""
    return outcome.startswith(('MV', 'SP'))

def redbull_2nd_win(outcome):
    """True when the outcome ends with a Red Bull driver code ('MV' or 'SP')."""
    return outcome.endswith(('MV', 'SP'))

# Conditional probability: such_that keeps (and renormalizes) the outcomes
# where Red Bull won race 1; p then measures a Red Bull win in race 2.
p(redbull_2nd_win, such_that(redbull_1st_win, JPD))

0.2584933530280649

#### Red Bull Racing wins at least one of these two races, what is the probability Red Bull Racing wins both races.

Similarly, we first restrict the joint distribution to the outcomes where Red Bull wins at least one of the two races, and use that as the condition for the probability that Red Bull wins both races.

In [322]:
def redbull_atleast1(outcome):
    """True when a Red Bull driver code ('MV' or 'SP') appears anywhere in the outcome."""
    return any(code in outcome for code in ('MV', 'SP'))
def redbull_both(outcome):
    """True when the outcome contains a pair of Red Bull winners (any combination of MV and SP)."""
    winning_pairs = ('MV SP', 'SP MV', 'MV MV', 'SP SP')
    return any(pair in outcome for pair in winning_pairs)

# P(Red Bull wins both | Red Bull wins at least one of the two races).
p(redbull_both, such_that(redbull_atleast1, JPD))

0.14843087362171323

### We repeat the above method for the Mercedes, McLaren and Ferrari teams and their drivers to find the corresponding probabilities

### Mercedes

#### If Mercedes Racing wins the first race, what is the probability that Mercedes Racing wins the next one

In [323]:
def merc_1st_win(outcome):
    """True when the outcome begins with a Mercedes driver code ('LH' or 'VB')."""
    return outcome.startswith(('LH', 'VB'))

def merc_2nd_win(outcome):
    """True when the outcome ends with a Mercedes driver code ('LH' or 'VB')."""
    return outcome.endswith(('LH', 'VB'))

# P(Mercedes wins race 2 | Mercedes wins race 1).
p(merc_2nd_win, such_that(merc_1st_win, JPD))

0.272082717872969

#### If Mercedes Racing wins at least one of these two races, what is the probability that Mercedes Racing wins both races

In [324]:
def merc_atleast1(outcome):
    """True when a Mercedes driver code ('LH' or 'VB') appears anywhere in the outcome."""
    return any(code in outcome for code in ('LH', 'VB'))
def merc_both(outcome):
    """True when the outcome contains a pair of Mercedes winners (any combination of LH and VB)."""
    winning_pairs = ('LH VB', 'VB LH', 'LH LH', 'VB VB')
    return any(pair in outcome for pair in winning_pairs)

# P(Mercedes wins both | Mercedes wins at least one of the two races).
p(merc_both, such_that(merc_atleast1, JPD))

0.15746281415626612

### McLaren

#### If McLaren Racing wins the first race, what is the probability that McLaren Racing wins the next one

In [325]:
def mclaren_1st_win(outcome):
    """True when the outcome begins with a McLaren driver code ('LN' or 'DR')."""
    return outcome.startswith(('LN', 'DR'))

def mclaren_2nd_win(outcome):
    """True when the outcome ends with a McLaren driver code ('LN' or 'DR')."""
    return outcome.endswith(('LN', 'DR'))

# P(McLaren wins race 2 | McLaren wins race 1).
p(mclaren_2nd_win,such_that(mclaren_1st_win,JPD))

0.15007385524372233

#### If McLaren Racing wins at least one of these two races, what is the probability that McLaren Racing wins both races

In [326]:
def mclaren_atleast1(outcome):
    """True when a McLaren driver code ('LN' or 'DR') appears anywhere in the outcome."""
    return any(code in outcome for code in ('LN', 'DR'))
def mclaren_both(outcome):
    """True when the outcome contains a pair of McLaren winners (any combination of LN and DR)."""
    winning_pairs = ('LN DR', 'DR LN', 'LN LN', 'DR DR')
    return any(pair in outcome for pair in winning_pairs)

# P(McLaren wins both | McLaren wins at least one of the two races).
p(mclaren_both, such_that(mclaren_atleast1, JPD))

0.08112424145640376

### Ferrari

#### If Ferrari Racing wins the first race, what is the probability that Ferrari Racing wins the next one

In [327]:
def ferrari_1st_win(outcome):
    """True when the outcome begins with a Ferrari driver code ('CL' or 'CS')."""
    return outcome.startswith(('CL', 'CS'))

def ferrari_2nd_win(outcome):
    """True when the outcome ends with a Ferrari driver code ('CL' or 'CS')."""
    return outcome.endswith(('CL', 'CS'))

# P(Ferrari wins race 2 | Ferrari wins race 1).
p(ferrari_2nd_win, such_that(ferrari_1st_win, JPD))

0.14800590841949776

#### If Ferrari Racing wins at least one of these two races, what is the probability that Ferrari Racing wins both races

In [328]:
def ferrari_atleast1(outcome):
    """True when a Ferrari driver code ('CL' or 'CS') appears anywhere in the outcome."""
    return any(code in outcome for code in ('CL', 'CS'))
def ferrari_both(outcome):
    """True when the outcome contains a pair of Ferrari winners (any combination of CL and CS)."""
    winning_pairs = ('CL CS', 'CS CL', 'CL CL', 'CS CS')
    return any(pair in outcome for pair in winning_pairs)

# P(Ferrari wins both | Ferrari wins at least one of the two races).
p(ferrari_both, such_that(ferrari_atleast1, JPD))

0.07991705216142925

### **Question 3**) Red Bull Racing wins at least one of these two races on a ***rainy day***. What is the probability Red Bull Racing wins ***both*** races, assuming races can be held on either rainy, sunny, cloudy, snowy or foggy days? Also assume that rain, sun, clouds, snow, and fog are the *only possible weather conditions* on race tracks, and that they're *equiprobable*.


Let's first calculate the weather distribution given all the conditions are ***equiprobable***. 

Next, we calculate the joint distribution considering the weather distribution for each race. (Race, Weather, Race, Weather)

In [329]:
# Five weather conditions with equal weight (0.2 each).
weather_dist = ProbDist(rain=0.2,
                        sun=0.2,
                        clouds=0.2,
                        snow=0.2,
                        fog=0.2)

def joint(A, B, C, D, sep=' '):
    """Joint distribution of four independent probability distributions.

    Every combination (a, b, c, d) becomes one entry keyed
    a + b + sep + c + d (a and b are concatenated directly, as are c and d)
    with weight A[a] * B[b] * C[c] * D[d]; the result is wrapped in a ProbDist.

    NOTE: this rebinds the name `joint`, replacing the two-distribution
    version defined earlier in the notebook.
    """
    products = {}
    for a in A:
        for b in B:
            for c in C:
                for d in D:
                    products[a + b + sep + c + d] = A[a] * B[b] * C[c] * D[d]
    return ProbDist(products)

# Joint distribution over (winner, weather) for each of the two races;
# keys look like "MVrain SPsun".
weatherJD = joint(IGP, weather_dist, RGP, weather_dist, ' ')
print(weatherJD)

{'MVrain MVrain': 0.001154192730549468, 'MVrain MVsun': 0.001154192730549468, 'MVrain MVclouds': 0.001154192730549468, 'MVrain MVsnow': 0.001154192730549468, 'MVrain MVfog': 0.001154192730549468, 'MVrain LHrain': 0.001106017729622186, 'MVrain LHsun': 0.001106017729622186, 'MVrain LHclouds': 0.001106017729622186, 'MVrain LHsnow': 0.001106017729622186, 'MVrain LHfog': 0.001106017729622186, 'MVrain VBrain': 0.0007426979309622663, 'MVrain VBsun': 0.0007426979309622663, 'MVrain VBclouds': 0.0007426979309622663, 'MVrain VBsnow': 0.0007426979309622663, 'MVrain VBfog': 0.0007426979309622663, 'MVrain SPrain': 0.0006021875115910268, 'MVrain SPsun': 0.0006021875115910268, 'MVrain SPclouds': 0.0006021875115910268, 'MVrain SPsnow': 0.0006021875115910268, 'MVrain SPfog': 0.0006021875115910268, 'MVrain LNrain': 0.00059817292818042, 'MVrain LNsun': 0.00059817292818042, 'MVrain LNclouds': 0.00059817292818042, 'MVrain LNsnow': 0.00059817292818042, 'MVrain LNfog': 0.00059817292818042, 'MVrain CLrain': 0.

#### Red Bull Racing wins at least one of these two races on a ***rainy day***.

First, we calculate the probability of Red Bull winning at least one of the two races on a rainy day (MVrain or SPrain).

In [330]:
def redbull_atleast_1_rainy_day(outcome):
    """True when the outcome contains a Red Bull win in the rain ('MVrain' or 'SPrain')."""
    return any(tag in outcome for tag in ('MVrain', 'SPrain'))

In [331]:
# Probability that Red Bull wins at least one of the two races in the rain.
p(redbull_atleast_1_rainy_day, weatherJD)

0.10072458866883949

#### What is the probability Red Bull Racing wins ***both*** races, assuming races can be held on either rainy, sunny, cloudy, snowy or foggy days?

Next, we calculate the probability of Red Bull winning both races in any weather, conditioned on it winning at least one race on a rainy day.

In [332]:
def redbull_both_any_weather(outcome):
    """True when both space-separated race entries contain a Red Bull driver code ('MV' or 'SP').

    The weather suffix of each entry is ignored; only the first two entries
    of the outcome are inspected.
    """
    entries = outcome.split(' ')
    first_race, second_race = entries[0], entries[1]
    first_is_red_bull = 'MV' in first_race or 'SP' in first_race
    second_is_red_bull = 'MV' in second_race or 'SP' in second_race
    return first_is_red_bull and second_is_red_bull

In [333]:
# P(Red Bull wins both races in any weather | Red Bull wins at least one race in the rain).
p(redbull_both_any_weather, such_that(redbull_atleast_1_rainy_day, weatherJD))

0.23881728582259332