# Assignment 4 : Introduction to Data Science for Sports

In [48]:
class ProbDist(dict):
    """A Probability Distribution; an {outcome: probability} mapping."""
    def __init__(self, mapping=(), **kwargs):
        self.update(mapping, **kwargs)
        # Make probabilities sum to 1.0; assert no negative probabilities
        total = sum(self.values())
        for outcome in self:
            self[outcome] = self[outcome] / total
            assert self[outcome] >= 0

In [49]:
def p(event , space): 
    """The probability of an event, given a sample space of equiprobable outcomes. 
    event: a collection of outcomes, or a predicate that is true of outcomes in the event. 
    space: a set of outcomes or a probability distribution of {outcome: frequency} pairs."""
    if is_predicate(event):
        event = such_that(event, space)
    if isinstance(space, ProbDist):
        return sum(space[o] for o in space if o in event)
    else:
        return Fraction(len(event & space), len(space))

is_predicate = callable

def such_that(predicate, space): 
    """The outcomes in the sample pace for which the predicate is true.
    If space is a set, return a subset {outcome,...} with outcomes where predicate(element) is true;
    if space is a ProbDist, return a ProbDist {outcome: frequency,...} with outcomes where predicate(element) is true."""
    if isinstance(space, ProbDist):
        return ProbDist({o:space[o] for o in space if predicate(o)})
    else:
        return {o for o in space if predicate(o)}

Singapore GP Probability distribution

In [50]:
SGP = ProbDist(
    LH = 413,
    VB = 326,
    MV = 278,
    LC = 264,
    SV = 240,
    CS = 96,
    PG = 95,
    AA = 92,
    RD = 54,
    SP = 52,
    LN = 49,
    KR = 43,
    DK = 37,
    NH = 37,
    LS = 21,
    KM = 20,
    AG = 14,
    RG = 8,
    RK = 1,
    GR = 0)

In [51]:
SGP

{'LH': 0.1929906542056075,
 'VB': 0.15233644859813084,
 'MV': 0.12990654205607477,
 'LC': 0.1233644859813084,
 'SV': 0.11214953271028037,
 'CS': 0.044859813084112146,
 'PG': 0.04439252336448598,
 'AA': 0.04299065420560748,
 'RD': 0.025233644859813085,
 'SP': 0.024299065420560748,
 'LN': 0.022897196261682243,
 'KR': 0.020093457943925235,
 'DK': 0.017289719626168223,
 'NH': 0.017289719626168223,
 'LS': 0.009813084112149532,
 'KM': 0.009345794392523364,
 'AG': 0.0065420560747663555,
 'RG': 0.003738317757009346,
 'RK': 0.00046728971962616824,
 'GR': 0.0}

Russian GP Probability Distribution

In [52]:
RGP = ProbDist(
    LH = 439,
    VB = 344,
    MV = 290,
    CL = 279,
    SV = 240,
    CS = 104,
    PG = 95,
    AA = 102,
    DR = 54,
    SP = 58,
    LN = 53,
    KR = 43,
    DK = 37,
    NH = 38,
    LS = 21,
    KM = 22,
    AG = 14,
    RG = 8,
    RK = 1,
    GR = 0
)

Joint Probability Distribution

In [53]:
def joint(A, B, sep=''):
    """The joint distribution of two independent probability distributions. 
    Result is all entries of the form {a+sep+b: P(a)*P(b)}"""
    return ProbDist({a + sep + b: A[a] * B[b]
                    for a in A
                    for b in B})

J = joint(SGP, RGP, '_')
J

{'LH_LH': 0.037788981800295135,
 'LH_VB': 0.029611411706837194,
 'LH_MV': 0.024963108706345313,
 'LH_CL': 0.024016232169208077,
 'LH_SV': 0.0206591244466306,
 'LH_CS': 0.008952287260206594,
 'LH_PG': 0.008177570093457947,
 'LH_AA': 0.008780127889818006,
 'LH_DR': 0.004648303000491885,
 'LH_SP': 0.004992621741269062,
 'LH_LN': 0.004562223315297591,
 'LH_KR': 0.003701426463354649,
 'LH_DK': 0.0031849483521888848,
 'LH_NH': 0.0032710280373831786,
 'LH_LS': 0.0018076733890801777,
 'LH_KM': 0.001893753074274472,
 'LH_AG': 0.0012051155927201183,
 'LH_RG': 0.0006886374815543533,
 'LH_RK': 8.607968519429417e-05,
 'LH_GR': 0.0,
 'VB_LH': 0.029828590961007782,
 'VB_VB': 0.023373656698375118,
 'VB_MV': 0.019704536170141815,
 'VB_CL': 0.0189571227292054,
 'VB_SV': 0.01630720234770357,
 'VB_CS': 0.007066454350671548,
 'VB_PG': 0.006454934262632664,
 'VB_AA': 0.0069305609977740185,
 'VB_DR': 0.0036691205282333036,
 'VB_SP': 0.003940907234028363,
 'VB_LN': 0.0036011738517845392,
 'VB_KR': 0.002921707

# MERCEDES

## Q 1.2

The probability for Mercedes to win both races

In [54]:
def mercedes_both(outcome): return 'LH_VB' in outcome or 'VB_LH' in outcome or 'LH_LH' in outcome or 'VB_VB' in outcome

In [55]:
p(mercedes_both, J)

0.12060264116651521

The probability for Mercedes to win atleast one race

In [56]:
def mercedes_atleast_one(outcome): return 'LH' in outcome or 'VB' in outcome

In [57]:
p(mercedes_atleast_one, J)

0.5739662100761173

If Mercedes wins the first race, the probability that Mercedes wins the next one

In [58]:
def mercedes_first(outcome) : return outcome.startswith('LH') or outcome.startswith('VB')
def mercedes_second(outcome) : return outcome.endswith('LH') or outcome.endswith('VB')

In [59]:
p(mercedes_second, such_that(mercedes_first, J))

0.3492417484388938

If Mercedes wins at least one of these two races, the probability Mercedes wins both races

In [60]:
def merc(outcome) : return outcome.startswith('LH') or outcome.startswith('VB') or outcome.endswith('LH') or outcome.endswith('VB')

In [61]:
p(merc, J)

0.5739662100761173

In [62]:
def merc_one(outcome): return outcome.startswith("LH") or outcome.startswith("VB") or outcome.endswith("LH") or outcome.endswith("VB")

def merc_both(outcome): return outcome.startswith("LH_VB") or outcome.startswith("VB_LH") or outcome.startswith("LH_LH") or outcome.startswith("VB_VB")

In [63]:
p(merc_both, such_that(merc_one,J))

0.2101215002718041

# FERRARI

The probability for Ferrari to win both races

In [64]:
def ferrari_both(outcome): return 'CL_SV' in outcome or 'SV_CL' in outcome or 'CL_CL' in outcome or 'SV_SV' in outcome

In [65]:
p(ferrari_both, J)

0.02596146631428882

The probability for Ferrari to win atleast one race

In [66]:
def ferrari_atleast_one(outcome): return 'CL' in outcome or 'SV' in outcome

In [67]:
p(ferrari_atleast_one, J)

0.3176778076984004

If Ferrari wins the first race, the probability that Ferrari wins the next one

In [68]:
def ferrari_first(outcome) : return outcome.startswith('CL') or outcome.startswith('SV')
def ferrari_second(outcome) : return outcome.endswith('CL') or outcome.endswith('SV')

In [69]:
p(ferrari_second, such_that(ferrari_first, J))

0.23148974130240862

If Ferrari wins at least one of these two races, the probability Ferrari wins both races

In [70]:
def ferrari_one(outcome): return outcome.startswith("CL") or outcome.startswith("SV") or outcome.endswith("CL") or outcome.endswith("SV")

def ferrari_both(outcome): return outcome.startswith("CL_SV") or outcome.startswith("SV_CL") or outcome.startswith("SV_SV") or outcome.startswith("CL_CL")

In [71]:
p(ferrari_both, such_that(ferrari_one,J))

0.08172263118529302

The probability for Alfa Romeo to win both races

In [72]:
def alfa_romeo_both(outcome): return 'KR_AG' in outcome or 'AG_KR' in outcome or 'AG_AG' in outcome or 'KR_KR' in outcome

In [73]:
p(alfa_romeo_both, J)

0.0006771740852209728

The probability for Alfa Romeo to win atleast one race

In [74]:
def alfa_romeo_one(outcome): return 'KR' in outcome or 'AG' in outcome

In [75]:
p(alfa_romeo_one, J)

0.05138206874702995

If Alfa Romeo wins the first race, the probability that Alpha Romeo wins the next one

In [76]:
def alfa_romeo_first(outcome) : return outcome.startswith('KR') or outcome.startswith('AG')
def alfa_romeo_second(outcome) : return outcome.endswith('KR') or outcome.endswith('AG')

In [77]:
p(alfa_romeo_second, such_that(alfa_romeo_first, J))

0.025423728813559327

If Alfa Romeo wins at least one of these two races, the probability Alfa Romeo wins both races

In [78]:
def alfa_romeo_one(outcome): return outcome.startswith("KR") or outcome.startswith("AG") or outcome.endswith("KR") or outcome.endswith("AG")

def alfa_romeo_both(outcome): return outcome.startswith("KR_AG") or outcome.startswith("AG_KR") or outcome.startswith("KR_KR") or outcome.startswith("AG_AG")

In [79]:
p(alfa_romeo_both, such_that(alfa_romeo_one,J))

0.013179190751445089

# McLaren

The probability for McLaren to win both races

In [80]:
def mclaren_both(outcome): return 'CS_LN' in outcome or 'LN_CS' in outcome or 'LN_LN' in outcome or 'CS_CS' in outcome

In [81]:
p(mclaren_both, J)

0.004744803955080161

The probability for McLaren to win atleast one race

In [82]:
def mclaren_one(outcome): return 'CS' in outcome or 'LN' in outcome

In [83]:
p(mclaren_one, J)

0.13303896721051797

If McLaren wins the first race, the probability that McLaren wins the next one

In [84]:
def mclaren_first(outcome) : return outcome.startswith('CS') or outcome.startswith('LN')
def mclaren_second(outcome) : return outcome.endswith('CS') or outcome.endswith('LN')

In [85]:
p(mclaren_second, such_that(mclaren_first, J))

0.07002676181980376

If McLaren wins at least one of these two races, the probability McLaren wins both races

In [86]:
def mclaren_one(outcome): return outcome.startswith("CS") or outcome.startswith("LN") or outcome.endswith("CS") or outcome.endswith("LN")

def mclaren_both(outcome): return outcome.startswith("CS_LN") or outcome.startswith("LN_CS") or outcome.startswith("CS_CS") or outcome.startswith("LN_LN")

In [87]:
p(mclaren_both, such_that(mclaren_one,J))

0.035664768410086096

# Q 1.3 : Weather Conditions

Considering equiprobable weather conditions

In [88]:
Weather = ProbDist(Rain=0.2, Sun=0.2, Clouds=0.2, Snow=0.2, fog=0.2)

Joint Probability Distribution for the Singapore Grand Prix, Russian Grand Prix and Weather Conditions 

In [89]:
def joint(A, B, C, D ,sep=''):
    """The joint distribution of two independent probability distributions. 
    Result is all entries of the form {a+sep+b: P(a)*P(b)}"""
    return ProbDist({a + sep + b + sep + c+ sep+ d: A[a] * B[b] * C[c] * D[d]
                    for a in A
                    for b in B
                    for c in C
                    for d in D})

W = joint(SGP, Weather ,RGP, Weather,'_')
W

{'LH_Rain_LH_Rain': 0.001511559272011807,
 'LH_Rain_LH_Sun': 0.001511559272011807,
 'LH_Rain_LH_Clouds': 0.001511559272011807,
 'LH_Rain_LH_Snow': 0.001511559272011807,
 'LH_Rain_LH_fog': 0.001511559272011807,
 'LH_Rain_VB_Rain': 0.0011844564682734887,
 'LH_Rain_VB_Sun': 0.0011844564682734887,
 'LH_Rain_VB_Clouds': 0.0011844564682734887,
 'LH_Rain_VB_Snow': 0.0011844564682734887,
 'LH_Rain_VB_fog': 0.0011844564682734887,
 'LH_Rain_MV_Rain': 0.0009985243482538136,
 'LH_Rain_MV_Sun': 0.0009985243482538136,
 'LH_Rain_MV_Clouds': 0.0009985243482538136,
 'LH_Rain_MV_Snow': 0.0009985243482538136,
 'LH_Rain_MV_fog': 0.0009985243482538136,
 'LH_Rain_CL_Rain': 0.0009606492867683239,
 'LH_Rain_CL_Sun': 0.0009606492867683239,
 'LH_Rain_CL_Clouds': 0.0009606492867683239,
 'LH_Rain_CL_Snow': 0.0009606492867683239,
 'LH_Rain_CL_fog': 0.0009606492867683239,
 'LH_Rain_SV_Rain': 0.0008263649778652249,
 'LH_Rain_SV_Sun': 0.0008263649778652249,
 'LH_Rain_SV_Clouds': 0.0008263649778652249,
 'LH_Rain_SV_Sn

Mercedes wins at least one of these two races on a rainy day

In [90]:
def Mercedes_atleast_one(outcome):
    return 'LH_Rain' in outcome or 'VB_Rain' in outcome

In [91]:
p(Mercedes_atleast_one, W)

0.13408966460186592

The probability Mercedes wins both races, assuming races can be held on either rainy, sunny, cloudy, snowy or foggy days

In [92]:
def merc_first_win(outcome):
    return outcome.split('_')[0] == 'LH' or outcome.split('_')[0] == 'VB'

def merc_second_win(outcome):
    return outcome.split('_')[2] == 'LH' or outcome.split('_')[2] == 'VB'

In [93]:
p(merc_second_win, such_that(merc_first_win, W))

0.34924174843889355