# NEERAJ PREMANI           |  NU ID: 001306812

In [28]:
class ProbDist(dict):
    """A Probability Distribution; an {outcome: probability} mapping.

    The constructor accepts the same arguments as ``dict.update`` (a mapping
    or iterable of pairs, plus keyword arguments) holding non-negative
    weights, and rescales them in place so that they sum to 1.0.

    Raises:
        AssertionError: if any weight is not ``>= 0``.
        ZeroDivisionError: if outcomes are present but their weights sum to 0.
    """
    def __init__(self, mapping=(), **kwargs):
        self.update(mapping, **kwargs)
        # Validate BEFORE normalizing: dividing by a negative total flips the
        # sign of every weight, so an all-negative input would otherwise pass
        # a check made after the division. AssertionError is raised
        # explicitly so the exception type callers see is unchanged and the
        # check is not stripped under `python -O`.
        if not all(weight >= 0 for weight in self.values()):
            raise AssertionError("ProbDist weights must be non-negative")
        # Make probabilities sum to 1.0
        total = sum(self.values())
        for outcome in self:
            self[outcome] = self[outcome] / total

In [29]:
def p(event, space):
    """The probability of an event, given a sample space of outcomes.

    event: a collection of outcomes, or a predicate that is true of outcomes
        in the event.
    space: a set of outcomes, or a ProbDist of {outcome: probability} pairs.

    Returns the sum of the probabilities of the favorable outcomes when
    `space` is a ProbDist; otherwise returns a Fraction
    len(event & space) / len(space), so `event` and `space` must then support
    the `&` operator (e.g. be sets).
    """
    # Imported here because no module-level import of Fraction is visible in
    # this file; without it the equiprobable branch raises NameError.
    from fractions import Fraction

    # If event is a predicate, "unroll" it into a collection of outcomes.
    if is_predicate(event):
        event = such_that(event, space)

    # If space is not an equiprobable collection (a simple set) but a
    # probability distribution instead, add up the probabilities of all
    # favorable outcomes.
    if isinstance(space, ProbDist):
        return sum(space[o] for o in space if o in event)

    # Simplest case: equiprobable outcomes, so count favorable over total.
    return Fraction(len(event & space), len(space))

# A predicate is anything callable; p() uses this test to decide whether
# `event` must first be expanded into a collection via such_that().
is_predicate = callable

# Here we return either a plain set (equiprobable outcomes) or a ProbDist
# (a dictionary of outcome: probability pairs, for non-equiprobable outcomes)
def such_that(predicate, space):
    """Restrict a sample space to the outcomes accepted by `predicate`.

    For a ProbDist, the result is a new ProbDist built from the accepted
    outcomes and their current probabilities (the ProbDist constructor
    rescales them to sum to 1). For any other iterable, the result is the
    set of accepted outcomes.
    """
    if not isinstance(space, ProbDist):
        return set(outcome for outcome in space if predicate(outcome))

    accepted = {}
    for outcome in space:
        if predicate(outcome):
            accepted[outcome] = space[outcome]
    return ProbDist(accepted)

In [30]:
def joint(A, B, sep=''):
    """Combine two independent distributions into their joint distribution.

    Every pair (a, b) with a from A and b from B yields the entry
    {a + sep + b: A[a] * B[b]}; the collected entries are wrapped in a
    ProbDist. If two pairs produce the same combined key, the later pair
    replaces the earlier one.
    """
    products = {}
    for a in A:
        for b in B:
            products[a + sep + b] = A[a] * B[b]
    return ProbDist(products)



# Homework: Introduction to Data Science for Sports

<br />
<center>
    <img src="ipynb.images/f1races.png" width=800 />
</center>

Question 1.1 (20 points) There are a number of F1 races coming up: 
- Singapore GP: Date: Sun, Sep 22, 8:10 AM
- Russian GP: Date: Sun, Sep 29, 7:10 AM
- Japanese GP: Date: Sun, Oct 13, 1:10 AM
- Mexican GP: Date: Sun, Oct 13, 1:10 AM

The Singaporean Grand Prix is this weekend and the Russian Grand Prix is the weekend after, as you can see [here](https://www.formula1.com/en/racing/2019.html). 

The 2019 driver standings are given [here](https://www.formula1.com/en/results.html/2019/drivers.html). Given these standings (please do not use the team standings given on the same Web site; use driver standings), what is the Probability Distribution for each F1 driver to win the Singaporean Grand Prix? What is the Probability Distribution for each F1 driver to win *both* the Singaporean and Russian Grand Prix? What is the probability for Mercedes to win both races? What is the probability for Mercedes to win at least one race? Note that Mercedes, and each other racing team, has two drivers per race.

Question 1.2 (30 points) If Mercedes wins the first race, what is the probability that Mercedes wins the next one? If Mercedes wins at least one of these two races, what is the probability Mercedes wins both races? How about Ferrari, Red Bull, and Renault?

Question 1.3 (50 points) Mercedes wins one of these two races on a **rainy** day. What is the probability Mercedes wins both races, assuming races can be held on either rainy, sunny, cloudy, snowy or foggy days? Assume that rain, sun, clouds, snow, and fog are the *only possible weather conditions* on race tracks.

You need to provide *proof* for your answers. `I think it's one in a million because Mercedes sucks and I like Ferrari a lot more` is not a good answer. Leverage the counting framework in this workbook!

Hint: Use SingaporeanGrandPrix, or `SGP`, to denote the Probability Distribution given by F1 driver wins. Write driver initials as keys and driver wins as values in a dictionary that you pass to our function `ProbDist`.

### Question 1

# What is the Probability Distribution for each F1 driver to win the Singaporean Grand Prix?



Driver standings used for SGP and RGP are according to September 21st, 2019.

In [31]:
# Weights for the Singaporean GP, keyed by driver initials; ProbDist rescales
# them into probabilities. Figures follow the driver standings of
# 21 September 2019, per the note accompanying this cell.
SGP = ProbDist({
    'LH': 284, 'VB': 221, 'MV': 185, 'CL': 182, 'SV': 169,
    'PG': 65, 'CS': 58, 'DR': 34, 'AA': 34, 'DK': 33,
    'NH': 31, 'KR': 31, 'SP': 27, 'LN': 25, 'LS': 19,
    'KM': 18, 'RG': 8, 'AG': 3, 'RK': 1, 'GR': 0,
})
SGP

{'LH': 0.19887955182072828,
 'VB': 0.15476190476190477,
 'MV': 0.12955182072829133,
 'CL': 0.12745098039215685,
 'SV': 0.11834733893557423,
 'PG': 0.04551820728291316,
 'CS': 0.04061624649859944,
 'DR': 0.023809523809523808,
 'AA': 0.023809523809523808,
 'DK': 0.023109243697478993,
 'NH': 0.021708683473389355,
 'KR': 0.021708683473389355,
 'SP': 0.018907563025210083,
 'LN': 0.01750700280112045,
 'LS': 0.01330532212885154,
 'KM': 0.012605042016806723,
 'RG': 0.0056022408963585435,
 'AG': 0.0021008403361344537,
 'RK': 0.0007002801120448179,
 'GR': 0.0}

In [32]:
# Weights for the Russian GP, keyed by driver initials; the figures are the
# same ones used for the Singaporean GP distribution.
RGP = ProbDist({
    'LH': 284, 'VB': 221, 'MV': 185, 'CL': 182, 'SV': 169,
    'PG': 65, 'CS': 58, 'DR': 34, 'AA': 34, 'DK': 33,
    'NH': 31, 'KR': 31, 'SP': 27, 'LN': 25, 'LS': 19,
    'KM': 18, 'RG': 8, 'AG': 3, 'RK': 1, 'GR': 0,
})
RGP

{'LH': 0.19887955182072828,
 'VB': 0.15476190476190477,
 'MV': 0.12955182072829133,
 'CL': 0.12745098039215685,
 'SV': 0.11834733893557423,
 'PG': 0.04551820728291316,
 'CS': 0.04061624649859944,
 'DR': 0.023809523809523808,
 'AA': 0.023809523809523808,
 'DK': 0.023109243697478993,
 'NH': 0.021708683473389355,
 'KR': 0.021708683473389355,
 'SP': 0.018907563025210083,
 'LN': 0.01750700280112045,
 'LS': 0.01330532212885154,
 'KM': 0.012605042016806723,
 'RG': 0.0056022408963585435,
 'AG': 0.0021008403361344537,
 'RK': 0.0007002801120448179,
 'GR': 0.0}

# What is the Probability Distribution for each F1 driver to win both the Singaporean and Russian Grand Prix?

In [33]:
# Joint distribution over (Singapore winner, Russia winner); with an empty
# separator the keys are the two driver initials run together, e.g. 'LHVB'.
SRGP = joint(SGP, RGP, sep='')
SRGP

{'LHLH': 0.03955307613241369,
 'LHVB': 0.03077897825796981,
 'LHMV': 0.02576520804400188,
 'LHCL': 0.025347393859504547,
 'LHSV': 0.023536865726682798,
 'LHPG': 0.009052640664108767,
 'LHCS': 0.00807774090028167,
 'LHDR': 0.004735227424303048,
 'LHAA': 0.004735227424303048,
 'LHDK': 0.004595956029470606,
 'LHNH': 0.0043174132398057195,
 'LHKR': 0.0043174132398057195,
 'LHSP': 0.0037603276604759495,
 'LHLN': 0.0034817848708110647,
 'LHLS': 0.002646156501816409,
 'LHKM': 0.0025068851069839663,
 'LHRG': 0.0011141711586595408,
 'LHAG': 0.00041781418449732776,
 'LHRK': 0.0001392713948324426,
 'LHGR': 0.0,
 'VBLH': 0.03077897825796981,
 'VBVB': 0.02395124716553285,
 'VBMV': 0.020049686541283154,
 'VBCL': 0.019724556489262343,
 'VBSV': 0.018315659597172178,
 'VBPG': 0.007044484460450837,
 'VBCS': 0.006285847672402286,
 'VBDR': 0.003684807256235823,
 'VBAA': 0.003684807256235823,
 'VBDK': 0.003576430572228887,
 'VBNH': 0.003359677204215015,
 'VBKR': 0.003359677204215015,
 'VBSP': 0.00292617046

The most likely outcome is Lewis Hamilton winning both the SGP and RGP races, with a probability of 0.0395.

# What is the probability for Mercedes to win both races?

Created a cars dictionary for each driver for SGP and RGP
Applied the joint function onto SGP and RGP

In [34]:

# Team-level weights for each race, keyed by team code.
# NOTE(review): the values look like the two drivers' figures added per team
# (e.g. MR = 284 + 221) -- verify against the driver table.
cars_SGP = ProbDist({
    'MR': 505, 'RB': 219, 'RH': 98, 'FR': 351, 'RE': 65,
    'MCR': 83, 'PMER': 46, 'ARF': 34, 'WMER': 1, 'HFR': 26,
})
cars_RGP = ProbDist({
    'MR': 505, 'RB': 219, 'RH': 98, 'FR': 351, 'RE': 65,
    'MCR': 83, 'PMER': 46, 'ARF': 34, 'WMER': 1, 'HFR': 26,
})
# Unused alternative figures for the second race, kept from the original:
#cars_RGP= ProbDist(MR2=527, RB=242, RH=102, FR=394, RE=67, MCR=89, PMER=46, ARF=35, WMER=1, HFR=26)

# Joint team distribution over both races; keys look like 'MR RB'
# (first-race winner, a space, second-race winner).
MRWins = joint(cars_SGP, cars_RGP, sep=' ')
MRWins

{'MR MR': 0.12506227981388646,
 'MR RB': 0.05423492926582403,
 'MR RH': 0.024269511726259154,
 'MR FR': 0.08692447567262207,
 'MR RE': 0.01609712512455964,
 'MR MCR': 0.020554790543668468,
 'MR PMER': 0.011391811626611438,
 'MR ARF': 0.008420034680538889,
 'MR WMER': 0.00024764807883937914,
 'MR HFR': 0.006438850049823858,
 'RB MR': 0.05423492926582403,
 'RB RB': 0.02351970199844646,
 'RB RH': 0.01052479815455595,
 'RB FR': 0.037695960737236106,
 'RB RE': 0.006980733469858538,
 'RB MCR': 0.008913859661511673,
 'RB PMER': 0.0049402113786691185,
 'RB ARF': 0.0036514605842336966,
 'RB WMER': 0.0001073958995362852,
 'RB HFR': 0.0027922933879434156,
 'RH MR': 0.024269511726259154,
 'RH RB': 0.01052479815455595,
 'RH RH': 0.004709727028066134,
 'RH FR': 0.01686851211072666,
 'RH RE': 0.0031237985390234553,
 'RH MCR': 0.003988850442137644,
 'RH PMER': 0.0022106881968473684,
 'RH ARF': 0.0016339869281045767,
 'RH WMER': 4.805843906189931e-05,
 'RH HFR': 0.0012495194156093823,
 'FR MR': 0.08692

In [35]:
def first_MR(outcome):
    """Return True when the text before the first space in `outcome` is 'MR'."""
    first_winner, _, _ = outcome.partition(' ')
    return first_winner == 'MR'
def second_MR(outcome):
    """Return True when the second space-separated field of `outcome` is 'MR'.

    Raises IndexError if `outcome` contains no space.
    """
    fields = outcome.split(' ')
    return fields[1] == 'MR'

In [36]:

def first_MR_second_MR(outcome):
    """Return True only for the joint outcome 'MR MR' (MR in both fields)."""
    is_double_win = outcome == 'MR MR'
    return is_double_win

# Probability that the joint outcome is 'MR MR', i.e. Mercedes wins both races.
winboth = p(first_MR_second_MR, MRWins)
print(winboth)


0.12506227981388646


Probability that Mercedes will win both the races is 0.125

# What is the probability for Mercedes to win at least one race?

In [37]:
def Mwinone(outcome):
    """Return True when 'MR' is the first or the second field of `outcome`.

    Fields are separated by a single space. The second field is only
    examined when the first one is not 'MR'.
    """
    fields = outcome.split(' ')
    if fields[0] == 'MR':
        return True
    return fields[1] == 'MR'

def Mwinboth(outcome):
    """Return True when 'MR' is both the first and the second field of `outcome`.

    Fields are separated by a single space. The previous body joined the two
    comparisons with `or`, which made this identical to the "at least one"
    predicate; "both" requires `and`.
    """
    fields = outcome.split(' ')
    return fields[0] == 'MR' and fields[1] == 'MR'

#Atleast one
# Sum the joint probabilities of every outcome in which MR appears in
# either race.
atleast_one = p(Mwinone, MRWins)
print(atleast_one)

0.5822206333513801


Probability that Mercedes will win at least one race is 0.5822

# If Mercedes wins the first race, what is the probability that Mercedes wins the next one?

In [38]:


# Marginal probability that Mercedes wins the first race (computed for
# reference; not printed).
firstMRWin = p(first_MR, MRWins)

# Conditional probability: such_that() keeps only the outcomes whose first
# race went to Mercedes (rescaled to sum to 1), and p() then measures the
# share of those in which Mercedes also takes the second race.
secondMRWin = p(second_MR, such_that(first_MR, MRWins))
print(secondMRWin)

0.3536414565826331


First, used the function `p` to compute the probability that Mercedes wins the first race.
Then applied the `such_that` function to restrict the outcomes to those where Mercedes won the first race, and computed the probability that Mercedes also wins the second race.

# If Mercedes wins at least one of these two races, what is the probability Mercedes wins both races?

In [39]:


def MRwinone(outcome):
    """Return True when 'MR' is the first or the second field of `outcome`.

    Fields are separated by a single space; the second field is looked at
    only if the first is not 'MR'.
    """
    race_winners = outcome.split(' ')
    if race_winners[0] == 'MR':
        return True
    return race_winners[1] == 'MR'

def MRwinboth(outcome):
    """Return True when 'MR' is both the first and the second field of `outcome`.

    Fields are separated by a single space; the second field is looked at
    only if the first is 'MR'.
    """
    race_winners = outcome.split(' ')
    if race_winners[0] != 'MR':
        return False
    return race_winners[1] == 'MR'

# Condition on "Mercedes wins at least one race", then measure the share of
# those outcomes in which Mercedes wins both.
MRWinsBothRaces = p(MRwinboth, such_that(MRwinone, MRWins))
MRWinsBothRaces

0.2148022118247555

Defined two functions: MRwinone selects the outcomes where MR wins AT LEAST one race, and MRwinboth selects the outcomes where MR wins BOTH races
Applied the such_that function to get the probability that MR wins both races given that MR has won at least one of the two races

# How about Ferrari, Red Bull, and Renault?

In [40]:
#How about Ferrari, Red Bull, and Renault?


def FRwinone(outcome):
    """Return True when 'FR' is the first or the second field of `outcome`.

    Fields are separated by a single space; the second field is looked at
    only if the first is not 'FR'.
    """
    race_winners = outcome.split(' ')
    if race_winners[0] == 'FR':
        return True
    return race_winners[1] == 'FR'

def FRwinboth(outcome):
    """Return True when 'FR' is both the first and the second field of `outcome`.

    Fields are separated by a single space; the second field is looked at
    only if the first is 'FR'.
    """
    race_winners = outcome.split(' ')
    if race_winners[0] != 'FR':
        return False
    return race_winners[1] == 'FR'

# P(FR wins both | FR wins at least one), taken over the joint team
# distribution (which is named MRWins but covers every team pairing).
FRWinsBothRaces = p(FRwinboth, such_that(FRwinone, MRWins))
print(FRWinsBothRaces)


def RBwinone(outcome):
    """Return True when 'RB' is the first or the second field of `outcome`.

    Fields are separated by a single space; the second field is looked at
    only if the first is not 'RB'.
    """
    race_winners = outcome.split(' ')
    if race_winners[0] == 'RB':
        return True
    return race_winners[1] == 'RB'

def RBwinboth(outcome):
    """Return True when 'RB' is both the first and the second field of `outcome`.

    Fields are separated by a single space; the second field is looked at
    only if the first is 'RB'.
    """
    race_winners = outcome.split(' ')
    if race_winners[0] != 'RB':
        return False
    return race_winners[1] == 'RB'

# P(RB wins both | RB wins at least one), taken over the joint team
# distribution (which is named MRWins but covers every team pairing).
RBWinsBothRaces = p(RBwinboth, such_that(RBwinone, MRWins))
print(RBWinsBothRaces)


def REwinone(outcome):
    """Return True when 'RE' is the first or the second field of `outcome`.

    Fields are separated by a single space; the second field is looked at
    only if the first is not 'RE'.
    """
    race_winners = outcome.split(' ')
    if race_winners[0] == 'RE':
        return True
    return race_winners[1] == 'RE'

def REwinboth(outcome):
    """Return True when 'RE' is both the first and the second field of `outcome`.

    Fields are separated by a single space; the second field is looked at
    only if the first is 'RE'.
    """
    race_winners = outcome.split(' ')
    if race_winners[0] != 'RE':
        return False
    return race_winners[1] == 'RE'

# P(RE wins both | RE wins at least one), taken over the joint team
# distribution (which is named MRWins but covers every team pairing).
REWinsBothRaces = p(REwinboth, such_that(REwinone, MRWins))
print(REWinsBothRaces)






0.14011976047904195
0.08304891922639362
0.02328914367610176


# Mercedes wins one of these two races on a rainy day. What is the probability Mercedes wins both races, assuming races can be held on either rainy, sunny, cloudy, snowy or foggy days? 

Created Weather Dictionary for all conditions

In [41]:

#Assume that rain, sun, clouds, snow, and fog are the only possible weather conditions on race tracks.


# Five weather codes, each given the same weight of 0.2.
weatherdict = ProbDist({
    'RA': 0.2,
    'SU': 0.2,
    'CL': 0.2,
    'SN': 0.2,
    'FO': 0.2,
})

weatherdict

{'RA': 0.2, 'SU': 0.2, 'CL': 0.2, 'SN': 0.2, 'FO': 0.2}

Created a joint for Car_SGP dictionary and weather dictionary

In [42]:
# Team x weather distribution for the first race; with an empty separator the
# keys are the team code followed directly by the weather code, e.g. 'MRRA'.
WeatherSGP = joint(cars_SGP, weatherdict, sep='')

WeatherSGP

{'MRRA': 0.07072829131652666,
 'MRSU': 0.07072829131652666,
 'MRCL': 0.07072829131652666,
 'MRSN': 0.07072829131652666,
 'MRFO': 0.07072829131652666,
 'RBRA': 0.030672268907563045,
 'RBSU': 0.030672268907563045,
 'RBCL': 0.030672268907563045,
 'RBSN': 0.030672268907563045,
 'RBFO': 0.030672268907563045,
 'RHRA': 0.01372549019607844,
 'RHSU': 0.01372549019607844,
 'RHCL': 0.01372549019607844,
 'RHSN': 0.01372549019607844,
 'RHFO': 0.01372549019607844,
 'FRRA': 0.049159663865546245,
 'FRSU': 0.049159663865546245,
 'FRCL': 0.049159663865546245,
 'FRSN': 0.049159663865546245,
 'FRFO': 0.049159663865546245,
 'RERA': 0.009103641456582639,
 'RESU': 0.009103641456582639,
 'RECL': 0.009103641456582639,
 'RESN': 0.009103641456582639,
 'REFO': 0.009103641456582639,
 'MCRRA': 0.011624649859943985,
 'MCRSU': 0.011624649859943985,
 'MCRCL': 0.011624649859943985,
 'MCRSN': 0.011624649859943985,
 'MCRFO': 0.011624649859943985,
 'PMERRA': 0.006442577030812328,
 'PMERSU': 0.006442577030812328,
 'PMERCL'

Created a joint for Car_RGP dictionary and weather dictionary

In [43]:
# Team x weather distribution for the second race; with an empty separator
# the keys are the team code followed directly by the weather code.
WeatherRGP = joint(cars_RGP, weatherdict, sep='')

WeatherRGP

{'MRRA': 0.07072829131652666,
 'MRSU': 0.07072829131652666,
 'MRCL': 0.07072829131652666,
 'MRSN': 0.07072829131652666,
 'MRFO': 0.07072829131652666,
 'RBRA': 0.030672268907563045,
 'RBSU': 0.030672268907563045,
 'RBCL': 0.030672268907563045,
 'RBSN': 0.030672268907563045,
 'RBFO': 0.030672268907563045,
 'RHRA': 0.01372549019607844,
 'RHSU': 0.01372549019607844,
 'RHCL': 0.01372549019607844,
 'RHSN': 0.01372549019607844,
 'RHFO': 0.01372549019607844,
 'FRRA': 0.049159663865546245,
 'FRSU': 0.049159663865546245,
 'FRCL': 0.049159663865546245,
 'FRSN': 0.049159663865546245,
 'FRFO': 0.049159663865546245,
 'RERA': 0.009103641456582639,
 'RESU': 0.009103641456582639,
 'RECL': 0.009103641456582639,
 'RESN': 0.009103641456582639,
 'REFO': 0.009103641456582639,
 'MCRRA': 0.011624649859943985,
 'MCRSU': 0.011624649859943985,
 'MCRCL': 0.011624649859943985,
 'MCRSN': 0.011624649859943985,
 'MCRFO': 0.011624649859943985,
 'PMERRA': 0.006442577030812328,
 'PMERSU': 0.006442577030812328,
 'PMERCL'

Joint of SGPWeather and RGPWeather

In [44]:
# Joint distribution over both races with weather; keys look like
# 'MRRA RBSU' (first-race team+weather, a space, second-race team+weather).
Joint_SGP_RGP = joint(WeatherSGP, WeatherRGP, sep=' ')
Joint_SGP_RGP

{'MRRA MRRA': 0.005002491192555536,
 'MRRA MRSU': 0.005002491192555536,
 'MRRA MRCL': 0.005002491192555536,
 'MRRA MRSN': 0.005002491192555536,
 'MRRA MRFO': 0.005002491192555536,
 'MRRA RBRA': 0.0021693971706329943,
 'MRRA RBSU': 0.0021693971706329943,
 'MRRA RBCL': 0.0021693971706329943,
 'MRRA RBSN': 0.0021693971706329943,
 'MRRA RBFO': 0.0021693971706329943,
 'MRRA RHRA': 0.000970780469050381,
 'MRRA RHSU': 0.000970780469050381,
 'MRRA RHCL': 0.000970780469050381,
 'MRRA RHSN': 0.000970780469050381,
 'MRRA RHFO': 0.000970780469050381,
 'MRRA FRRA': 0.0034769790269049356,
 'MRRA FRSU': 0.0034769790269049356,
 'MRRA FRCL': 0.0034769790269049356,
 'MRRA FRSN': 0.0034769790269049356,
 'MRRA FRFO': 0.0034769790269049356,
 'MRRA RERA': 0.0006438850049823955,
 'MRRA RESU': 0.0006438850049823955,
 'MRRA RECL': 0.0006438850049823955,
 'MRRA RESN': 0.0006438850049823955,
 'MRRA REFO': 0.0006438850049823955,
 'MRRA MCRRA': 0.0008221916217467513,
 'MRRA MCRSU': 0.0008221916217467513,
 'MRRA MC

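Created two functions: firstRainMR matches outcomes where MR wins the first race in the rain, and secondRainMR matches outcomes where MR wins the second race in any weather condition

Applied the such_that function to keep the outcomes where the first race is won by MR in the rain, then computed the probability that the second race is won by MR in any weather condition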

In [45]:
def firstRainMR(outcome):
    """Return True when the text before the first space in `outcome` is 'MRRA'."""
    first_field, _, _ = outcome.partition(' ')
    return first_field == 'MRRA'
def secondRainMR(outcome):
    """Return True when the second field of `outcome` is 'MR' plus any weather code.

    Despite the name, all five weather suffixes (RA, SU, CL, SN, FO) are
    accepted, not only rain. Raises IndexError if `outcome` has no space.
    """
    second_field = outcome.split(' ')[1]
    return second_field in ('MRRA', 'MRSU', 'MRCL', 'MRSN', 'MRFO')

# P(MR wins the second race in any weather | MR won the FIRST race in rain).
# NOTE(review): the conditioning event only covers a rainy MR win in the
# first race. If "wins one of these two races on a rainy day" is meant as
# "at least one of the two", outcomes whose second field is 'MRRA' would
# also belong in the condition -- confirm the intended reading.
SecondWin = p(secondRainMR, such_that(firstRainMR, Joint_SGP_RGP))
print(SecondWin)

0.35364145658263346
