In [3]:
import numpy as np

#(
# We can use np method random and its method choice to select a host of values from a set
# replace means is the chosen item going to be returned into list of possible items
# )

dice_6 = range(1, 7)
num_rolls = 50

results_1 = np.random.choice(dice_6, size=num_rolls, replace=True)
#As this is simulating a dice then yes replace is true as rolling doesn't remove future option

print(results_1)

[4 1 5 3 2 5 2 5 2 5 4 5 1 4 6 2 6 2 2 4 2 6 5 1 2 4 1 5 6 3 3 3 1 4 5 4 5
 3 2 4 4 6 4 5 2 1 6 3 2 5]


In [25]:
#Probability Mass Functions PMF

#These let us see the probability of having an exact discrete value from a discrete
#random variable
#So pmf is like the probablity of that exact (given) discrete value occurring
import scipy.stats as stats

print(stats.binom.pmf(3, 10, 0.5))
#Arguments
# 1 = value u want to know about
# 2 = how many tries/sample size/group
# 3 = probability of success

# Task 1 - find the probability of getting 7 heads in 10 coin flips
print(stats.binom.pmf(1, 700, 0.5))
#as you can see we can make it prodce very low numbers if we ask for discrete values
#that are illogical in terms of the variable probability ie asking probability of 10 heads
#in 10 flips - very low and if u say 100 heads in 100 flips its even worse


#(
# Another thing to note is that the laws of big numbers and how they colour probaility
# are involved ie over a larger data set we would expect the true probability to express itself
# in the outcome results and if it does not then the chances are very low
# 
# IE 3 heads in 10 consequtive flips is low probability but not crazy
# But 30 heads in 100 flips is verry low as over 100 for true propability (50%) to not be 
# observed - this is very unlikely
# 
# )

small_data = stats.binom.pmf(3, 10, 0.5)
big_data = stats.binom.pmf(300, 1000, 0.5)




print({big_data, small_data})

0.1171875
1.330764096406523e-208
{5.065988280449224e-38, 0.1171875}


In [31]:
#We can use probability mass function calculations to calculate ranges

#Easiest would be to find probs of finding all values and less

one_to_30 = range(1, 51)

at_or_less = 0
for num in one_to_30:
    at_or_less += stats.binom.pmf(num, 100, 0.5 )
print(at_or_less)
#(
# Above this is basically asking what is the probability of having 50 or fewer heads in
# a sample  size of 100
# )

#(
# What if we wanted to know the probability of having x or more heads in a given sample
# We would need to do 1 - all the values lower than the value we want to be above
# 
# 
# )

above_60 = range(1,61)
above_60_or = 1

for num in above_60:
    above_60_or -= stats.binom.pmf(num, 100, 0.5)

print(above_60_or)  

0.5397946186935892
0.017600100108852382


In [36]:
#(
# Cumulative Distribution Function
# 
# This lets us see the probability of discrete values in a discrete probability variable
# in a range - so it would be probability of all discrete values and less
# 
# )
import scipy.stats as stats

prob_50_or_L = stats.binom.cdf(50, 100, 0.5)
print(prob_50_or_L)

prob_60_more = 1 - stats.binom.cdf(60, 100, 0.5)
print(prob_60_more)

0.5397946186935895
0.01760010010885238


In [7]:
#(
# Cumulative density function
# 
# CDF can show u the likelihood of a discrete value but it can also show u the likelihood
# of a any values occurinng in a range (for continuous values)
# 
# )

#(
# For CDF - Continuous the args are: 1 - value of interest, 2 - mean of the probability distr
# 3: the standard deviation of the probability distribution
# The function will return all the values that are found below the value of interest
# 
# For continuous data we would have to use Cumulative Density as a calculation of continuous data type
# must be a range because the likelihood of an exact value with infinite decimal places is negligible
# so must do the cumulation 
# 
# We need the mean to articulate the likelihood of values relative to the central tendency
# And the standard deviation (the third argument) is needed as it lets us have a kinda modifier to see again
# how likely a value range is to occur 
# 
# This is because a standard deviation will show the general way in which values move against the mean 
# so being within the the std is also likely
# 
# It's important to note that the function for continuous is stats.norm not stats.binom
# 
# )
import scipy.stats as stats

av_height_uk = 160.3
std = 5.62
my_height = 172

those_smaller = stats.norm.cdf(my_height, av_height_uk, std)
print(those_smaller)

0.9813219378660953


In [5]:
import scipy.stats as stats

#(
# If we want to find a range we do the biggest value minus the smaller value so if we wanted to find the
# chance of weather being between 27-20 
# 
# )

between_27_25 = stats.norm.cdf(25, 20, 3) - stats.norm.cdf(18, 20, 3)
print(between_27_25)

#(
#  If you want to find probability values above a certain values then we do 1 minus a result for the cmf
# )
people_taller_than_me = 1 - stats.norm.cdf(157, 175.3, 7.62)
print('Taller:', people_taller_than_me)

0.6997171101802624
Taller: 0.9918376644753395
