In [None]:
#installation of required packages (assuming pip is setup)
%pip install scipy
%pip install pandas
%pip install matplotlib


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st


In [None]:
# Optional: switch matplotlib to its 'dark_background' style sheet so the
# figures drawn after this call use a dark theme. Remove or comment out this
# line to keep matplotlib's default look.
plt.style.use('dark_background')


# R function translations
`pbinom` -> `st.binom.cdf` - cumulative distribution function \
`dbinom` -> `st.binom.pmf` - probability mass function \
`qbinom` -> `st.binom.ppf` - percent point function \
`rbinom` -> `st.binom.rvs` - random variates \
The same applies to other distributions (e.g. `ppois` -> `st.poisson.cdf`)

# Exercise 1.
As a sports analyst, you would like to calculate some probabilities for a basketball player who plays shooting guard.
The player makes n = 10 attempts, and
p = 0.7 is the probability of scoring a three-pointer on each attempt.
Calculate the following probabilities: P(X ≤ 3), P(X < 3), P(X > 4) and P(X = 7), where X is the number of three-pointers scored.

In [None]:
# Exercise 1: X ~ Binomial(n, p) is the number of three-pointers scored.
n = 10   # number of attempts
p = 0.7  # probability of scoring on a single attempt

# P(X <= 3): the CDF includes its argument.
p_at_most_3 = st.binom.cdf(3, n, p)
print(f"P(X ≤ 3) = {p_at_most_3}")

# P(X < 3) equals P(X <= 2) because X only takes integer values.
p_below_3 = st.binom.cdf(2, n, p)
print(f"P(X < 3) = {p_below_3}")

# P(X > 4): use the survival function sf(k) = 1 - cdf(k). scipy documents sf
# as sometimes more accurate than subtracting the CDF from 1.
p_above_4 = st.binom.sf(4, n, p)
print(f"P(X > 4) = {p_above_4}")

# P(X = 7): probability mass at a single value.
p_exactly_7 = st.binom.pmf(7, n, p)
print(f"P(X = 7) = {p_exactly_7}")

# ppf(0.5) is the smallest k with P(X <= k) >= 0.5, i.e. the median.
median = st.binom.ppf(0.5, n, p)
print(f"Median: {median}")


In [None]:
# Visualize the probability mass function of X ~ Binomial(n, p).

# One row per value X can take (0, 1, ..., n) together with its probability.
df = pd.DataFrame({"k": np.arange(0, n+1)})
df = df.assign(prob_density=st.binom.pmf(df["k"], n, p))

# Stem plot: one vertical line per possible outcome k.
fig, ax = plt.subplots()
ax.stem(df["k"], df["prob_density"])
ax.set_xlabel('k')
ax.set_ylabel('P(X = k)')
ax.set_title(f'Binomial Distribution (n={n}, p={p})')
plt.show()


# Exercise 2.
On a large fully automated production plant items are pushed to a side band at random time points, from which
they are automatically fed to a control unit. The production plant is set up in such a way that the number of
items sent to the control unit is on average 1.6 items per minute. Let the random variable X denote the number
of items pushed to the side band in 1 minute.

a: What is the probability that there will be more than 5 items at the control unit in a given minute?\
b: What is the probability that no more than 8 items arrive at the control unit within a 5-minute period?

In [None]:
la = 1.6  # lambda: average number of items arriving per minute

# a) P(X > 5) in one minute. The survival function sf(k) = 1 - cdf(k) is used
#    because scipy documents it as sometimes more accurate than 1 - cdf.
prob_a = st.poisson.sf(5, la)
print(f"a) {prob_a}")

# b) Over a 5-minute period the count is Poisson with rate 5 * lambda, and
#    "not more than 8" is the CDF evaluated at 8.
prob_b = st.poisson.cdf(8, la*5)
print(f"b) {prob_b}")


In [None]:
# Visualize the Poisson probability mass function for the one-minute rate `la`.

# The x-axis holds the possible counts k = 0..10, not the rate lambda, so the
# column is named "k" (the same name the binomial plots use for their counts).
df = pd.DataFrame()
df["k"] = np.arange(0, 11)
df["prob_density"] = st.poisson.pmf(df["k"], la)

# Stem plot: one vertical line per possible count k.
fig, ax = plt.subplots()
ax.stem(df["k"], df["prob_density"])
ax.set_xlabel('k')
ax.set_ylabel('P(X = k)')
ax.set_title(f'Poisson (la={la})')
plt.show()


# Exercise 3.(DIY)
In the manufacture of car engine cylinders, it's known that there are 5 defective cylinders in every batch of 100
cylinders produced. From a production batch of 100 cylinders, 6 cylinders are selected at random for analysis.
What is the probability that the sample contains exactly 2 defective cylinders?

In [None]:
# The 6 cylinders are drawn without replacement from a finite batch that holds
# a known number of defectives, so the number of defectives in the sample is
# hypergeometric. A binomial with p = 0.05 would assume independent draws and
# only approximates this (about 0.0305 instead of the exact 0.0267).
batch_size = 100        # cylinders in the batch (population size)
defective_in_batch = 5  # defective cylinders in the batch
sample_size = 6         # cylinders drawn for analysis

# scipy's argument order is hypergeom.pmf(k, M, n, N): population size M,
# number of "success" items n in the population, number of draws N.
prob_two_defective = st.hypergeom.pmf(2, batch_size, defective_in_batch, sample_size)
print(f"Probability is: {prob_two_defective}")


In [None]:
# Visualize the distribution of the number of defective cylinders in the sample.
# The sample is drawn without replacement from a finite batch with a known
# number of defectives, so the count is hypergeometric rather than binomial.
batch_size = 100        # cylinders in the batch (population size)
defective_in_batch = 5  # defective cylinders in the batch
sample_size = 6         # cylinders drawn for analysis

# k runs over 0..sample_size; the mass at k = 6 is 0 because the batch only
# contains 5 defective cylinders.
df = pd.DataFrame()
df["k"] = np.arange(0, sample_size + 1)
# scipy's argument order is hypergeom.pmf(k, M, n, N): population size M,
# number of "success" items n in the population, number of draws N.
df["prob_density"] = st.hypergeom.pmf(df["k"], batch_size, defective_in_batch, sample_size)

fig, ax = plt.subplots()
ax.stem(df["k"], df["prob_density"])
ax.set_xlabel('k')
ax.set_ylabel('P(X = k)')
ax.set_title(f'Hypergeometric Distribution (M={batch_size}, n={defective_in_batch}, N={sample_size})')
plt.show()


# Exercise 4.(DIY)
A company that produces tires is using new technology to provide a safer driving experience. According to their claim, at a speed of 70 km/h the braking distance of these tires is normally distributed with a mean of 26.4 meters and a standard deviation (sigma) of 2.34 meters.

According to the standards, the braking distance should not exceed 29 meters at a speed of 70 km/h. \
a: What is the probability of complying with the standards? \
b: What is the probability of a braking distance between 24 and 26 meters?

In [None]:
# Braking distance model: X ~ Normal(mu, sigma), measured in meters.
mu, sigma = 26.4, 2.34

# a) Compliance means X <= 29: standardize the limit and evaluate the
#    standard normal CDF at the resulting z-score.
x = 29
z = (x - mu) / sigma
p_compliant = st.norm.cdf(z)
print(f"Probability of compliance with standards: {p_compliant}")

# b) P(24 < X < 26) is the difference between the standard normal CDF at the
#    upper bound (x1) and at the lower bound (x2), both standardized first.
x1, x2 = 26, 24
z1, z2 = ((bound - mu) / sigma for bound in (x1, x2))
p_between = st.norm.cdf(z1) - st.norm.cdf(z2)
print(f"Probability of breaking distance between 26 and 24: {p_between}")


In [None]:
# Visualize the braking-distance density and shade the two probabilities
# computed for parts a) and b).

df = pd.DataFrame()
df["x"] = np.linspace(20, 40, 100)
df["prob_density"] = st.norm.pdf(df["x"], mu, sigma)

fig, ax = plt.subplots()
ax.plot(df["x"], df["prob_density"])
ax.set_xlabel('x')
# X is continuous, so P(X = x) is 0 for every x; the curve is the density f(x)
# and probabilities are the shaded areas underneath it.
ax.set_ylabel('f(x)')
ax.set_title(f'Normal Distribution (mu={mu}, sigma={sigma})')

# Shade the areas under the curve. The red band is drawn after the green one,
# so it stays visible on top of the part of the green area it overlaps.
ax.fill_between(df["x"], df["prob_density"], where = (df["x"] < x),
                color = "green", label = f"P(X < {x})")
ax.fill_between(df["x"], df["prob_density"], where = (df["x"] > x2) & (df["x"] < x1),
                color = "red", label = f"P({x2} < X < {x1})")
ax.legend()
plt.show()


In [None]:
# Standardized view: the same two probabilities shown on the standard normal
# curve, using the z-scores z, z1 and z2 computed for parts a) and b).

df = pd.DataFrame()
df["z"] = np.linspace(-3, 3, 100)
df["prob_density"] = st.norm.pdf(df["z"], 0, 1)

fig, ax = plt.subplots()
ax.plot(df["z"], df["prob_density"])
ax.set_xlabel('z')
# Z is continuous, so P(Z = z) is 0 for every z; the curve is the density f(z).
ax.set_ylabel('f(z)')
ax.set_title('Standard Normal Distribution')

# Green: area left of the compliance limit. Red (drawn on top): area between
# the two standardized bounds of part b).
ax.fill_between(df["z"], df["prob_density"], where = (df["z"] < z),
                color = "green", label = f"P(Z < {z:.2f})")
ax.fill_between(df["z"], df["prob_density"], where = (df["z"] > z2) & (df["z"] < z1),
                color = "red", label = f"P({z2:.2f} < Z < {z1:.2f})")
ax.legend()
plt.show()
