In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import random

from scipy.stats import bernoulli
from scipy.stats import binom
from scipy.stats import poisson
from scipy.stats import norm
from scipy.stats import geom

from scipy.stats import describe
from scipy.stats import find_repeats
from scipy.stats import relfreq

from scipy.stats import linregress
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression

# Seed every random number generator the notebook draws from so that
# "Restart Kernel -> Run All" reproduces the same numbers:
#   * NumPy's global generator feeds np.random.* and the scipy.stats `rvs`
#     calls that are not given an explicit `random_state`.
#   * The stdlib `random` module is used separately (random.randint) to build
#     the synthetic study-hours / score data further down.
np.random.seed(42)
random.seed(42)

import warnings
# Hide library warnings so the cell outputs stay readable.
warnings.filterwarnings('ignore')

In [None]:
# Bernoulli trials with a biased coin (heads = 1, tails = 0).

# One flip of a coin that lands heads 35% of the time
coin_flip = bernoulli.rvs(size=1, p=0.35)
print(coin_flip)

# Ten flips of the same coin; adding up the 0/1 outcomes counts the heads
ten_coin_flips = bernoulli.rvs(size=10, p=0.35)
print(ten_coin_flips)

coin_flips_sum = ten_coin_flips.sum()
print(coin_flips_sum)


# Five flips of a fair coin, again counting the heads
five_coin_flips = bernoulli.rvs(size=5, p=0.5)
print(five_coin_flips)

coin_flips_sum = five_coin_flips.sum()
print(coin_flips_sum)

In [None]:
# Twenty repetitions of "flip a 35%-heads coin ten times";
# every entry is the number of heads seen in one repetition.
draws = binom.rvs(10, 0.35, size=20)
print(draws)

# P(X > 20) for X ~ Binomial(n=30, p=0.5).
# The survival function is strictly "greater than k", so k=20 is the right cut.
prob_gt_20_heads = binom.sf(20, 30, 0.5)
print(prob_gt_20_heads)

In [None]:
# Predicting the probability of defects
#
# A component either has a defect or it does not, so the number of defective
# parts in a random sample is binomial. We draw 50 components at random from
# a supplier's delivery; the agreed defect rate is 2%.
defect_model = {"n": 50, "p": 0.02}

# P(exactly one defective component)
prob_one_defect = binom.pmf(1, **defect_model)
print(prob_one_defect)

# P(no defective components at all)
prob_no_defects = binom.pmf(0, **defect_model)
print(prob_no_defects)

# P(at most two defective components)
prob_two_or_less_defects = binom.cdf(2, **defect_model)
print(prob_two_or_less_defects)

In [None]:
# Predicting employment status:

# Consider a survey about employment that contains the question "Are you employed?"
# It is known that 65% of respondents will answer "yes." Eight survey responses have been collected.
# The number of "yes" answers is Binomial(n=8, p=0.65); equivalently the number
# of "no" answers is Binomial(n=8, p=0.35).

# Calculate the probability of getting exactly 5 yes responses
prob_five_yes = binom.pmf(k=5, n=8, p=0.65)
print(prob_five_yes)

# Calculate the probability of getting 3 or fewer no responses.
# Modelled directly on the "no" count. With 8 answers, "3 or fewer no" is the
# same event as "5 or more yes", i.e. binom.sf(k=4, n=8, p=0.65).
# (1 - cdf(5) on the "yes" count would give "6 or more yes" = "2 or fewer no".)
prob_three_or_less_no = binom.cdf(k=3, n=8, p=0.35)
print(prob_three_or_less_no)

# Calculate the probability of getting more than 3 yes responses
prob_more_than_three_yes = binom.sf(k=3, n=8, p=0.65)
print(prob_more_than_three_yes)

In [None]:
# Burglaries example
#
# Only 20% of the crimes in town get solved, and last week there were
# 9 burglaries. The number of solved burglaries is Binomial(n=9, p=0.20).

# P(exactly 4 solved)
four_solved = binom.pmf(4, 9, 0.20)
print(four_solved)

# P(more than 3 solved)
more_than_three_solved = binom.sf(3, 9, 0.20)
print(more_than_three_solved)

# P(2 or 3 solved): add the two individual probabilities
two_or_three_solved = binom.pmf([2, 3], 9, 0.20).sum()
print(two_or_three_solved)


# Reading the functions:
#   * sf(k) is the probability of MORE than k, so binom.sf(k=7, n=9, p=0.2)
#     is "more than 7", which is the same as "8 or more".
#   * cdf(k) is the probability of k OR FEWER.
# Decide whether you need a single-value probability (pmf) or a cumulative
# one (cdf / sf) and pick the function accordingly.

# P(1 or fewer solved) + P(8 or more solved): both tails together
tail_probabilities = binom.cdf(1, 9, 0.2) + binom.sf(7, 9, 0.2)
print(tail_probabilities)

In [None]:
# Expected value (n*p) and variance (n*p*(1-p)) of a Binomial(n=10, p=0.25)
mean, variance = binom.stats(10, 0.25, moments='mv')
print(mean)
print(variance)

In [None]:
# Calculating the sample mean:

# Simulation involves generating samples and then measuring.
# Generate some samples and calculate the sample mean with the describe() method.
# See what you observe about the sample mean as the number of samples increases.

# Generate a sample of 100 fair coin flips and calculate the sample mean.
# (n=1 makes every draw a single coin flip.)
num_coin_filp_trials = 1
num_experiments = 100
prob_success = 0.5

sample_of_100_flips = binom.rvs(n=num_coin_filp_trials, p=prob_success, size=num_experiments)
sample_mean_100_flips = describe(sample_of_100_flips).mean
print(sample_mean_100_flips)


# Generate a sample of 1,000 fair coin flips and calculate the sample mean.
num_coin_filp_trials = 1
num_experiments = 1000
prob_success = 0.5

sample_of_1000_flips = binom.rvs(n=num_coin_filp_trials, p=prob_success, size=num_experiments)
# Summarise the 1,000-flip sample (not the earlier 100-flip one).
sample_mean_1000_flips = describe(sample_of_1000_flips).mean
print(sample_mean_1000_flips)


# Sample mean from a generated sample of 2,000 fair coin flips
sample_mean_2000_flips = describe(binom.rvs(n=1, p=0.5, size=2000)).mean
print(sample_mean_2000_flips)



# Checking the result:
# Now try generating some samples and calculating the expected value and variance yourself,
# then using the method provided by binom to check if the sample values match the theoretical values.

num_coin_filp_trials = 10
num_experiments = 2000
prob_success = 0.3
sample = binom.rvs(n=num_coin_filp_trials, p=prob_success, size=num_experiments)


# Method 1
# Calculate the sample mean and variance from the sample variable
sample_describe = describe(sample)
print(sample_describe)


# Method 2
# Calculate the theoretical mean and variance by hand: n*p and n*p*(1-p)
mean = num_coin_filp_trials * prob_success
variance = mean * (1 - prob_success)

# Method 3
# Let scipy compute the theoretical mean and variance for the same n and p
binom_stats = binom.stats(n=num_coin_filp_trials, p=prob_success)
print(binom_stats)


# Side by side: sample estimates, hand-computed values, scipy's values
print(sample_describe.mean, sample_describe.variance, mean, variance, binom_stats)

In [None]:
# Repeat a small experiment many times and collect its summary statistics.
averages = []
variances = []

for _ in range(1500):
    # One experiment: 10 trials of 10 coin flips with 25% probability of heads
    sample = binom.rvs(n=10, p=0.25, size=10)

    # Summarise once and keep both the mean and the variance of this sample
    summary = describe(sample)
    averages.append(summary.mean)
    variances.append(summary.variance)

# Average of the 1,500 sample means
print("Mean {}".format(describe(averages).mean))

# Average of the 1,500 sample variances
print("Variance {}".format(describe(variances).mean))

# Theoretical mean and variance of Binomial(n=10, p=0.25) for comparison
print(binom.stats(n=10, p=0.25))

In [None]:
# 1,000 experiments of two fair coin flips each (fixed random_state for repeatability)
num_coins_flips_trial = 2
num_experiments = 1000
prob_success = 0.5

sample = binom.rvs(num_coins_flips_trial, prob_success, size=num_experiments, random_state=1)

# Empirical probability of 2 heads: how often the value 2 occurs in the sample,
# divided by the number of experiments.
repeats = find_repeats(sample)
print(repeats)
print(repeats.counts[2] / num_experiments)

# The same information as relative frequencies: three bins, one per outcome 0, 1, 2
rel_frequencies = relfreq(sample, numbins=3)
print(rel_frequencies)
print(rel_frequencies.frequency)

# Theoretical probabilities of 0, 1 and 2 heads for Binomial(n=2, p=0.5),
# to compare against the empirical frequencies above.
probabilities = binom.pmf(np.arange(3), n=2, p=0.5)
print(probabilities)

In [None]:
# Marginal probabilities for each component
p_engine_fails, p_engine_works = 0.01, 0.99
p_gear_box_fails, p_gear_box_works = 0.005, 0.995

# Joint probabilities: the individual probabilities are multiplied,
# i.e. the two components are treated as independent.
p_engineworks_gearboxworks = p_engine_works * p_gear_box_works
p_enginefails_gearboxfails = p_engine_fails * p_gear_box_fails
p_only_gear_box_fails = p_gear_box_fails * p_engine_works
p_only_engine_fails = p_engine_fails * p_gear_box_works

# Exactly one of the two components fails
p_one_fails = p_only_engine_fails + p_only_gear_box_fails
print(p_one_fails)

# Both work or both fail
p_fails_or_works = p_engineworks_gearboxworks + p_enginefails_gearboxfails
print(p_fails_or_works)

In [None]:
# Plotting normal distributions
#
# A restaurant chain's customer spending is approximately normal with a
# mean of $3.15 and a standard deviation of $1.50 per customer.

# Draw 10,000 simulated customers (fixed random_state for repeatability)
sample = norm.rvs(loc=3.15, scale=1.5, size=10000, random_state=13)

# Draw the sample's distribution on a square figure
fig = plt.figure(figsize=(8, 8))
ax = sns.distplot(sample, ax=fig.add_subplot(111))
plt.show();

# Customer spending is modelled as Normal(mean=3.15, sd=1.5); freeze the
# distribution once and query it.
spend_dist = norm(loc=3.15, scale=1.5)

# P(spend < $3)
spending = spend_dist.cdf(3)
print(spending)

# P(spend > $5)
spending = spend_dist.sf(5)
print(spending)

# P($2.15 < spend < $4.15): difference of two cumulative probabilities
spending_4 = spend_dist.cdf(4.15)
spending_2 = spend_dist.cdf(2.15)
print(spending_4 - spending_2)

# P(spend < $2.15 or spend > $4.15): lower tail plus upper tail
spending_2 = spend_dist.cdf(2.15)
spending_over_4 = spend_dist.sf(4.15)
print(spending_2 + spending_over_4)

In [None]:
# Smartphone battery example
#
# Time between charges is modelled as Normal(mean=5 h, sd=1.5 h).
battery_life = norm(loc=5, scale=1.5)

# P(battery lasts less than 3 hours)
less_than_3h = battery_life.cdf(3)
print(less_than_3h)

# P(battery lasts more than 3 hours)
more_than_3h = battery_life.sf(3)
print(more_than_3h)

# P(battery lasts between 5 and 7 hours) = P(< 7 h) - P(< 5 h)
P_less_than_7h = battery_life.cdf(7)
P_less_than_5h = battery_life.cdf(5)
print(P_less_than_7h - P_less_than_5h)

In [None]:
# P(battery lasts between 5 and 7 hours) under Normal(mean=5 h, sd=1.5 h),
# computed as P(< 7 h) - P(< 5 h).
P_less_than_7h = norm.cdf(7, 5, 1.5)
P_less_than_5h = norm.cdf(5, 5, 1.5)
print(P_less_than_7h - P_less_than_5h)

In [None]:
# Adults' heights example

# The heights of adults aged between 18 and 35 years are normally distributed.
# For males, the mean height is 70 inches with a standard deviation of 4.
# Adult females have a mean height of 65 inches with a standard deviation of 3.5.

# Female heights within one standard deviation of the mean
# (the central interval holding roughly 68% of the distribution)
interval = norm.interval(0.68, loc=65, scale=3.5)
print(interval)

# Central interval containing 99% of male heights. This is a two-sided range
# (0.5% cut from each tail), NOT the cutoff for the tallest 1%.
male_central_99_interval = norm.interval(0.99, loc=70, scale=4)
print(male_central_99_interval)

# Height above which the tallest males fall with 0.01 probability:
# the 99th percentile of the male distribution.
tallest = norm.ppf(0.99, loc=70, scale=4)
print(tallest)

# Probability of being taller than 73 inches for males and females
P_taller_male = norm.sf(73, loc=70, scale=4)
P_taller_female = norm.sf(73, loc=65, scale=3.5)
print(P_taller_male, P_taller_female)

# Probability of being shorter than 61 inches for males and females
P_shorter_male = norm.cdf(61, loc=70, scale=4)
P_shorter_female = norm.cdf(61, loc=65, scale=3.5)
print(P_shorter_male, P_shorter_female)

In [None]:
# ATM example

# If you know how many specific events occurred per unit of measure, you can assume that the
# distribution of the random variable follows a Poisson distribution to study the phenomenon.

# Consider an ATM (automatic teller machine) at a very busy shopping mall.
# The bank wants to avoid making customers wait in line to use the ATM.
# It has been observed that the average number of customers making withdrawals between
# 10:00 a.m. and 10:05 a.m. on any given day is 1.

# As a data analyst at the bank, you are asked what the probability is that the bank will
# need to install another ATM to handle the load.

# To answer the question, you need to calculate the probability of getting more than one
# customer during that time period.

# For reference: probability of EXACTLY 1 customer (pmf is a single-value probability,
# so this is not the answer to the question above)
prob_exactly_one_customer = poisson.pmf(k=1, mu=1)

# Print the result
print(prob_exactly_one_customer)


# Probability of MORE than 1 customer (survival function = strictly greater than k)
probability = poisson.sf(k=1, mu=1)

# Print the result
print(probability)

In [None]:
# Highway accidents example

# On a certain turn on a very busy highway, there are 2 accidents per day.
# Let's assume the number of accidents per day can be modeled as a Poisson
# random variable with mean 2.

# Probability of 5 accidents any day
P_five_accidents = poisson.pmf(k=5, mu=2)
print(P_five_accidents)

# Probability of having 4 or 5 accidents on any day: P(X <= 5) - P(X <= 3)
P_less_than_6 = poisson.cdf(k=5, mu=2)
P_less_than_4 = poisson.cdf(k=3, mu=2)
print(P_less_than_6 - P_less_than_4)

# Probability of more than 3 accidents any day
P_more_than_3 = poisson.sf(k=3, mu=2)
print(P_more_than_3)

# Number of accidents with 0.75 probability: the smallest count whose
# cumulative probability reaches 0.75. The previous q=0.857123460498547 did
# not match this description; it is cdf(3) written out to the last digit, so
# the returned count depended on floating-point rounding at the step.
accidents = poisson.ppf(q=0.75, mu=2)
print(accidents)

In [None]:
# Generating and plotting a Poisson distribution:
# 10,000 draws with mean 2 (fixed random_state for repeatability)
sample = poisson.rvs(mu=2, size=10000, random_state=13)

# Histogram only (no KDE curve), 10 bins, on a square figure
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)
sns.distplot(sample, bins=10, kde=False, ax=ax)
plt.show()

In [None]:
# Bears can eat 18 salmon in 3 hours, and each attempt to catch a fish
# succeeds with probability 0.0333. The attempt on which the first salmon is
# caught is modelled as geometric; freeze the distribution once and query it.
salmon_attempts = geom(0.0333)

# P(first salmon is caught on the third attempt)
probability = salmon_attempts.pmf(3)
print(probability)

# P(a salmon is caught in fewer than 5 attempts), i.e. within 4 attempts
probability = salmon_attempts.cdf(4)
print(probability)

# P(a salmon is caught in fewer than 21 attempts), i.e. within 20 attempts
probability = salmon_attempts.cdf(20)
print(probability)

# Number of attempts needed to reach a 0.9 probability of catching a salmon
attempts = salmon_attempts.ppf(0.9)
print(attempts)

In [None]:
# Free throws example
#
# A basketball player scores a free throw with probability 0.3.
# "Miss the first throw, score the second" means the first success happens on
# throw number 2, which is the geometric pmf at k=2.
probability = geom.pmf(2, 0.3)

# Show the result
print(probability)

In [None]:
# Generating and plotting a geometric distribution:
# 10,000 draws with success probability 0.3 (fixed random_state for repeatability)
sample = geom.rvs(p=0.3, size=10000, random_state=13)

# Histogram only (no KDE curve) with unit-wide bins whose edges run 0, 1, ..., 20
unit_bin_edges = np.linspace(0, 20, 21)
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)
sns.distplot(sample, bins=unit_bin_edges, kde=False, ax=ax)
plt.show()

In [None]:
# Generating a sample
#
# A hospital's planning department is investigating treatments for newborns.
# We simulate the sex of 250 newborn children, given that on average 50.50%
# are males (1 = male, single trial per child).

# 250 simulated newborns (fixed random_state for repeatability)
sample = binom.rvs(n=1, p=0.505, size=250, random_state=42)

# Sample mean of just the first ten children
mean_sample_size_10 = describe(sample[:10]).mean

# Running sample mean: the mean of the first 2, first 3, ..., first 250 children
averages = [describe(sample[:count]).mean for count in range(2, 251)]

# Running sample mean against the population mean: the horizontal red line is
# the theoretical mean of Binomial(n=1, p=0.505).
fig, ax1 = plt.subplots(figsize=(20, 10))
ax1.axhline(binom.mean(n=1, p=0.505), color='red')
ax1.plot(averages, '-')
# Legend entries follow the order in which the two lines were drawn
ax1.legend(["Population mean", "Sample mean"], loc='upper right')
plt.show();

In [None]:
# Sample means

# An important result in probability and statistics is that the shape of the distribution of the
# means of random variables tends to a normal distribution, which happens when you add random variables
# with any distribution with the same expected value and variance.


def draw_sample_means(population, n_samples, sample_size=20):
    """Return the means of repeated random samples drawn from ``population``.

    Each sample is drawn with ``np.random.choice`` (i.e. with replacement,
    using NumPy's global random state).

    Parameters
    ----------
    population : array-like
        Values to sample from.
    n_samples : int
        How many samples to draw (one mean is returned per sample).
    sample_size : int, default 20
        Number of values in each sample.

    Returns
    -------
    list of float
        One sample mean per drawn sample, in draw order.
    """
    return [np.random.choice(population, sample_size).mean() for _ in range(n_samples)]


def plot_sample_means(sample_means, title):
    """Plot a histogram of ``sample_means`` with labelled axes and a title."""
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.hist(sample_means)
    ax.set_xlabel("Sample mean values")
    # The y axis holds the counts; a second set_xlabel call here used to
    # overwrite the x label and leave the y axis unlabelled.
    ax.set_ylabel("Frequency")
    ax.set_title(title)
    plt.show()


# Binomial Distribution
# -----------------------
population = binom.rvs(n=10, p=0.5, size=1000, random_state=42)

# A single sample of 20 values, to show what one draw looks like
sample = np.random.choice(population, 20)

# Means of 1,500 such samples
sample_means = draw_sample_means(population, 1500)
plot_sample_means(sample_means, "Sample means: binomial population")


# Geometric Distribution
# -----------------------
population = geom.rvs(p=0.5, size=1000)
sample_means = draw_sample_means(population, 3000)
plot_sample_means(sample_means, "Sample means: geometric population")


# Poisson Distribution
# -----------------------
population = poisson.rvs(mu=2, size=1000)
sample_means = draw_sample_means(population, 3000)
plot_sample_means(sample_means, "Sample means: Poisson population")

In [None]:
# Adding dice rolls
#
# To illustrate the central limit theorem we simulate dice rolls, then add
# the rolls of several dice together and look at the distribution of the sum.

# Fix the NumPy random state so the rolls are repeatable
np.random.seed(42)

# Three dice, 2,000 rolls each; randint's upper bound is exclusive, so (1, 7)
# yields the faces 1..6.
sample_a = [np.random.randint(1, 7) for _ in range(2000)]
sample_b = [np.random.randint(1, 7) for _ in range(2000)]
sample_c = [np.random.randint(1, 7) for _ in range(2000)]

# Roll-by-roll totals of two dice and of all three dice
sum_of_samples_a_b = np.asarray(sample_a) + np.asarray(sample_b)
sum_of_samples_a_b_c = sum_of_samples_a_b + np.asarray(sample_c)

# One histogram per case: a single die, the sum of two dice, the sum of three.
# The bin edges are consecutive integers so every possible total gets its own
# bar; width=.9 leaves a small gap between neighbouring bars.
for rolls, bin_edges in (
    (sample_a, range(1, 8)),
    (sum_of_samples_a_b, range(2, 14)),
    (sum_of_samples_a_b_c, range(3, 20)),
):
    fig = plt.figure(figsize=(8, 4))
    ax1 = fig.add_subplot(111)
    ax1.hist(rolls, bins=bin_edges, width=.9)
    plt.show();

In [None]:
# Fitting a model
#
# A university wants to relate hours of study to test scores. The data is
# synthetic: each base value is scaled by a random factor between 0.75 and
# 0.99, giving 21 (hours, score) pairs.
hours_of_study = [hours * (random.randint(75, 99) / 100) for hours in range(4, 25)]
scores = [points * (random.randint(75, 99) / 100) for points in range(35, 98, 3)]
plt.scatter(hours_of_study, scores);


# Fit a straight line and pull the individual parameters off the result
fit_result = linregress(hours_of_study, scores)
slope = fit_result.slope
intercept = fit_result.intercept
r_value = fit_result.rvalue
p_value = fit_result.pvalue
std_err = fit_result.stderr

# Show the linear model parameters
print('slope:', slope)
print('intercept:', intercept)

In [None]:
# Data set A: base values scaled by a random factor between 0.75 and 0.99
hours_of_study_a = [num*(random.randint(75,99)/100) for num in range(4, 25, 1)]
scores_a = [num*(random.randint(75,99)/100) for num in range(35, 98, 3)]
plt.scatter(hours_of_study_a, scores_a);

# Data set B: same base values, but the random factor ranges from 0.10 to 0.99
hours_of_study_b = [num*(random.randint(10,99)/100) for num in range(4, 25, 1)]
scores_b = [num*(random.randint(10,99)/100) for num in range(35, 98, 3)]
plt.scatter(hours_of_study_b, scores_b);

# Build the frames directly from dict literals. The previous version stored
# them in a variable called `dict`, which shadowed the builtin for the rest
# of the kernel session.
df_a = pd.DataFrame({'hours': hours_of_study_a, 'scores': scores_a})
df_b = pd.DataFrame({'hours': hours_of_study_b, 'scores': scores_b})

In [None]:
# Linear regression on data set A
linear_regressor = LinearRegression()

# The estimator is fed 2-D arrays here: take the first column (hours) and the
# second column (scores) of df_a as (n_rows, 1) NumPy arrays.
hours_a = df_a.iloc[:, [0]].to_numpy()
scores_a = df_a.iloc[:, [1]].to_numpy()


# Fit the model (fit returns the fitted estimator itself) and predict on the
# same hours it was trained on
model_a = linear_regressor.fit(hours_a, scores_a)
scores_a_pred = model_a.predict(hours_a)

# Observed points with the fitted line on top
plt.scatter(df_a["hours"], df_a["scores"])
plt.plot(df_a["hours"], scores_a_pred)
plt.title("Model A", fontsize=12)
plt.show();

# Residuals, defined here as predicted minus observed score
residuals_a = scores_a_pred - scores_a

# Residuals against hours, with a dashed zero reference line from x=0 to x=30
plt.scatter(hours_a, residuals_a)
plt.hlines(0, 0, 30, colors='r', linestyles='--')
plt.title("Residuals plot of Model A", fontsize=12)
plt.show()

In [None]:
# Linear regression on data set B
linear_regressor = LinearRegression()


# Column selection, step by step:
#   df_b.iloc[:, [0]]            -> one-column DataFrame
#   df_b.iloc[:, [0]].to_numpy() -> numpy.ndarray of shape (n_rows, 1)
hours_b = df_b.iloc[:, [0]].to_numpy()
scores_b = df_b.iloc[:, [1]].to_numpy()

# Fit the model (fit returns the fitted estimator itself) and predict on the
# same hours it was trained on
model_b = linear_regressor.fit(hours_b, scores_b)
scores_b_pred = model_b.predict(hours_b)

# Observed points of data set B with the fitted line on top
plt.scatter(df_b["hours"], df_b["scores"])
plt.plot(df_b["hours"], scores_b_pred)
plt.title("Model B", fontsize=12)
plt.show();


# Residuals of model B, defined here as predicted minus observed score
residuals_b = scores_b_pred - scores_b

# Residuals against hours, with a dashed zero reference line from x=0 to x=30
plt.scatter(hours_b, residuals_b)
plt.hlines(0, 0, 30, colors='r', linestyles='--')
plt.title("Residuals plot of Model B", fontsize=12)
plt.show()

In [None]:
# Fitting a logistic model

# Synthetic data: whole hours of study, and a pass/fail flag that is True when
# the randomly scaled score reaches at least 35.
hours_of_study = [int(num*(random.randint(75,99)/100)) for num in range(4, 25, 1)]
scores = [int(num*(random.randint(35,99)/100)) >= 35 for num in range(35, 98, 3)]
plt.scatter(hours_of_study, scores)

# Build the frame from a dict literal (no variable named `dict`, which would
# shadow the builtin for the rest of the kernel session).
df = pd.DataFrame({'hours': hours_of_study, 'scores': scores})

# Features as an (n_rows, 1) column; the pass/fail flags are kept as a column too
hours_of_study = df.iloc[:, 0].values.reshape(-1, 1)
scores = df.iloc[:, 1].values.reshape(-1, 1)


# sklearn logistic model; the very large C makes the regularisation negligible.
# The target is flattened to 1-D for fit; passing the (n_rows, 1) column makes
# scikit-learn emit a DataConversionWarning.
model_a = LogisticRegression(C=1e9)
fit_model_a = model_a.fit(hours_of_study, scores.ravel())

# Get parameters
slope_beta_1 = fit_model_a.coef_[0][0]
intercept_beta_0 = fit_model_a.intercept_[0]

# Print parameters
print(slope_beta_1, intercept_beta_0)


# Specify values to predict
hours_of_study_test = [[10], [11], [12], [13], [14]]

# Pass values to predict
predicted_outcomes = fit_model_a.predict(hours_of_study_test)
print(predicted_outcomes)


# Set value in array (a single row with a single feature)
value = np.asarray(11).reshape(-1,1)

# Probability of passing the test with 11 hours of study
# (column 1 of predict_proba is the probability of the positive class)
print("Probability of passing test ", fit_model_a.predict_proba(value)[:,1])

# Specify values to predict
hours_of_study_test_a = [[6], [7], [8], [9], [10]]

# Pass values to predict
predicted_outcomes_a = model_a.predict(hours_of_study_test_a)
print(predicted_outcomes_a)

In [None]:
# Second synthetic data set for test B: whole hours of study, and a pass/fail
# flag that is True when the randomly scaled score reaches at least 35.
hours_of_study_b = [int(num*(random.randint(75,99)/100)) for num in range(4, 25, 1)]
scores_b = [int(num*(random.randint(35,99)/100)) >= 35 for num in range(35, 98, 3)]
plt.scatter(hours_of_study_b, scores_b);

df_b = pd.DataFrame({'hours': hours_of_study_b, 'scores': scores_b})

# Features as an (n_rows, 1) column; the pass/fail flags are kept as a column too
hours_of_study_b = df_b.iloc[:, 0].values.reshape(-1, 1)
scores_b = df_b.iloc[:, 1].values.reshape(-1, 1)

# sklearn logistic model; the very large C makes the regularisation negligible.
# The target is flattened to 1-D for fit; passing the (n_rows, 1) column makes
# scikit-learn emit a DataConversionWarning.
model_b = LogisticRegression(C=1e9)
fit_model_b = model_b.fit(hours_of_study_b, scores_b.ravel())

# Specify values to predict
hours_of_study_test_b = [[3], [4], [5], [6]]

# Pass values to predict
predicted_outcomes_b = fit_model_b.predict(hours_of_study_test_b)
print(predicted_outcomes_b)

In [None]:
# Set value in array (a single row with a single feature)
value_A = np.asarray(8.6).reshape(-1,1)
# Probability of passing test A with 8.6 hours of study
print("The probability of passing test A with 8.6 hours of study is ", fit_model_a.predict_proba(value_A)[:,1])

# Set value in array
value_B = np.asarray(4.7).reshape(-1,1)
# Probability of passing test B with 4.7 hours of study
print("The probability of passing test B with 4.7 hours of study is ", fit_model_b.predict_proba(value_B)[:,1])

# Hours at which model A gives 0.5 probability: the point where
# intercept + coef * hours = 0, i.e. hours = -intercept / coef
print("Minimum hours of study for test A are ", -fit_model_a.intercept_/fit_model_a.coef_)

# Same decision boundary for model B
print("Minimum hours of study for test B are ", -fit_model_b.intercept_/fit_model_b.coef_)

# Probability of passing each test for every recorded value of study hours.
# Row i of test A's hours is paired with row i of test B's hours, and the two
# probabilities are multiplied (i.e. the tests are treated as independent).
prob_passing_a = fit_model_a.predict_proba(hours_of_study.reshape(-1,1))[:,1]
prob_passing_b = fit_model_b.predict_proba(hours_of_study_b.reshape(-1,1))[:,1]
prob_passing_a_and_b = prob_passing_a * prob_passing_b

# Maximum joint probability and the first position where it occurs
max_prob = prob_passing_a_and_b.max()
max_position = int(np.argmax(prob_passing_a_and_b))

# Flatten before indexing so a scalar (not a 1-element array) is handed to
# float(); converting an array with ndim > 0 to a scalar is deprecated in NumPy.
best_hours_a = float(hours_of_study.reshape(-1)[max_position])
best_hours_b = float(hours_of_study_b.reshape(-1)[max_position])

# Study hours for each test
print("Study {:1.0f} hours for the first and {:1.0f} hours for the second test and you will pass both tests with {:01.2f} probability.".format(
            best_hours_a,
            best_hours_b,
            float(max_prob))
)