# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Seed NumPy's global generator so that every run produces the same draws
np.random.seed(0)

# 1. Draw a single value from the uniform distribution on [0, 1) and report it
random_variable = np.random.rand()
print(f"Random Variable: {random_variable}")

# 2. Generate a discrete uniform distribution and plot the PMF
def plot_discrete_uniform(n):
    """Show a bar chart of the PMF of a discrete uniform variable on 1..n.

    Each of the outcomes 1, 2, ..., n is given the same probability 1/n.

    Args:
        n: Number of equally likely outcomes.
    """
    outcomes = np.arange(1, n + 1)
    probabilities = np.full(n, 1 / n)

    plt.bar(outcomes, probabilities)
    plt.xticks(outcomes)  # one tick per outcome
    plt.xlabel('Outcome')
    plt.ylabel('Probability')
    plt.title('Discrete Uniform Distribution PMF')
    plt.show()


plot_discrete_uniform(10)

# 3. Calculate the PDF of a Bernoulli distribution
def bernoulli_pdf(p, x):
    """Return the Bernoulli probability mass P(X = x) for success probability p.

    The Bernoulli distribution is discrete, so strictly speaking this is a
    probability mass function; the name is kept for existing callers.

    Args:
        p: Probability of success, must lie in [0, 1].
        x: Outcome to evaluate.

    Returns:
        p when x == 1, 1 - p when x == 0, and 0.0 for any other x because
        values outside {0, 1} are not in the support.

    Raises:
        ValueError: If p is not in [0, 1].
    """
    if not 0 <= p <= 1:
        raise ValueError(f"p must be in [0, 1], got {p}")
    if x == 1:
        return p
    if x == 0:
        return 1 - p
    # Outside the support {0, 1} the probability mass is zero.
    return 0.0


# Example usage:
p = 0.5
x_values = [0, 1]
pdf_values = [bernoulli_pdf(p, x) for x in x_values]
print(f"Bernoulli PDF values: {pdf_values}")

# 4. Simulate a binomial distribution with n=10 and p=0.5, then plot its histogram
n, p = 10, 0.5
binomial_data = np.random.binomial(n, p, size=1000)

# Unit-width bins with edges at -0.5, 0.5, ..., n + 0.5 so each integer
# outcome 0..n sits in the middle of its own bar.
plt.hist(
    binomial_data,
    bins=np.arange(-0.5, n + 1.5, 1),
    density=True,
    alpha=0.6,
    color='g',
)
plt.xticks(range(n + 1))
plt.xlabel('Number of Successes')
plt.ylabel('Probability')
plt.title('Histogram of Binomial Distribution (n=10, p=0.5)')
plt.show()

# 5. Create a Poisson distribution and visualize it using Python
lambda_poisson = 3
poisson_data = np.random.poisson(lambda_poisson, size=1000)

# Unit-width bins centred on every integer from 0 up to the largest draw
plt.hist(
    poisson_data,
    bins=np.arange(-0.5, poisson_data.max() + 1.5, 1),
    density=True,
    alpha=0.6,
    color='b',
)
plt.xticks(range(poisson_data.max() + 1))
plt.xlabel('Number of Events')
plt.ylabel('Probability')
plt.title('Poisson Distribution (λ=3)')
plt.show()

# 6. Calculate and plot the CDF of a discrete uniform distribution
def plot_cdf_discrete_uniform(n):
    """Plot the CDF of a discrete uniform distribution on 1..n as a step curve.

    The CDF at outcome k is k/n, obtained by accumulating the constant
    probability 1/n over the outcomes 1, 2, ..., n.

    Args:
        n: Number of equally likely outcomes.
    """
    x = np.arange(1, n + 1)
    cdf = np.cumsum(np.ones(n) / n)

    # where='post' keeps each value constant from its outcome up to the next
    # one, so the jump happens AT the outcome. The default ('pre') would draw
    # every jump one outcome too early.
    plt.step(x, cdf, where='post')
    plt.title('Discrete Uniform Distribution CDF')
    plt.xlabel('Outcome')
    plt.ylabel('Cumulative Probability')
    plt.xticks(x)
    plt.grid()
    plt.show()


plot_cdf_discrete_uniform(10)

# 7. Generate a continuous uniform distribution using NumPy and visualize it
def plot_continuous_uniform(a, b):
    """Histogram 1000 uniform draws between a and b with the theoretical PDF on top.

    Args:
        a: Lower bound passed to np.random.uniform.
        b: Upper bound passed to np.random.uniform.
    """
    samples = np.random.uniform(a, b, size=1000)

    plt.hist(samples, bins=30, density=True, alpha=0.6, color='orange', edgecolor='black')
    plt.title('Continuous Uniform Distribution')
    plt.xlabel('Value')
    plt.ylabel('Density')

    # Theoretical density: 1/(b - a) between a and b, zero elsewhere. The grid
    # extends one unit past each bound so the drop to zero is visible.
    grid = np.linspace(a - 1, b + 1, 100)
    inside_support = (grid >= a) & (grid <= b)
    density = np.where(inside_support, 1 / (b - a), 0)
    plt.plot(grid, density, 'r-', lw=2)

    plt.show()


plot_continuous_uniform(0, 10)

# 8. Simulate data from a normal distribution and plot its histogram
normal_data = np.random.normal(loc=0, scale=1, size=1000)

# Theoretical standard normal density evaluated on [-4, 4]
x = np.linspace(-4, 4, 100)
pdf_normal = stats.norm.pdf(x)

plt.hist(
    normal_data,
    bins=30,
    density=True,
    alpha=0.6,
    color='purple',
    edgecolor='black',
)
plt.title('Normal Distribution Histogram (mean=0, std=1)')
plt.xlabel('Value')
plt.ylabel('Density')

# Overlay the theoretical curve on the empirical histogram
plt.plot(x, pdf_normal, 'r-', lw=2)
plt.show()

# 9. Calculate Z-scores from a dataset and plot them
def calculate_z_scores(data):
    """Standardize data by subtracting its mean and dividing by its std.

    Uses np.mean and np.std (NumPy's default, population, standard deviation).

    Args:
        data: Numeric array of observations.

    Returns:
        Array of z-scores, one per observation.
    """
    centered = data - np.mean(data)
    return centered / np.std(data)

z_scores_normal_data = calculate_z_scores(normal_data)

# Theoretical standard normal density used as the reference curve
x_z = np.linspace(-4, 4, 100)
pdf_z = stats.norm.pdf(x_z)

plt.hist(
    z_scores_normal_data,
    bins=30,
    density=True,
    alpha=0.6,
    color='cyan',
    edgecolor='black',
)
plt.title('Z-Scores Histogram')
plt.xlabel('Z-Score')
plt.ylabel('Density')

# Draw the reference curve over the histogram of z-scores
plt.plot(x_z, pdf_z, 'r-', lw=2)
plt.show()

# 10. Implement the Central Limit Theorem (CLT) for a non-normal distribution.
def central_limit_theorem(samples_size, num_samples=1000, scale=2.0):
    """Illustrate the CLT with means of samples from an exponential distribution.

    Draws `num_samples` independent samples of `samples_size` observations
    each directly from an exponential distribution, computes the mean of every
    sample, and plots a histogram of those means together with a normal
    density fitted to them.

    Args:
        samples_size: Number of observations in each sample.
        num_samples: How many independent samples (and hence sample means)
            to generate.
        scale: Scale parameter passed to np.random.exponential.
    """
    # Every row is a fresh, independent sample from the exponential
    # distribution. Resampling one fixed small dataset instead would show the
    # bootstrap distribution around that dataset's mean, not the sampling
    # distribution of the mean that the CLT describes.
    draws = np.random.exponential(scale=scale, size=(num_samples, samples_size))
    sample_means = draws.mean(axis=1)

    # Plot the distribution of sample means
    plt.hist(sample_means, bins=30, density=True, alpha=0.6)

    # Plot the normal approximation fitted to the simulated means
    mu_clt = np.mean(sample_means)
    std_clt = np.std(sample_means)

    x_clt = np.linspace(mu_clt - 4 * std_clt, mu_clt + 4 * std_clt, 100)
    pdf_clt = stats.norm.pdf(x_clt, mu_clt, std_clt)

    plt.plot(x_clt, pdf_clt, 'r-', lw=2)

    plt.title("Central Limit Theorem: Sample Means Distribution")
    plt.xlabel("Sample Mean")
    plt.ylabel("Density")
    # Finish this figure here; otherwise the next plotting commands in the
    # script would be drawn onto the same axes.
    plt.show()


central_limit_theorem(samples_size=30)

# 11. Simulate multiple samples from a normal distribution and verify the Central Limit Theorem
# 100 samples each of size 10, 30 and 50, all from the standard normal
sample_sizes = [10] * 100 + [30] * 100 + [50] * 100
sample_means_all_sizes = [
    np.mean(np.random.normal(loc=0, scale=1, size=size)) for size in sample_sizes
]

plt.hist(
    sample_means_all_sizes,
    bins=30,
    density=True,
    alpha=0.6,
    color='green',
    edgecolor='black',
)

# Normal curve with the mean and std of the pooled sample means
mu_all_sizes = np.mean(sample_means_all_sizes)
std_all_sizes = np.std(sample_means_all_sizes)

x_all_sizes = np.linspace(
    mu_all_sizes - 4 * std_all_sizes, mu_all_sizes + 4 * std_all_sizes, 100
)
pdf_all_sizes = stats.norm.pdf(x_all_sizes, mu_all_sizes, std_all_sizes)

plt.plot(x_all_sizes, pdf_all_sizes, 'r-', lw=2)

plt.title("Sample Means Distribution for Different Sample Sizes")
plt.xlabel("Sample Mean")
plt.ylabel("Density")
plt.show()

#12. Calculate and plot the standard normal distribution (mean = 0 and std dev = 1)
def plot_standard_normal_distribution():
    """Plot the standard normal density on [-4, 4] with a dashed line at 0."""
    z_grid = np.linspace(-4, 4, 100)
    density = stats.norm.pdf(z_grid)

    plt.plot(z_grid, density, label='Standard Normal Distribution', color='blue')
    plt.title("Standard Normal Distribution (mean=0; std dev=1)")
    plt.xlabel("Z-score")
    plt.ylabel("Probability Density")
    # Dashed vertical marker at z = 0
    plt.axvline(0, color='red', linestyle='--')
    plt.grid()
    plt.legend()
    plt.show()


plot_standard_normal_distribution()

#13. Generate random variables and calculate their corresponding probabilities using the binomial distribution
n_binom = 10
p_binom = .5
binom_rv_values = np.arange(0, n_binom + 1)

# Despite its name, binom_probs holds the simulated draws (success counts),
# not probabilities; the name is kept for compatibility.
binom_probs = np.random.binomial(n_binom, p_binom, size=10000)

# Empirical PMF: relative frequency of every outcome 0..n_binom.
# minlength guarantees one bin per possible outcome even when the largest
# outcomes never occur in the simulation; without it the array could be
# shorter than binom_rv_values and plt.bar would fail on mismatched lengths.
probabilities_binom = np.bincount(binom_probs, minlength=n_binom + 1) / len(binom_probs)

plt.bar(binom_rv_values, probabilities_binom, alpha=.7, color='orange')
plt.title(f'Binomial Distribution PMF (n={n_binom}, p={p_binom})')
plt.xlabel("Number of Successes")
plt.ylabel("Probability")
plt.xticks(binom_rv_values)
plt.show()

#14. Calculate Z-score for a given data point and compare it to standard normal distribution.
def calculate_single_z_score(value, data):
    """Return the z-score of `value` relative to the mean and std of `data`.

    Args:
        value: The data point to standardize.
        data: Observations whose np.mean and np.std define the reference.

    Returns:
        (value - mean(data)) / std(data).
    """
    deviation = value - np.mean(data)
    return deviation / np.std(data)

# 100 draws from a normal distribution with mean 50 and standard deviation 10.
# np.random.normal names its standard-deviation argument `scale`; `stddev` is
# not an accepted keyword and raises TypeError.
data_for_z_score = np.random.normal(loc=50, scale=10, size=100)

value_to_check = 60
z_score_value_to_check = calculate_single_z_score(value_to_check, data_for_z_score)

print(f"Z-score for value {value_to_check}: {z_score_value_to_check}")

#15. Implement hypothesis testing using Z-statistics for a sample dataset.
def z_test(sample_mean, population_mean, population_std, n):
    """Compute the one-sample z statistic.

    Args:
        sample_mean: Mean of the observed sample.
        population_mean: Population mean to compare against.
        population_std: Population standard deviation.
        n: Number of observations in the sample.

    Returns:
        (sample_mean - population_mean) divided by population_std / sqrt(n).
    """
    standard_error = population_std / np.sqrt(n)
    return (sample_mean - population_mean) / standard_error

sample_mean_value = np.mean(data_for_z_score)

# Null hypothesis: the sample comes from a population with mean 50 and
# standard deviation 10 (the values the sample is meant to be generated with).
# Estimating both from the sample itself would make the numerator
# sample_mean - population_mean identically zero and the test meaningless.
population_mean_value = 50
population_std_value = 10

z_statistic_result = z_test(
    sample_mean_value,
    population_mean_value,
    population_std_value,
    len(data_for_z_score),
)

# Two-sided p-value from the standard normal survival function
p_value_result = 2 * stats.norm.sf(abs(z_statistic_result))

print(f"Z-statistic for hypothesis testing: {z_statistic_result}")
print(f"Two-sided p-value: {p_value_result}")

#16. Create confidence interval for a dataset using Python and interpret result.
def confidence_interval(data, sample_size, sigma=None, confidence=0.95):
    """Return a z-based confidence interval for the mean of `data`.

    The interval is mean(data) +/- z * sigma / sqrt(sample_size), where z is
    the standard normal quantile leaving (1 - confidence) / 2 in each tail.

    Args:
        data: Observations whose mean is the centre of the interval.
        sample_size: Number of observations used for the standard error.
        sigma: Standard deviation to use; when None, np.std(data) is used.
        confidence: Confidence level strictly between 0 and 1.

    Returns:
        Tuple (lower, upper) with the bounds of the interval.

    Raises:
        ValueError: If confidence is not strictly between 0 and 1.
    """
    if not 0 < confidence < 1:
        raise ValueError(f"confidence must be in (0, 1), got {confidence}")

    mean_val = np.mean(data)

    if sigma is None:
        sigma = np.std(data)

    # The critical value comes from the requested confidence level, never
    # from a previously computed test statistic.
    z_critical = stats.norm.ppf(1 - (1 - confidence) / 2)
    margin_of_error = z_critical * (sigma / np.sqrt(sample_size))

    return mean_val - margin_of_error, mean_val + margin_of_error

confidence_interval_result = confidence_interval(data_for_z_score, len(data_for_z_score))
print(f"Confidence interval: {confidence_interval_result}")

#17. Generate data from normal distribution then calculate confidence interval for its mean.
# np.random.normal takes the standard deviation as `scale`; `stddev` is not an
# accepted keyword and raises TypeError.
normal_dist_data = np.random.normal(loc=50, scale=5, size=100)

confidence_interval_result_normal_dist_data = confidence_interval(
    normal_dist_data, len(normal_dist_data)
)
print(f"Confidence interval for normal dist data: {confidence_interval_result_normal_dist_data}")

#18. Calculate and visualize PDF of normal distribution.
def plot_pdf_normal_distribution(mean, stddev):
    """Plot the normal density with the given mean and standard deviation.

    The curve is evaluated at 100 points spanning four standard deviations
    on either side of the mean.

    Args:
        mean: Mean of the distribution (passed to scipy as `loc`).
        stddev: Standard deviation of the distribution (passed as `scale`).
    """
    half_width = 4 * stddev
    grid = np.linspace(mean - half_width, mean + half_width, 100)
    density = stats.norm.pdf(grid, loc=mean, scale=stddev)

    plt.plot(grid, density, label=f'Normal Distribution\nMean={mean}, Std Dev={stddev}')
    plt.title("Normal Distribution PDF")
    plt.xlabel("Value")
    plt.ylabel("Probability Density")
    plt.legend()
    plt.grid()
    plt.show()


plot_pdf_normal_distribution(mean=50, stddev=5)

#19. Calculate and interpret CDF of Poisson distribution.
lambda_poisson_cdf = 3

x_poisson_cdf_range = np.arange(11)

# scipy names the Poisson rate parameter `mu`; `lambd` is not an accepted
# keyword. Entry k of the result is P(X <= k) for k = 0..10.
poisson_cdf_values = stats.poisson.cdf(x_poisson_cdf_range, mu=lambda_poisson_cdf).tolist()
print(f"Cumulative probabilities for Poisson distribution (λ={lambda_poisson_cdf}): {poisson_cdf_values}")

plt.bar(x_poisson_cdf_range, np.array(poisson_cdf_values), alpha=.7, color='blue')
plt.title(f'Poisson CDF (λ={lambda_poisson_cdf})')
plt.xlabel("k (Number of Events)")
plt.ylabel("Cumulative Probability")
plt.xticks(x_poisson_cdf_range)
plt.grid()
plt.show()

#20. Simulate random variable using continuous uniform distribution and calculate its expected value.
a_uniform, c_uniform = 2, 8

uniform_random_variable = np.random.uniform(low=a_uniform, high=c_uniform, size=10000)

# Theoretical expectation of a uniform variable: the midpoint of its bounds
expected_value_uniform = (a_uniform + c_uniform) / 2
print(f"Expected Value of Continuous Uniform Distribution: {expected_value_uniform}")

# Use the simulated draws as well: their average should be close to the
# theoretical expectation printed above.
simulated_mean_uniform = np.mean(uniform_random_variable)
print(f"Simulated Mean of Continuous Uniform Distribution: {simulated_mean_uniform}")

#21. Compare standard deviations of two datasets and visualize difference.
# np.random.normal takes the standard deviation as `scale`, not `stddev`.
data_set_1 = np.random.normal(loc=50, scale=10, size=100)
data_set_2 = np.random.normal(loc=60, scale=15, size=100)

# Bind both results to plain variables. The earlier target `np.std_2` stored
# the second value as an attribute on the numpy module and left `std_dev_2`
# undefined for the print below.
std_dev_1, std_dev_2 = np.std(data_set_1), np.std(data_set_2)

print(f"Standard Deviation Dataset A: {std_dev_1}, Standard Deviation Dataset B: {std_dev_2}")

# Side-by-side box plots make the difference in spread visible
sns.boxplot(data=[data_set_1, data_set_2])
plt.title("Comparison of Standard Deviations")
plt.xticks([0, 1], ["Dataset A", "Dataset B"])
plt.ylabel("Values")
plt.show()

#22. Calculate range and interquartile range (IQR) of dataset generated from normal distribution.
# np.random.normal takes the standard deviation as `scale`, not `stddev`.
normal_dist_dataset = np.random.normal(loc=50, scale=10, size=100)

data_range = np.max(normal_dist_dataset) - np.min(normal_dist_dataset)

# IQR = 75th percentile minus 25th percentile. The percentile ranks are passed
# as the numbers 25 and 75; the names q25/q75 were never defined.
first_quartile, third_quartile = np.percentile(normal_dist_dataset, [25, 75])
iqr_val = third_quartile - first_quartile

print(f"Range: {data_range}, IQR: {iqr_val}")

#23. Implement Z-score normalization on dataset and visualize transformation.
def z_score_normalization(data):
    """Rescale data to z-scores using its own mean and standard deviation.

    Args:
        data: Numeric array of observations.

    Returns:
        Array of (data - np.mean(data)) / np.std(data).
    """
    shifted = data - np.mean(data)
    return shifted / np.std(data)

normalized_dataset = z_score_normalization(normal_dist_dataset)

# Histogram of the normalized values with a kernel density estimate on top
sns.histplot(
    normalized_dataset,
    kde=True,
    color="green",
    bins="auto",
)
plt.title("Z-score Normalized Dataset Histogram")
plt.xlabel("Normalized Value")
plt.ylabel("Frequency")
plt.grid()
plt.show()

#24. Calculate skewness and kurtosis of dataset generated from normal distribution.
# Only `stats` is imported (from scipy import stats); the bare name `scipy`
# is not bound in this file, so the functions are reached through `stats`.
skewness_val = stats.skew(normal_dist_dataset)
# stats.kurtosis uses Fisher's definition by default (excess kurtosis), so a
# normal sample is expected to give a value near 0 rather than 3.
kurtosis_val = stats.kurtosis(normal_dist_dataset)

print(f"Skewness: {skewness_val}, Kurtosis: {kurtosis_val}")
