### How do you calculate the mean, median, and mode of a dataset?

In [None]:
import numpy as np
from scipy import stats

# Small sample reused by later cells (variance, range, histogram, skewness).
data = [12, 15, 12, 18, 19, 12, 14, 17]

# Central tendency: arithmetic mean and middle value of the sample.
mean, median = np.mean(data), np.median(data)
# keepdims=True keeps `mode.mode` array-shaped, so the modal value is read with [0].
mode = stats.mode(data, keepdims=True)
print(f"Mean: {mean}, Median: {median}, Mode: {mode.mode[0]}")

### Write a Python program to compute the variance and standard deviation of a dataset

In [None]:
# Dispersion of `data`. NumPy's defaults (ddof=0) give the population variance
# and standard deviation; pass ddof=1 for the sample (n - 1) versions.
variance, std_deviation = np.var(data), np.std(data)
print(f"Variance: {variance}, Standard Deviation: {std_deviation}")

### Create a dataset and classify it into nominal, ordinal, interval, and ratio types

In [None]:
# One toy example per level of measurement, keyed by the level's name.
dataset = dict(
    Nominal=["Red", "Blue", "Green"],   # labels with no inherent order
    Ordinal=["Low", "Medium", "High"],  # ordered categories
    Interval=[10, 20, 30],              # temperature in Celsius
    Ratio=[5.0, 10.0, 15.0],            # weight in kg
)
print(dataset)

### Implement sampling techniques like random sampling and stratified sampling

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Sampling frame: 100 rows split evenly between strata 'A' and 'B'.
df = pd.DataFrame({'value': range(100), 'group': ['A']*50 + ['B']*50})

# Simple random sample: 10 rows drawn from the whole frame.
# A fixed random_state makes the draw identical on every Restart & Run All.
random_sample = df.sample(n=10, random_state=42)

# Stratified sample: 5 rows from each group. GroupBy.sample draws within every
# stratum directly, so no groupby().apply(lambda ...) detour is needed.
stratified_sample = df.groupby('group').sample(n=5, random_state=42)

print(random_sample)
print(stratified_sample)

### Write a Python function to calculate the range of a dataset

In [None]:
def calc_range(data):
    """Return the range (largest value minus smallest value) of ``data``.

    Args:
        data: A non-empty iterable of mutually comparable values that support
            subtraction. One-shot iterables such as generators are accepted.

    Returns:
        ``max(data) - min(data)``.

    Raises:
        ValueError: If ``data`` contains no values.
    """
    # Materialise once: max() would exhaust a generator and leave nothing for min().
    values = list(data)
    if not values:
        raise ValueError("calc_range() requires at least one value")
    return max(values) - min(values)

# Spread of the shared sample list: its largest value minus its smallest.
data_range = calc_range(data)
print("Range:", data_range)

### Create a dataset and plot its histogram to visualize skewness

In [None]:
import matplotlib.pyplot as plt
# Distribution of the sample list as a 10-bin histogram, drawn on explicit axes.
fig, ax = plt.subplots()
ax.hist(data, bins=10)
ax.set_title('Histogram')
plt.show()

### Calculate skewness and kurtosis of a dataset using Python libraries

In [None]:
# Shape statistics for the sample list: asymmetry first, then tail weight.
# stats.kurtosis reports excess (Fisher) kurtosis by default, so a normal
# distribution scores 0 rather than 3.
for label, statistic in (("Skewness:", stats.skew), ("Kurtosis:", stats.kurtosis)):
    print(label, statistic(data))

### Generate a dataset and demonstrate positive and negative skewness

In [None]:
from scipy.stats import skewnorm
# Two skew-normal samples with opposite shape parameters. Fixed (and distinct)
# random_state values keep the figure identical across Restart & Run All.
positive_skew = skewnorm.rvs(a=10, size=1000, random_state=0)   # a > 0: long right tail
negative_skew = skewnorm.rvs(a=-10, size=1000, random_state=1)  # a < 0: long left tail

# Overlay both histograms on one set of axes; alpha keeps the overlap visible.
fig, ax = plt.subplots()
ax.hist(positive_skew, bins=30, alpha=0.7, label='Positive Skew')
ax.hist(negative_skew, bins=30, alpha=0.7, label='Negative Skew')
ax.set(title='Positive vs. Negative Skew', xlabel='Value', ylabel='Frequency')
ax.legend()
plt.show()

### Write a Python script to calculate covariance between two datasets

In [None]:
# Two short paired series; later cells reuse `x` and `y`.
x, y = [2, 4, 6, 8], [1, 3, 2, 5]

# np.cov returns the 2x2 matrix [[var(x), cov(x, y)], [cov(x, y), var(y)]],
# normalised by n - 1 by default.
cov_matrix = np.cov(x, y)
print("Covariance matrix:\n", cov_matrix)

### Write a Python script to calculate the correlation coefficient between two datasets

In [None]:
# Pearson correlation matrix of the two series, passed as the rows of one
# 2-row input; the off-diagonal entries hold r(x, y).
correlation = np.corrcoef([x, y])
print("Correlation coefficient matrix:\n", correlation)

### Create a scatter plot to visualize the relationship between two variables

In [None]:
# Scatter of the paired series on explicit axes; title and labels set in one call.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set(title='Scatter Plot', xlabel='X', ylabel='Y')
plt.show()

### Implement and compare simple random sampling and systematic sampling

In [None]:
import numpy as np

# Simple random sampling: draw sample_size rows from the frame.
# random_state pins the draw so the printed rows are reproducible.
sample_size = 10
simple_random = df.sample(n=sample_size, random_state=42)

# Systematic sampling: take every `interval`-th row, starting from the first.
# max(..., 1) prevents a zero slice step (ValueError) when the frame has fewer
# rows than sample_size; the trailing slice caps the result at sample_size rows
# when len(df) is not an exact multiple of it.
interval = max(len(df) // sample_size, 1)
systematic_sample = df.iloc[::interval].iloc[:sample_size]

print("Simple Random:\n", simple_random)
print("Systematic:\n", systematic_sample)

### Calculate the mean, median, and mode of grouped data

In [None]:
# Frequency table: each key is an observed value, each entry its count.
grouped_data = {10: 3, 20: 5, 30: 2}  # value: frequency

# Expand the table into raw observations so the usual estimators apply.
values = [val for val, freq in grouped_data.items() for _ in range(freq)]
print(f"Mean: {np.mean(values)}, Median: {np.median(values)}, Mode: {stats.mode(values, keepdims=True).mode[0]}")

### Simulate data using Python and calculate its central tendency and dispersion

In [None]:
# Simulate 1000 draws from a normal distribution (mean 50, standard deviation 10).
# A seeded Generator makes this cell, and every later cell that reads
# `simulated`, reproducible on Restart & Run All.
simulated = np.random.default_rng(42).normal(50, 10, 1000)
print(f"Mean: {np.mean(simulated)}, Std: {np.std(simulated)}")

### Use NumPy or pandas to summarize a dataset’s descriptive statistics

In [None]:
# Wrap the simulated draws in a one-column frame and summarise it.
# NOTE: this rebinds `df`, which earlier cells used for the 100-row sampling frame.
df = pd.Series(simulated, name='data').to_frame()
print(df.describe())

### Plot a boxplot to understand the spread and identify outliers

In [None]:
# Box-and-whisker view of the simulated draws on explicit axes; points plotted
# individually beyond the whiskers are the candidate outliers.
fig, ax = plt.subplots()
ax.boxplot(simulated)
ax.set_title('Boxplot')
plt.show()

### Calculate the interquartile range (IQR) of a dataset

In [None]:
# First and third quartiles from a single percentile call; their gap is the IQR.
Q1, Q3 = np.percentile(simulated, [25, 75])
IQR = Q3 - Q1
print("IQR:", IQR)

### Implement Z-score normalization and explain its significance

In [None]:
# Z-score: distance of each draw from the sample mean, in standard deviations.
# The result has mean 0 and standard deviation 1, which puts variables measured
# on different scales onto a common one.
centre = np.mean(simulated)
spread = np.std(simulated)
z_scores = (simulated - centre) / spread
print("Z-scores (first 10):", z_scores[:10])

### Compare two datasets using their standard deviations

In [None]:
# Two simulated samples with the same mean (50) but different spread (5 vs. 15).
# Both come from one seeded Generator, so the comparison is reproducible while
# the two samples remain separate draws.
comparison_rng = np.random.default_rng(2024)
dataset1 = comparison_rng.normal(50, 5, 1000)
dataset2 = comparison_rng.normal(50, 15, 1000)
print("Std Dev 1:", np.std(dataset1), "Std Dev 2:", np.std(dataset2))

### Write a Python program to visualize covariance using a heatmap

In [None]:
import seaborn as sns

# Pair the two series as columns, then draw their covariance matrix as an
# annotated heatmap on explicit axes.
df_cov = pd.DataFrame({'x': x, 'y': y})
fig, ax = plt.subplots()
sns.heatmap(df_cov.cov(), annot=True, ax=ax)
ax.set_title("Covariance Heatmap")
plt.show()

### Use seaborn to create a correlation matrix for a dataset

In [None]:
# Same two-column frame as the covariance heatmap, now showing pairwise correlations.
fig, ax = plt.subplots()
sns.heatmap(df_cov.corr(), annot=True, ax=ax)
ax.set_title("Correlation Matrix")
plt.show()

### Generate a dataset and implement both variance and standard deviation computations

In [None]:
# 100 integers drawn uniformly from 0..99 (the upper bound 100 is exclusive).
# A seeded Generator keeps the printed statistics, and the later plot built
# from `generated`, stable across runs.
generated = np.random.default_rng(123).integers(0, 100, size=100)
print("Variance:", np.var(generated), "Standard Deviation:", np.std(generated))

### Visualize skewness and kurtosis using Python libraries like matplotlib or seaborn

In [None]:
# Histogram with a KDE overlay for the generated integers, on explicit axes.
fig, ax = plt.subplots()
sns.histplot(generated, kde=True, ax=ax)
ax.set_title('Distribution with KDE')
plt.show()

# Numeric counterparts of what the plot shows: asymmetry, then tail weight.
for label, statistic in (("Skewness:", stats.skew), ("Kurtosis:", stats.kurtosis)):
    print(label, statistic(generated))

### Implement the Pearson and Spearman correlation coefficients for a dataset

In [None]:
from scipy.stats import pearsonr, spearmanr

# Pearson measures linear association; Spearman measures monotonic (rank)
# association. Each call returns (statistic, p-value) and only the statistic
# is kept. Indexing with [0] instead of unpacking into `_` leaves IPython's
# last-output variable `_` untouched.
pearson_corr = pearsonr(x, y)[0]
spearman_corr = spearmanr(x, y)[0]
print(f"Pearson: {pearson_corr}, Spearman: {spearman_corr}")