In [7]:
import numpy as np

def generate_idds(mean, variance, num_samples, rng=None):
  """
  Draw samples from a normal distribution and return them as a list.

  Args:
      mean: Mean (location) of the normal distribution.
      variance: Variance of the distribution; must be non-negative. NumPy is
          given the standard deviation, ``sqrt(variance)``.
      num_samples: The number of samples to draw.
      rng: Optional ``np.random.Generator`` or ``np.random.RandomState`` to
          draw from, so results can be reproduced. When ``None`` (the
          default) the global ``np.random`` state is used.

  Returns:
      A list of ``num_samples`` Python floats.

  Raises:
      ValueError: If ``variance`` is negative or NaN.
  """
  # np.sqrt of a negative number produces NaN with only a RuntimeWarning, so
  # without this guard a bad variance would silently turn into NaN samples.
  # The ">= 0" comparison is False for NaN, so NaN is rejected as well.
  if not np.all(np.asarray(variance, dtype=float) >= 0):
    raise ValueError(f"variance must be non-negative, got {variance!r}")

  # Fall back to the module-level RNG so existing callers (and np.random.seed)
  # keep working exactly as before.
  source = np.random if rng is None else rng
  samples = source.normal(loc=mean, scale=np.sqrt(variance), size=num_samples)

  return samples.tolist()

# Demo: draw a small sample from a normal distribution.
# Edit the three values below to change the distribution or the sample size.
mean, variance, num_samples = 0, 1, 20

idds = generate_idds(mean=mean, variance=variance, num_samples=num_samples)



In [8]:
# Sample mean of the values currently held in `idds`; for a small sample this
# only approximates the mean the distribution was configured with.
np.mean(idds)

-0.044852979131291805

In [9]:
import numpy as np

def generate_binomial_idds(p, num_samples, rng=None):
  """
  Draw 0/1 income-group labels, one independent trial per sample.

  Each label is a binomial draw with ``n=1`` (i.e. a Bernoulli trial), so it
  is 1 with probability ``p`` and 0 otherwise.

  Args:
      p: The probability of belonging to the "higher income" group; must lie
          in the closed interval [0, 1].
      num_samples: The number of labels to generate.
      rng: Optional ``np.random.Generator`` or ``np.random.RandomState`` to
          draw from, so results can be reproduced. When ``None`` (the
          default) the global ``np.random`` state is used.

  Returns:
      A list of ``num_samples`` ints, where 0 represents "lower income" and
      1 represents "higher income".
  """
  # Fall back to the module-level RNG so existing callers (and np.random.seed)
  # keep working exactly as before.
  source = np.random if rng is None else rng
  samples = source.binomial(n=1, p=p, size=num_samples)

  return samples.tolist()

# Demo: label 100 individuals, each with a 30% chance of "higher income".
p, num_samples = 0.3, 100

idds = generate_binomial_idds(p=p, num_samples=num_samples)

# Show the raw labels, followed by a legend for the 0/1 encoding.
print("Generated IDs:", idds)
print("0 = lower income, 1 = higher income")


Generated IDs: [0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1]
0 = lower income, 1 = higher income


In [2]:
from sklearn.datasets import load_iris

# Fetch the Iris dataset object that ships with scikit-learn.
iris = load_iris()

# Split it into the feature matrix (sepal/petal length and width) and the
# species labels.
X, y = iris.data, iris.target

# Preview the first five rows of features, then their matching labels.
print(X[:5])
print(y[:5])

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[0 0 0 0 0]


In [3]:
# Display the human-readable names of the dataset's features.
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

numpy.ndarray