In [5]:
import math
import matplotlib.pyplot as plt

In [14]:
class Gaussian:
  """Gaussian distribution class for calculating and
  visualizing a Gaussian distribution.

  Attributes:
    mean (float) representing the mean value of the distribution
    stdev (float) representing the standard deviation of the distribution
    data (list of floats) a list of floats extracted from the data file

  """

  def __init__(self, mu=0, sigma=1):
    self.mean = mu
    self.stdev = sigma
    self.data = []

  def calculate_mean(self):
    """Function to calculate the mean of the dataset.

    The result is also stored in the ``mean`` attribute.

    Args: None

    Returns:
      float: mean of the dataset

    Raises:
      ZeroDivisionError: if ``data`` is empty

    """
    self.mean = float(sum(self.data)) / len(self.data)
    return self.mean

  def calculate_stdev(self, sample=True):
    """Function to calculate the standard deviation of the dataset.

    The mean is recomputed from ``data`` first, so the result is correct
    even when ``calculate_mean`` has not been called since ``data`` was
    last changed. Both ``mean`` and ``stdev`` attributes are updated.

    Args:
      sample (bool): whether the data represents a sample or population

    Returns:
      float: standard deviation of the dataset

    Raises:
      ZeroDivisionError: if ``data`` is empty, or holds a single value
        while ``sample`` is True

    """
    # Sample standard deviation divides by n - 1, population by n.
    n = len(self.data) - 1 if sample else len(self.data)

    # Never trust a possibly stale self.mean (e.g. the constructor default).
    mean = self.calculate_mean()

    squared_deviations = sum((d - mean) ** 2 for d in self.data)
    self.stdev = math.sqrt(squared_deviations / n)

    return self.stdev

  def read_data_file(self, file_name, sample=True):
    """Function to read in data from a text file.
    The text file should have one number (float) per line; blank lines
    are skipped. The numbers are stored in the data attribute.
    After reading in the file, the mean and stdev are calculated.

    Args:
      file_name (str): name of a file to read from
      sample (bool): whether the data represents a sample or population

    Returns:
      None

    Raises:
      ValueError: if a non-blank line cannot be parsed as a float

    """
    # The with-statement closes the file; no explicit close() is needed.
    with open(file_name) as handle:
      data_list = [float(line) for line in handle if line.strip()]

    self.data = data_list
    self.calculate_mean()
    self.calculate_stdev(sample)

  def plot_histogram(self):
    """Function to output a histogram of the instance
    variable data using matplotlib pyplot library.

    Args:
      None

    Returns:
      None

    """
    plt.hist(self.data)
    plt.title('Histogram of Data')
    plt.xlabel('Data')
    plt.ylabel('Count')

  def pdf(self, x):
    """Probability density function calculator for the
    gaussian distribution.

    Args:
      x (float): point for calculating the probability density function

    Returns:
      float: probability density function output

    """
    variance = self.stdev ** 2

    normalization = 1 / math.sqrt(2 * math.pi * variance)
    exponent = -((x - self.mean) ** 2) / (2.0 * variance)

    return normalization * math.exp(exponent)

  def plot_histogram_pdf(self, n_spaces=50):
    """Method to plot normalized histogram of the data
    and the probability density function on the same range.

    Args:
      n_spaces (int): number of data points

    Returns:
      list: x values for the pdf plot
      list: y values for the pdf plot

    """
    min_range = min(self.data)
    max_range = max(self.data)

    # calculate interval between x-values
    interval = 1.0 * (max_range - min_range) / n_spaces

    # x values start at the data minimum and advance n_spaces times
    x = [min_range + interval * i for i in range(n_spaces)]
    y = [self.pdf(value) for value in x]

    # create the plot: histogram on top, fitted density below
    fig, axes = plt.subplots(2, sharex=True)
    fig.subplots_adjust(hspace=.5)
    axes[0].hist(self.data, density=True)
    axes[0].set_title('Normed Histogram of Data')
    axes[0].set_ylabel('Density')
    axes[1].plot(x, y)
    axes[1].set_title('Normal Distribution for Sample Mean and Standard Deviation')
    axes[1].set_ylabel('Density')
    plt.show()

    return x, y

  def __add__(self, other):
    """Magic method to add together two Gaussian distributions.

    Args:
      other (Gaussian): Gaussian instance to add

    Returns:
      Gaussian: Gaussian distribution (result)

    """
    # create a new gaussian object
    result = Gaussian()

    # means add directly; standard deviations combine via summed variances
    result.mean = self.mean + other.mean
    result.stdev = math.sqrt(self.stdev ** 2 + other.stdev ** 2)

    return result

  def __repr__(self):
    """Magic method to output the characteristics of the Gaussian instance.

    Args:
      None

    Returns:
      string: characteristics of the Gaussian

    """
    return 'mean {}, standard deviation {}'.format(self.mean, self.stdev)

In [16]:
# Sanity check for Gaussian.__add__ using two known distributions.
gaussian_one = Gaussian(25, 2)
gaussian_two = Gaussian(30, 4)
gaussian_sum = gaussian_one + gaussian_two

# The mean of the sum should be 25 + 30.
print(gaussian_sum.mean == 55)

# Show the summary of each operand and of the result, one per line.
for distribution in (gaussian_one, gaussian_two, gaussian_sum):
  print(distribution)

True
mean 25, standard deviation 2
mean 30, standard deviation 4
mean 55, standard deviation 4.47213595499958
