# EECS764P Coursework 1
- Name: Bheki Maenetja
- Student ID: 230382466

## Imports

In [1]:
import numpy as np
from scipy.stats import expon
from scipy.integrate import quad

!pip install plotly
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "iframe"



## Plotting Functions

In [2]:
# Plotting functions
def plot_data(x=None, y=None, z=None, title="", x_label="", y_label="", name="", mode="markers", text="", fill=None, **traces):
    """
    Build a plotly figure holding one main scatter trace plus any extra traces.

    PARAMETERS
        * x, y --> coordinates of the main trace; the trace is only added when both are given.
        * z --> optional third coordinate; when given, a Scatter3d trace is built instead of a Scatter.
        * title, x_label, y_label --> figure title and axis titles.
        * name, mode, text --> forwarded to the trace constructor.
        * fill --> forwarded to go.Scatter only (it is not passed to the 3D trace).
        * **traces --> additional ready-made traces, added after the main trace.
    RETURNS
        * the go.Figure instance.
    """
    layout = dict(title=title, xaxis=dict(title=x_label), yaxis=dict(title=y_label))
    figure = go.Figure(layout=layout)

    # arguments common to the 2D and the 3D trace
    shared = dict(x=x, y=y, mode=mode, name=name, text=text)
    if z is not None:
        main_trace = go.Scatter3d(z=z, **shared)
    else:
        main_trace = go.Scatter(fill=fill, **shared)

    # the main trace is built unconditionally but only attached when both x and y exist
    if not (x is None or y is None):
        figure.add_trace(main_trace)

    for extra in traces.values():
        figure.add_trace(extra)

    return figure

## Question  1

In [3]:
def count(x):
    """
    Counting measure: return the number of elements in a set.

    PARAMETERS
        * x --> an instance of the type "set" (subclasses of set are accepted).
    RETURNS
        * an integer value denoting the number of elements in x.
    RAISES
        * AssertionError if x is not a set.
    """
    # isinstance rather than an exact type comparison, so subclasses of set pass the check.
    assert isinstance(x, set), "Error — Input type is incorrect!"

    return len(x)

In [4]:
def additivity_test(set_1, set_2):
    """
    Check that count is additive on two disjoint sets, i.e. that
    count(set_1 ∪ set_2) == count(set_1) + count(set_2).

    PARAMETERS
        * set_1 --> instance of the type set.
        * set_2 --> instance of the type set.
    RETURNS
        * a string indicating whether or not the test has been passed.
    RAISES
        * AssertionError if either input is not a set, or if the sets share an element.
    SIDE EFFECTS
        * prints the common count when the test passes.
    """
    # check that BOTH inputs are of the correct data type
    # (each type must be compared explicitly: a bare type object is always truthy)
    assert type(set_1) == set and type(set_2) == set, "Error — Input type is incorrect!"

    # check that the sets are disjoint
    assert set_1.isdisjoint(set_2), "Error — Sets are not disjoint."

    # count of the union of both sets
    count_1 = count(set_1.union(set_2))

    # sum of the counts of both sets
    count_2 = count(set_1) + count(set_2)

    # additivity holds when the two quantities agree
    if count_1 == count_2:
        print(f"Count = {count_1}")
        return "Additivity test passed"
    return "Additivity test failed"

### Tests

In [5]:
# two disjoint three-element sets used as inputs for count and additivity_test
a = {"a", "b", "c"}
b = {"d", "e", "f"}

In [6]:
count(a)

3

In [7]:
count(b)

3

In [8]:
empty = set()  # the empty set
count(empty)  # the count of the empty set should be 0

0

In [9]:
additivity_test(a, b)

Count = 6


'Additivity test passed'

## Question 2

In [10]:
class Interval:
    """
    An interval of real numbers with start <= end, where each endpoint may be
    included (closed) or excluded (open).
    """

    def __init__(self, start, end, incl_start=True, incl_end=True):
        """
        PARAMETERS
            * start --> real number indicating start of interval.
            * end --> real number indicating end of interval.
            * incl_start --> boolean indicating whether interval includes start value.
            * incl_end --> boolean indicating whether interval includes end value.
        RETURNS
            * None
        RAISES
            * AssertionError if end < start.
        """
        # check that the end value is greater than or equal to start value
        assert start <= end, "Error – End value < start value"

        # endpoints of the interval
        self.start = start
        self.end = end
        # flags: is the corresponding endpoint part of the interval?
        self.incl_start = incl_start
        self.incl_end = incl_end

    def __str__(self):
        """
        PARAMETERS
            * None
        RETURNS
            * a string representation of the interval, e.g. "[0, 1)":
              square bracket for an included endpoint, round bracket for an excluded one.
        """
        # choose brackets from the truthiness of the flags; indexing a string with the
        # flag itself only works when the flag is exactly True/False (or 0/1)
        left_b = "[" if self.incl_start else "("
        right_b = "]" if self.incl_end else ")"

        return f"{left_b}{self.start}, {self.end}{right_b}"

    def __repr__(self):
        # same text as str() so that the interval displays in mathematical notation
        return self.__str__()

In [11]:
def length(inter):
    """
    Measure an interval by the distance between its two endpoints.

    PARAMETERS
        * inter --> an instance of the type Interval
    RETURNS
        * a real number: the end value minus the start value
          (the inclusion flags of the endpoints are not consulted).
    """
    # only Interval instances can be measured
    assert isinstance(inter, Interval), "Error — Input type must be Interval!"

    lower, upper = inter.start, inter.end
    return upper - lower

### Tests

In [12]:
# interval from 1 to 3.5; both endpoints are included by default
a = Interval(1, 3.5)
a  # display the interval via its __repr__

[1, 3.5]

In [13]:
length(a)

2.5

## Question 3

In [14]:
def expon_measure(inter, lambda_val=2):
    """
    Probability mass that an exponential distribution assigns to an interval.

    PARAMETERS
        * inter --> an instance of the type Interval.
        * lambda_val --> rate parameter of the exponential distribution (must be > 0); defaults to 2.
    RETURNS
        * a real number between 0 and 1 indicating the probability mass of the given interval.
    RAISES
        * AssertionError if inter is not an Interval.
        * ValueError if lambda_val is not strictly positive.
    """
    # check that inter is of the correct data type
    assert isinstance(inter, Interval), "Error — Input type must be Interval!"

    # "not >" (rather than "<=") so that NaN is rejected as well
    if not lambda_val > 0:
        raise ValueError("lambda_val must be strictly positive")

    # scipy parameterises the exponential by scale = 1 / lambda
    scale = 1 / lambda_val

    # mass of the interval = CDF(end) - CDF(start); the endpoint inclusion flags
    # are not consulted here
    return expon.cdf(inter.end, scale=scale) - expon.cdf(inter.start, scale=scale)

### Tests

In [15]:
# test intervals for expon_measure
a = Interval(start=0, end=1)  # [0, 1]
b = Interval(start=1, end=1)  # [1, 1]
c = Interval(start=1, end=10)  # [1, 10]
d = Interval(start=0, end=float("inf"), incl_end=False)  # [0, inf); "inf" means infinity

# show each interval next to its variable name
print(f"a = {a}")
print(f"b = {b}")
print(f"c = {c}")
print(f"d = {d}")

a = [0, 1]
b = [1, 1]
c = [1, 10]
d = [0, inf)


In [16]:
expon_measure(a)

0.8646647167633873

In [17]:
expon_measure(b)

0.0

In [18]:
expon_measure(c)

0.13533528117545912

In [19]:
expon_measure(d)

1.0

### Plotting Exponential Distribution

In [20]:
# density of the exponential distribution with rate 2, evaluated on a grid over [0, 5]
lambda_val = 2
x = np.linspace(0, 5, 1000)  # 1000 evenly spaced evaluation points
y = expon.pdf(x, scale=1/lambda_val)  # scipy takes scale = 1 / lambda

In [21]:
# plot the density curve; the y-axis shows the density f(x), not a probability
# (its values exceed 1 near x = 0), so it is labelled accordingly
plot_data(
    x=x,
    y=y,
    title="The Exponential Distribution",
    x_label="x",
    y_label="f(x)",
    name="Exponential Distribution",
    mode="lines",
    fill="tozeroy",
)

When looking at the plot of the exponential distribution, the results of running 'expon_measure' do appear to make sense. The bulk of the area under the curve is contained in the interval [0, 1], whereas only a much smaller slice of it is contained in [1, 2]; the area over [2, inf) is almost negligible in comparison. This aligns with the results from running the function. The result of running expon_measure on [0, 1] (approx. 0.86) is roughly six times greater than the result for [1, 10] (approx. 0.14); this lines up with what we see in the plot. The result of expon_measure([0, inf)) is of course 1, as the interval contains every value in the support of the exponential distribution. The result of expon_measure([1, 1]) is 0 because the interval has length 0: it is a single point, and a continuous distribution assigns zero probability mass to any single point.

## Question 4

In [22]:
def expon_pdf(x, lambda_val=2):
    """
    Probability density function of the exponential distribution at a point.

    PARAMETERS
        * x --> a real number (a Python int/float or a NumPy integer/floating scalar).
        * lambda_val --> rate parameter of the distribution (must be > 0); defaults to 2.
    RETURNS
        * a single number: the value of the pdf of Exp(lambda_val) at x.
          (No error estimate is returned by this function.)
    RAISES
        * AssertionError if x is not a real number.
        * ValueError if lambda_val is not strictly positive.
    """
    # check that x is of the correct type; NumPy scalars are accepted as well as
    # plain Python numbers, so values taken from NumPy arrays can be passed in
    assert isinstance(x, (int, float, np.integer, np.floating)), "Input type must be real number!"

    # "not >" (rather than "<=") so that NaN is rejected as well
    if not lambda_val > 0:
        raise ValueError("lambda_val must be strictly positive")

    # scipy parameterises the exponential by scale = 1 / lambda
    scale = 1 / lambda_val

    # calculate and return pdf
    return expon.pdf(x, scale=scale)

### Tests

In [23]:
# a) [0, 1]
quad(expon_pdf, 0, 1)

(0.8646647167633872, 9.59970677131702e-15)

In [24]:
# b) [1, 1]
quad(expon_pdf, 1, 1)

(0.0, 0.0)

In [25]:
# c) [1, 10]
quad(expon_pdf, 1, 10)

(0.13533528117545912, 2.4502060294750603e-09)

In [26]:
# d) [0, inf)
quad(expon_pdf, 0, float("inf"))

(1.0, 1.547006336759497e-10)

The answers (i.e. the first value in each tuple) agree with the answers from question 3 to within floating-point precision; any difference is confined to the last digit and is well inside the error bound reported as the second value in the tuple. This is because the probability density function (pdf) of the exponential distribution is continuous and integrable. Therefore (according to the fundamental theorem of calculus), integrating the pdf yields the cumulative distribution function. If the integration is done over a specific interval, then we obtain the probability mass of that interval. Given that this was done for the same intervals used in question 3, the answers match.