In [None]:
## Preload libraries
import numpy as np
from matplotlib import pyplot as plt

## Create RNG generator
# Build the random source and expose one `rng_integers(low, high)` callable;
# `high` is exclusive in both the modern and the legacy API.
try:
    # NumPy >= 1.17: Generator API.
    rng = np.random.default_rng()
    rng_integers = rng.integers
except AttributeError:
    # Older NumPy has no `default_rng`, so the lookup above fails. Fall back
    # to the legacy RandomState, whose equivalent method is `randint`.
    rng = np.random.RandomState()
    rng_integers = rng.randint

## Preset numpy printing_options
# suppress=True -> disable scientific notation
# edgeitems=6   -> show more elements at each edge of a summarized array
np.set_printoptions(suppress=True, edgeitems=6)

In [None]:
## helper functions
def _rng_int_stream(low=0, high=100, count=10):
    """
        Lazily produce random integers, one per iteration.

        low: smallest value that may be drawn (inclusive)
        high: upper bound of the draw (exclusive)
        count: how many integers to produce

        Every argument is passed through int() before use. Because this is
        a generator, that coercion (and any error it raises) only happens
        once the caller starts iterating.
    """
    low = int(low)
    high = int(high)
    count = int(count)
    remaining = count
    while remaining > 0:
        yield rng_integers(low, high)
        remaining -= 1

def rng_int_iter(low=0, high=100, count=10):
    """Return the values produced by `_rng_int_stream`, collected into a list."""
    return [value for value in _rng_int_stream(low, high, count)]

def rng_int_gen(low=0, high=100, count=10):
    """Return the lazy `_rng_int_stream` generator itself, without consuming it."""
    stream = _rng_int_stream(low, high, count)
    return stream

def npa_details(npa):
    """
        Print a summary of a numpy array and hand the same facts back.

        npa: the array to describe
        returns: dict with the keys 'type', 'ndim', 'shape', 'size', 'dtype',
            'itemsize', 'nbytes' and 'data' (in that order), each holding the
            attribute of the same name ('type' holds type(npa))

        One line is printed per entry, followed by the array itself and a
        blank line.
    """
    attr_names = ('ndim', 'shape', 'size', 'dtype', 'itemsize', 'nbytes', 'data')

    # 'type' goes first; the remaining entries are read straight off the array.
    dic = {'type': type(npa)}
    dic.update((name, getattr(npa, name)) for name in attr_names)

    for k, v in dic.items():
        print(f"The {k} of the numpy array is: {v}")

    print("print(np_array):")
    print(npa)
    print()
    return dic

def np2d_rows(npa):
    """
        Print each first-axis entry of an array next to its index.

        npa: the array (or other iterable) to walk
        returns: list holding the entries in iteration order
    """
    # Materialise the rows first so the returned list is what gets printed.
    rows = list(npa)
    for idx, item in enumerate(rows):
        print(f"Index({idx}): {item}")
    return rows

Before we begin to plot data, we need some interesting data to look at. Let's begin by loading some web-server hit data from a local file.

Once the data is loaded into NumPy, we should look at what it looks like and what properties it has.

In [None]:
# Let's load some data
# genfromtxt is called with its defaults, so fields are split on any
# whitespace (tabs included) and parsed as floats.
web_traffic_file = "../data_files/web_traffic.tsv"
data = np.genfromtxt(web_traffic_file)
# npa_details prints the summary itself; binding its return value to `_`
# keeps the notebook from echoing the returned dict as cell output.
_ = npa_details(data)

Looking at our data it appears to have the following structure.

|Hour|Number of unique hits|
|---|---|
|0.0|1590.0|
|1.0|2130.0|
|2.0|3221.0|
|...|...|

Let's extract each column and store it in its own array, named `hours` and `hits` respectively.

In [None]:
def check_nan(npa):
    """
        Report whether an array contains at least one NaN.

        npa: array-like of numeric values, of any shape (scalars included)
        returns: True if any element is NaN, otherwise False (a plain bool)
    """
    # Vectorised test: isnan() flags each element and any() reduces over all
    # axes, so this also works for 2-D input, where filtering row-by-row in
    # Python raises "truth value of an array ... is ambiguous".
    return bool(np.isnan(npa).any())

# Column 0 is the hour index and column 1 the hit count; slice both at once
hours, hits = data[:, 0], data[:, 1]
print(f"First 5 records in hours: {hours[0:5]}")
print(f"First 5 records in hits: {hits[0:5]}")

# is the data reliable? Check for Not-A-Number(nan)
print(f"Does hours include nan?: {check_nan(hours)}")
print(f"Does hits include nan?: {check_nan(hits)}")

So, it appears we have an issue. While the hours data is all numeric, the hits data has some entries that contain `nan`.

If we try to give this data to matplotlib, we are going to have issues. So before proceeding, we need to preprocess our data so that it no longer contains any `nan` values. We can do this using the function `nan_to_num()`:
> numpy.nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None)
>
>Replace NaN with zero and infinity with large finite numbers (default behaviour) or with the numbers defined by the user using the nan, posinf and/or neginf keywords.
>
>If x is inexact, NaN is replaced by zero or by the user defined value in nan keyword, infinity is replaced by the largest finite floating point values representable by x.dtype or by the user defined value in posinf keyword and -infinity is replaced by the most negative finite floating point values representable by x.dtype or by the user defined value in neginf keyword.

This function can be used to force `nan` values to zero, and it also lets us choose the finite values used in place of positive or negative infinity.

In [None]:
def check_nan(npa):
    """
        Report whether an array contains at least one NaN.

        npa: array-like of numeric values, of any shape (scalars included)
        returns: True if any element is NaN, otherwise False (a plain bool)
    """
    # Vectorised test: isnan() flags each element and any() reduces over all
    # axes, so this also works for 2-D input, where filtering row-by-row in
    # Python raises "truth value of an array ... is ambiguous".
    return bool(np.isnan(npa).any())

# Replace every NaN with 0.0 (nan_to_num's default) so plotting gets finite values
hits = np.nan_to_num(hits)
# Re-run the check: with the NaNs replaced this now reports False
print(f"Does hits include nan?: {check_nan(hits)}")
print(hits)

# Plotting data
Now that our data is squared away, let's plot some graphs.

The general format for graphing plots looks like this.
```py
# import matplotlib
from matplotlib import pyplot as plt

# plot data points
plt.plot(data)

# show the graph
plt.show()
```

In [None]:
from matplotlib import pyplot as plt
# With a single argument the values go on the y-axis and their positions
# (0..N-1) are used as the x-axis.
plt.plot(hits)
# Render the current figure
plt.show()

# Graph Types
Matplotlib provides numerous types of graphs that can be generated.

## Basic Plots
* plot(x,y)
* scatter(x,y)
* bar(x,y)
* stem(x,y)
* step(x,y)

## Statistics
* hist(x)
* boxplot(x)
* pie(x)

# Improving our graphs
We have a graph, but it's not very intuitive.

Since we now know how to plot data, let's add some details to it.

Useful methods to add to a chart include
* figure: Create a new figure.
* legend: Place a legend on the axes.
* title: Set a title for the axes.
* xlabel: Set the x-axis label.
* xticks: Set the x-axis ticks and labels.
* ylabel: Set the y-axis label.
* yticks: Set the y-axis ticks and labels.

In [None]:
# import matplotlib
from matplotlib import pyplot as plt

def hoursToDays(days=31, hours_per_day=24):
    """
        Build x-axis ticks that relabel an hourly series in days.

        days: number of day marks to generate (default 31)
        hours_per_day: spacing between consecutive marks, in hours (default 24)
        returns: (ticks, labels) - two lists of equal length where
            ticks[i] == i * hours_per_day is the hour offset of day i and
            labels[i] == i is the label shown at that tick
    """
    labels = list(range(days))
    ticks = [day * hours_per_day for day in labels]
    return ticks, labels

# Control the size of the plot, before plotting any data.
# figsize is in inches; together with dpi=300 this gives a 3600x1800 px image.
i_width = 12
i_height = 6
size_inchs = (i_width, i_height)
plt.figure(figsize=size_inchs, dpi=300)

# Add a title, axis labels, day-based x ticks and a grid to the plot.
plt.title("Unique Visitors / Hour")
plt.xlabel("Day")
plt.ylabel("Visitors")
# hits has one sample per hour, so a tick every 24 samples marks a new day;
# hoursToDays() supplies those positions and the day numbers to show there.
ticks, labels = hoursToDays()
plt.xticks(ticks, labels)
plt.grid()

# Plot and render the graph
plt.plot(hits)
plt.show()