In [None]:
## Preload libraries
import numpy as np
from matplotlib import pyplot as plt

## Create RNG generator
rng = np.random.default_rng()
# Use the generator's `integers` method when it exists; otherwise fall back
# to an attribute named `randint`.
rng_integers = rng.integers if hasattr(rng, "integers") else rng.randint

## Preset numpy printing_options
np.set_printoptions(
    suppress=True,  # disable scientific notation
    edgeitems=6,    # show more elements
)

In [None]:
## helper functions
def _rng_int_stream(low=0, high=100, count=10):
    """
        Return a stream of random integers
        low: the min integer to generate from pool (inclusive)
        high: the max integer to generate from pool (exclusive)
        count: the number of integers to generate
        method: how to provide the values back to the calling function
            - list: return a list() to the caller
            - yield: return values using the yield keyword
    """
    # coerce arguments to int()
    low, high, count = [int(x) for x in [low, high, count]]
    for _ in range(0,count):
        yield rng_integers(low,high)

def rng_int_iter(low=0, high=100, count=10):
    """Return the integers produced by `_rng_int_stream(low, high, count)` as a list."""
    values = []
    values.extend(_rng_int_stream(low, high, count))
    return values

def rng_int_gen(low=0, high=100, count=10):
    """Return the (not yet consumed) generator from `_rng_int_stream(low, high, count)`."""
    stream = _rng_int_stream(low, high, count)
    return stream

def npa_details(npa):
    """
        Print a summary of an array's attributes, then the array itself.
        npa: the array to describe; it must expose ndim, shape, size, dtype,
             itemsize, nbytes and data attributes
        returns: a dict mapping each attribute name (plus 'type') to its value
    """
    summary = {'type': type(npa)}
    # Collected in the order they are reported below
    for attr in ('ndim', 'shape', 'size', 'dtype', 'itemsize', 'nbytes', 'data'):
        summary[attr] = getattr(npa, attr)

    for label, value in summary.items():
        print(f"The {label} of the numpy array is: {value}")

    print("print(np_array):")
    print(npa)
    print()
    return summary

def np2d_rows(npa):
    """
        Print every item obtained by iterating `npa`, prefixed with its index.
        npa: any iterable (for a 2-D array, iteration yields its rows)
        returns: the iterated items as a list
    """
    rows = list(npa)
    for position in range(len(rows)):
        print(f"Index({position}): {rows[position]}")
    return rows

Before we begin to plot data, we need some interesting data to look at. Let's begin by loading some hits data from a webserver.

Once the data is loaded into numpy, we should look at what it looks like and what properties it has.

In [None]:
# Let's load some data
# The path is relative, so it is resolved against the kernel's working directory.
web_traffic_file = "../data_files/web_traffic.tsv"
# Parse the file into a numpy array using genfromtxt's default settings.
data = np.genfromtxt(web_traffic_file)
# Print the array's attributes; the dict that npa_details returns is not needed here.
_ = npa_details(data)

Looking at our data it appears to have the following structure.

|Hour|Number of unique hits|
|---|---|
|0.0|1590.0|
|1.0|2130.0|
|2.0|3221.0|
|...|...|

Let's extract each column out and store them in independent arrays labeled `hits` and `hours`

In [None]:
def check_nan(npa):
    """
        Report whether `npa` contains at least one NaN.
        npa: a numeric array (any number of dimensions) or array-like
        returns: True if any element is NaN, otherwise False
    """
    # np.isnan() gives an element-wise boolean mask and .any() collapses it in
    # a single vectorised pass. The earlier filter()-based scan only worked on
    # 1-D input: iterating a 2-D mask yields rows, `row == True` is itself an
    # array, and its truth value is ambiguous (ValueError).
    # bool() turns numpy's bool_ into a plain Python bool for callers.
    return bool(np.isnan(npa).any())

# Split the 2-D array into its two columns (every row, one column each).
hours = data[:,0]   # Extract row(*)col(0)
hits = data[:,1]    # Extract row(*)col(1)
# Peek at the first five values of each column rather than printing everything.
print(f"First 5 records in hours: {hours[0:5]}")
print(f"First 5 records in hits: {hits[0:5]}")

# Is the data reliable? Check each column for Not-A-Number (nan) entries
print(f"Does hours include nan?: {check_nan(hours)}")
print(f"Does hits include nan?: {check_nan(hits)}")

So, it appears we have an issue. While the hours data is all numeric, it appears as though the hits data has some entries that contain `nan`

If we try to give this data to matplotlib, we are going to have issues. So before proceeding we need to preprocess our data so that it no longer contains any `nan` values. We can do this using the function `nan_to_num()`:
> numpy.nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None)
>
>Replace NaN with zero and infinity with large finite numbers (default behaviour) or with the numbers defined by the user using the nan, posinf and/or neginf keywords.
>
>If x is inexact, NaN is replaced by zero or by the user defined value in nan keyword, infinity is replaced by the largest finite floating point values representable by x.dtype or by the user defined value in posinf keyword and -infinity is replaced by the most negative finite floating point values representable by x.dtype or by the user defined value in neginf keyword.

This function can be used to force `nan` values to zero, but it also allows us to place limits to be used in the place of positively or negatively infinite values.

In [None]:
def check_nan(npa):
    """
        Report whether `npa` contains at least one NaN.
        npa: a numeric array (any number of dimensions) or array-like
        returns: True if any element is NaN, otherwise False

        NOTE: a helper with this same name is also defined in an earlier
        cell; when the cells run top to bottom this definition replaces it.
    """
    # np.isnan() gives an element-wise boolean mask and .any() collapses it in
    # a single vectorised pass. The earlier filter()-based scan only worked on
    # 1-D input: iterating a 2-D mask yields rows, `row == True` is itself an
    # array, and its truth value is ambiguous (ValueError).
    # bool() turns numpy's bool_ into a plain Python bool for callers.
    return bool(np.isnan(npa).any())

# Replace every nan in hits (nan_to_num's default substitute is 0.0) and rebind
# the name, so all later cells work with the cleaned array.
hits = np.nan_to_num(hits)
# Re-run the check to confirm that no nan entries remain.
print(f"Does hits include nan?: {check_nan(hits)}")
print(hits)

# Plotting data
Now that our data is squared away, let's plot some graphs.

The general format for graphing plots looks like this.
```py
# import matplotlib
from matplotlib import pyplot as plt

# plot data points
plt.plot(data)

# show the graph
plt.show()
```

In [None]:
# pyplot is already imported as plt in the notebook's first cell; the import is
# repeated here, presumably to mirror the three-step template shown above.
from matplotlib import pyplot as plt
# Only the y-values are passed, so the points are drawn against their position in the array.
plt.plot(hits)
# Render the figure.
plt.show()

# Graph Types
Matplotlib provides numerous types of graphs that can be generated.

## Basic Plots
* plot(x,y)
* scatter(x,y)
* bar(x,y)
* stem(x,y)
* step(x,y)

## Statistics
* hist(x)
* boxplot(x)
* pie(x)

# Improving our graphs
We have a graph, but it's not very intuitive.

Since we now know how to plot data, let's add some details to it.

Useful methods to add to a chart include
* figure: Create a new figure.
* legend: Place a legend on the axes.
* title: Set a title for the axes.
* xlabel: Set the x-axis label.
* xticks: Set the x-axis ticks and labels.
* ylabel: Set the y-axis label.
* yticks: Set the y-axis ticks and labels.

In [None]:
def hoursToDays(days=31):
    """
        Build x-axis tick positions and labels for an hourly series.
        days: how many day ticks to produce (default 31, the value that was
              previously hard-coded)
        returns: (ticks, labels) where ticks[i] == i * 24 is the sample index
                 at which day i starts, and labels[i] == i
    """
    days = int(days)
    labels = list(range(days))
    # One tick every 24 samples, i.e. at the first hour of each day
    ticks = [day * 24 for day in labels]
    return ticks, labels

# Control the size of the plot, before plotting any data
# (figsize is given as (width, height); the figure must exist before the
# title/label/tick calls below so that they apply to it)
i_width = 12
i_height = 6
size_inchs = (i_width, i_height)
plt.figure(figsize=size_inchs, dpi=300)

# Add the title, axis labels, day ticks and a grid to the plot
# (no legend is actually created in this cell)
plt.title("Unique Visitors / Hour")
plt.xlabel("Day")
plt.ylabel("Visitors")
# Tick positions are multiples of 24 and labels are day numbers; this lines up
# with the data only if hits holds one sample per hour -- TODO confirm.
ticks, labels = hoursToDays()
plt.xticks(ticks, labels)
plt.grid()

# Plot and render the graph
plt.plot(hits)
plt.show()

Our chart looks way better!

Let's chart it as a scatter chart for fun as well. We have two options to plot a scatter chart with matplotlib.
* Pass a format string as an argument to the `plot()` function. 
* Call `plt.scatter()`

The `scatter()` function requires that we pass two arrays, one representing the x-axis and the other representing the y-axis.  Reasonably, both arrays must be of the same length.  There is also an `s` argument that we can pass to the `scatter()` plot function that allows us to specify the *size* of the points displayed.

## Reference table for the format string used by Matplotlib:
| Character | Description                          |
|:---------:|:-------------------------------------|
| .         | 1 point                              |
| ,         | 1 pixel                              |
| o         | circle                               |
| v         | downward triangle                    |
| ^         | upward triangle                      |
| <         | left triangle                        |
| >         | right triangle                       |
| 1         | tristar down                         |
| 2         | tristar up                           |
| 3         | tristar left                         |
| 4         | tristar right                        |
| s         | square                               |
| p         | pentagon                             |
| *         | star                                 |
| h         | hexagon (point on top)               |
| H         | hexagon (flat top)                   |
| +         | plus                                 |
| x         | x                                    |
| D         | diamond                              |
| d         | skinny diamond                       |
| \|        | vertical line                        |
| _         | horizontal line                      |
| -         | solid line style                     |
| --        | dashed line style                    |
| -.        | dash-dot line style                  |
| :         | dotted line style                    |
| b         | blue                                 |
| g         | green                                |
| r         | red                                  |
| c         | cyan                                 |
| m         | magenta                              |
| y         | yellow                               |
| k         | black                                |
| w         | white                                |

In [None]:
def hoursToDays(days=31):
    """
        Build x-axis tick positions and labels for an hourly series.
        days: how many day ticks to produce (default 31, the value that was
              previously hard-coded)
        returns: (ticks, labels) where ticks[i] == i * 24 is the sample index
                 at which day i starts, and labels[i] == i

        NOTE: a helper with this same name is also defined in an earlier
        cell; when the cells run top to bottom this definition replaces it.
    """
    days = int(days)
    labels = list(range(days))
    # One tick every 24 samples, i.e. at the first hour of each day
    ticks = [day * 24 for day in labels]
    return ticks, labels

# Control the size of the plot, before plotting any data
i_width = 12
i_height = 6
size_inchs = (i_width, i_height)
plt.figure(figsize=size_inchs, dpi=300)

# Add the title, axis labels, day ticks and a grid to the plot
plt.title("Unique Visitors / Hour")
plt.xlabel("Day")
plt.ylabel("Visitors")
ticks, labels = hoursToDays()
plt.xticks(ticks, labels)
plt.grid()

# Draw the points with whichever API `method` selects
method = "scatter"
if method == "scatter":
    # plt.scatter() needs explicit x positions: one per sample in hits
    x = range(0, len(hits))
    y = hits
    plt.scatter(x, y, s=10, c='#ff0000', marker='*')
else:
    # plt.plot() with a format string: '.' point marker, 'r' red
    plt.plot(hits, '.r')

# Render exactly once, after either branch. The scatter branch used to call
# plt.show() itself and then fall through to a second, redundant call.
plt.show()


# Subplots

Let's turn this into a figure with subplots, each one representing a week.

To do this, we call the `subplots()` method describing the layout. For example, if we called plt.subplots(5) it would create 5 subplots. If we called plt.subplots(4,2) it would generate a grid four rows high and two columns wide with a subplot in each cell.

When calling the subplots() function, a tuple is returned. The first element is the figure itself. The second is the array of subplots, frequently described as the "axes".

Before calling the `show()` function, we need to finish generating all the subplots. Additionally we will need to do a bit more work to set the title and labels on each subplot.

* set_title: set the title on a subplot
* set_xlabel: set the label for the x-axis on a subplot
* set_ylabel: set the label for the y-axis on a subplot

Setting xticks is now two separate operations on a subplot:
* set_xticks: This array defines a list of positions at which tick marks should be drawn on the subplot.
* set_xticklabels: takes an array of labels that should be placed on the corresponding tick marks from `set_xticks()`

In [None]:
def dataToWeeks(data):
    """
        Build tick positions and labels for the whole days contained in `data`.
        data: a sized sequence (presumably one entry per hour -- verify
              against the caller)
        returns: (xticks, labels) -- for every complete block of 24 entries,
                 xticks holds the index where the block starts (0, 24, 48, ...)
                 and labels holds its ordinal (0, 1, 2, ...). A trailing
                 partial block gets no tick.
    """
    whole_days = len(data) // 24
    labels = list(range(whole_days))
    xticks = [day * 24 for day in labels]
    return xticks, labels

def draw_subplot(plt, data):
    """
        Draw `data` on one subplot and decorate it with title, labels and ticks.
        plt: the subplot object to draw on. Despite the name, this is the
             object handed in by the caller, not the pyplot module; it must
             provide plot, set_title, set_xlabel, set_ylabel, set_xticks,
             set_xticklabels and grid.
        data: the values to plot; also used to derive the tick positions
    """
    axes = plt  # local alias so the body does not read like pyplot calls
    axes.plot(data)

    axes.set_title("Unique Visitors / Hour")
    axes.set_xlabel("Day")
    axes.set_ylabel("Visitors")

    # Tick positions first, then the labels that belong on those positions
    tick_positions, tick_labels = dataToWeeks(data)
    axes.set_xticks(tick_positions)
    axes.set_xticklabels(tick_labels)

    axes.grid()

# Control the size of the plot, before plotting any data
num_charts = 5
i_width = 8
i_height = 10
size_inchs = (i_width, i_height)

# Number of samples in each subplot's slice of hits
hours_per_week = 24 * 7
# One subplot per chart; iterating `subplots` below yields them one at a time
figure, subplots = plt.subplots(num_charts, figsize=size_inchs, dpi=300)
for weekNum, subplot in enumerate(subplots):
    # Consecutive, non-overlapping windows of hours_per_week samples.
    # A window that runs past the end of hits simply yields a shorter slice.
    begin = weekNum * hours_per_week
    end = begin + hours_per_week
    draw_subplot(subplot, hits[begin:end])

# Fit the charts and render
plt.tight_layout()
plt.show()

# Getting a sense for stacking
Given we have the data broken up weekly, maybe we can stack the data and glean details on how certain weeks compare to other weeks in terms of data volume.

In [None]:
def offset(data, begin, end):
    """Return the slice of `data` from index `begin` up to, but excluding, `end`."""
    window = slice(begin, end)
    return data[window]

# Control the size of the plot, before plotting any data
i_width = 12
i_height = 6
size_inchs = (i_width, i_height)

# define more variables for the chart
weeks = 4
hours_per_week = 24 * 7
colors = ['b', 'c', 'g', 'r']
labels=[f'Week {i}' for i in range(0,weeks)]
x = range(0, hours_per_week)

# One slice of hits per week, derived from `weeks` so the number of stacked
# layers always matches the number of labels (previously four hand-written
# offset() calls that ignored `weeks`).
weekly_hits = [
    offset(hits, week * hours_per_week, (week + 1) * hours_per_week)
    for week in range(weeks)
]

plt.figure(figsize=size_inchs, dpi=300)
# Each week becomes one layer, stacked over the same 0..hours_per_week-1 x-axis
plt.stackplot(
    x,
    *weekly_hits,
    labels = labels,
    colors = colors,
)

# Add the legend (built from `labels`) and render
plt.legend()
plt.show()
