# The histogram

## Setting up the plotly library

In [1]:
# Importing high-level chart objects
import plotly.graph_objects as go
import plotly.io as pio
# Default renderer used whenever a figure is shown
# ('notebook' is another available renderer name).
pio.renderers.default = "iframe"

## Creating data point values for a numerical variable

In [2]:
# Import the random module
import random

In [3]:
# Import the numerical python library
import numpy as np

In [4]:
# Fix the state of NumPy's global pseudo-random generator so that the
# NumPy draws made afterwards are the same on every run.
np.random.seed(seed=1234)

In [5]:
# Simulate 100 observations. Every draw comes from NumPy's global generator,
# so the seed set with np.random.seed() makes the whole data set reproducible.
# (The stdlib `random` module was never seeded, so drawing the gender labels
# from it produced a different gender column on every run.)
age = np.random.uniform(low=21, high=75, size=100)
salary = np.random.normal(loc=3000, scale=1000, size=100)
# .tolist() keeps binary_gender a plain list of Python strings.
binary_gender = np.random.choice(["female", "male"], size=100).tolist()

In [6]:
# Import pandas
import pandas as pd

In [7]:
# Collect the three simulated variables into one table,
# with columns in the order Age, Salary, Gender.
df = pd.DataFrame(dict(Age=age, Salary=salary, Gender=binary_gender))

In [8]:
# Split the table into one sub-dataframe per gender label
female = df.loc[df["Gender"] == "female"]
male = df.loc[df["Gender"] == "male"]

## Bare-bones histogram

In [9]:
# Peek at the first ten simulated ages
age[0:10]

array([31.34205032, 54.59387364, 44.63729791, 63.40936352, 63.11869364,
       35.72000069, 35.92906978, 64.30109759, 72.7395251 , 68.30036228])

In [10]:
# Simplest histogram: pass the raw ages and supply no bin settings.
trace = go.Histogram(x=age)
data = [trace]

# pio.show() takes the figure as a plain dictionary
# (this replaces the legacy iplot() call).
pio.show(dict(data=data))

## Frequency distribution (normalized histogram)

In [11]:
# histnorm="probability" requests a normalized histogram instead of raw counts.
trace = go.Histogram(x=age, histnorm="probability")
data = [trace]

# Figure title and x-axis label
layout = dict(
    title="Frequency distribution of age",
    xaxis=dict(title="Age in 5-year increments"),
)

# pio.show() takes the figure as a plain dictionary
# (this replaces the legacy iplot() call).
pio.show(dict(data=data, layout=layout))

## Two datasets

### Overlay

In [12]:
# One trace per gender; only the male trace is given an explicit opacity.
trace0 = go.Histogram(x=female["Age"], name="Female age")
trace1 = go.Histogram(x=male["Age"], name="Male age", opacity=0.8)
data = [trace0, trace1]

# barmode="overlay" selects the overlay layout for the two traces.
layout = dict(
    title="Age by gender",
    xaxis=dict(title="Age in 10-year increments"),
    barmode="overlay",
)

# pio.show() takes the figure as a plain dictionary
# (this replaces the legacy iplot() call).
pio.show(dict(data=data, layout=layout))

### Stack

In [13]:
# Same two per-gender traces as the overlay example, without an opacity setting.
trace0 = go.Histogram(x=female["Age"], name="Female age")
trace1 = go.Histogram(x=male["Age"], name="Male age")
data = [trace0, trace1]

# barmode="stack" selects the stacked layout for the two traces.
layout = dict(
    title="Age by gender",
    xaxis=dict(title="Age in 10-year increments"),
    barmode="stack",
)

# pio.show() takes the figure as a plain dictionary
# (this replaces the legacy iplot() call).
pio.show(dict(data=data, layout=layout))

## Bin size

In [14]:
# Both traces use identical explicit bins: start 20, end 80, size 5.
trace0 = go.Histogram(
    x=female["Age"],
    name="Female age",
    xbins={"start": 20, "end": 80, "size": 5},
)
trace1 = go.Histogram(
    x=male["Age"],
    name="Male age",
    xbins={"start": 20, "end": 80, "size": 5},
)
data = [trace0, trace1]

# Stacked layout; the axis label matches the bin size set above.
layout = dict(
    title="Age by gender",
    xaxis=dict(title="Age in 5-year increments"),
    barmode="stack",
)

# pio.show() takes the figure as a plain dictionary
# (this replaces the legacy iplot() call).
pio.show(dict(data=data, layout=layout))

In [15]:
# Stacked salary histogram per gender with explicit 200-wide bins.
# NOTE(review): the bins span 500-5500; salaries are simulated elsewhere in
# this notebook, so confirm how plotly treats values outside that range.
trace0 = go.Histogram(x = female.Salary,
                     name = "Female salary",
                     xbins = dict(start = 500, end = 5500, size = 200))

# Legend label corrected from the misspelled "Male salay".
trace1 = go.Histogram(x = male.Salary,
                     name = "Male salary",
                     xbins = dict(start = 500, end = 5500, size = 200))

data = [trace0, trace1]

# Stacked layout; the axis label matches the bin size set above.
layout = {"title":"Salary by gender",
         "xaxis":{"title":"Salary in 200 euro increments"},
         "barmode":"stack"}

# pio.show() takes the figure as a plain dictionary
# (this replaces the legacy iplot() call).
pio.show({"data": data, 'layout':layout})

## Cumulative histogram

In [16]:
# Enable the trace's cumulative option for the salaries of the whole dataframe.
trace = go.Histogram(x=df["Salary"], cumulative={"enabled": True})
data = [trace]

# Only a figure title is set for this plot.
layout = dict(title="Cumulative histogram")

# pio.show() takes the figure as a plain dictionary
# (this replaces the legacy iplot() call).
pio.show(dict(data=data, layout=layout))