# Distribution plot

## Setting up the plotly library

In [None]:
# Importing high-level chart objects
import plotly.graph_objects as go
import plotly.io as pio
# Make 'notebook' the default renderer used when figures are shown
pio.renderers.default = 'notebook'

# Importing the figure factory
import plotly.figure_factory as ff

# Legacy offline interface: should not be used for new installations.
# Figures in this notebook are displayed with pio.show() instead.
from plotly.offline import iplot 

## Creating data point values

In [None]:
# Import the standard-library random module (used for the gender labels)
import random

# Import the numerical python library
import numpy as np

# Import pandas
import pandas as pd

# One seed and one sample count shared by every generated column
SEED = 1234
N_SAMPLES = 1000

# Seed BOTH pseudo-random generators: NumPy's global state drives age and
# salary, while the stdlib `random` module drives the gender labels.
# Seeding only NumPy would leave the Gender column (and therefore the
# female/male sub-dataframes) different on every run.
np.random.seed(SEED)
random.seed(SEED)

age = np.random.uniform(low=21, high=75, size=N_SAMPLES)
salary = np.random.normal(loc=3000, scale=1000, size=N_SAMPLES)
binary_gender = random.choices(["female", "male"], k=N_SAMPLES)

# Create dataframe
df = pd.DataFrame({"Age": age, "Salary": salary, "Gender": binary_gender})

# Gender-based sub-dataframes
female = df[df.Gender == "female"]
male = df[df.Gender == "male"]

In [None]:
# Preview the first five rows of the female sub-dataframe
female.head(n=5)

In [None]:
# Preview the last five rows of the male sub-dataframe
male.tail(n=5)

## Bare-bones distribution plot

In [None]:
# Distribution plot of every salary as a single group, using 200-wide bins
fig = ff.create_distplot(
    hist_data=[df["Salary"].tolist()],
    group_labels=["Salary distribution"],
    bin_size=[200],
)

pio.show(fig)

## Adding a title

In [None]:
# Single-group salary distribution plot with 200-wide bins
fig = ff.create_distplot(
    hist_data=[df["Salary"].tolist()],
    group_labels=["Salary distribution"],
    bin_size=[200],
)

# Attach a chart title through the figure's layout
fig.layout.update(title="Salary distribution")

pio.show(fig)

## Distribution plot of two datasets

In [None]:
# One salary list per gender, in the same order as the labels below
hist_data = [subset["Salary"].tolist() for subset in (female, male)]
group_labels = ["Female salary", "Male salary"]

# Both groups use 200-wide bins
fig = ff.create_distplot(hist_data, group_labels, bin_size=[200, 200])

pio.show(fig)

## Changing colors

In [None]:
# One salary list per gender, in the same order as the labels below
hist_data = [subset["Salary"].tolist() for subset in (female, male)]
group_labels = ["Female salary", "Male salary"]

# One RGBA color string per group: dark grey first, light grey second
fig = ff.create_distplot(
    hist_data,
    group_labels,
    bin_size=[200, 200],
    colors=["rgba(20,20,20,0.8)", "rgba(150,150,150,0.8)"],
)

pio.show(fig)

## Normal curve

In [None]:
# One salary list per gender, in the same order as the labels below
hist_data = [subset["Salary"].tolist() for subset in (female, male)]
group_labels = ["Female salary", "Male salary"]

# Request the "normal" curve type for both groups
fig = ff.create_distplot(
    hist_data,
    group_labels,
    bin_size=[200, 200],
    curve_type="normal",
)

fig.layout.update(title="Fitted normal curve")

pio.show(fig)

## Omit the histogram and rug plot

In [None]:
# One salary list per gender, in the same order as the labels below
hist_data = [subset["Salary"].tolist() for subset in (female, male)]
group_labels = ["Female salary", "Male salary"]

# Keep only the fitted normal curves: histogram and rug plot are switched off
fig = ff.create_distplot(
    hist_data,
    group_labels,
    bin_size=[200, 200],
    curve_type="normal",
    show_hist=False,
    show_rug=False,
)

# Title the chart, label the x-axis and set its domain to [0, 1]
fig.layout.update(
    title="Fitted normal curve without histogram or rug plot",
    xaxis={"title": "Salary", "domain": [0, 1]},
)

pio.show(fig)