<img src="../../Pics/MLSb-T.png" width="160">
<br><br>
<center><u><H1>Seaborn-Numerical Data</H1></u></center>

In [None]:
import seaborn as sns
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Plotting univariate distributions:

In [None]:
# distplot function: plot the distribution of 100 normally distributed samples
x = np.random.normal(size=100)
sns.distplot(x)

### Histograms:

#### A histogram represents the distribution of data by forming bins along the range of the data and then drawing bars to show the number of observations that fall in each bin. We can also add a rug plot, which draws a small vertical tick at each observation.

In [None]:
# Histogram only (KDE curve switched off), plus one rug tick per observation
sns.distplot(x, rug=True, kde=False)

### Kernel Density Estimation:

In [None]:
# KDE curve only (histogram bars switched off), plus one rug tick per observation
sns.distplot(x, rug=True, hist=False)

In [None]:
# Kernel density estimate of x, with shading requested via shade=True
sns.kdeplot(x, shade=True)

#### The bandwidth (bw) parameter of the KDE controls how tightly the estimation is fit to the data, much like the bin size in a histogram.

In [None]:
# Baseline curve with the default bandwidth
sns.kdeplot(x)
# Overlay a narrow and a wide bandwidth on the same axes for comparison
for bandwidth, caption in ((.2, "bw: 0.2"), (2, "bw: 2")):
    sns.kdeplot(x, bw=bandwidth, label=caption)
plt.legend()

### Fitting parametric distributions:
You can use distplot() to fit a parametric distribution to a dataset and visually evaluate how closely it corresponds to the observed data.

In [None]:
from scipy import stats

# Draw 200 samples from a Gamma distribution whose shape parameter is 6
x = np.random.gamma(shape=6, size=200)
# Histogram without the KDE curve, overlaid with a fitted Gamma density
sns.distplot(x, fit=stats.gamma, kde=False)

### Plotting bivariate distributions:

### Joint Plot:

In [None]:
# Parameters of a 2-D Gaussian: means (0, 1), unit variances, covariance 0.5
mean = [0, 1]
cov = [(1, .5), (.5, 1)]
# 200 samples from that Gaussian, wrapped in a DataFrame with named columns
data = np.random.multivariate_normal(mean, cov, size=200)
df = pd.DataFrame(data, columns=["x", "y"])
# Joint plot of the two columns using the default plot kind
sns.jointplot(data=df, x="x", y="y")

### Hexbin plots:
A hexbin plot shows the counts of observations that fall within hexagonal bins.

```python
x, y = np.random.multivariate_normal(mean, cov, 1000).T
with sns.axes_style("white"):
    sns.jointplot(x=x, y=y, kind="hex", color="k");
```

In [None]:
# 1000 samples from the same 2-D Gaussian, split into separate x and y vectors
samples = np.random.multivariate_normal(mean, cov, 1000)
x, y = samples[:, 0], samples[:, 1]
# Draw the hexbin joint plot under the "white" axes style
with sns.axes_style("white"):
    sns.jointplot(x=x, y=y, color="k", kind="hex")

### Kernel density estimation:

#### You can also plot a two-dimensional kernel density estimate.

In [None]:
# One 6x6-inch axes that all three layers below draw onto
f, ax = plt.subplots(figsize=(6, 6))
# Bivariate density of the x and y columns
# NOTE(review): passing both columns positionally and using `vertical=` below
# looks like the older seaborn calling convention -- confirm against the
# installed seaborn version.
sns.kdeplot(df.x, df.y, ax=ax)
# Rug ticks for the x column, drawn in green
sns.rugplot(df.x, color="g", ax=ax)
# Rug ticks for the y column, requested with vertical=True
sns.rugplot(df.y, vertical=True, ax=ax)

#### The jointplot() function uses a JointGrid to manage the figure.

In [None]:
# KDE joint plot of df's two columns; g is the grid object returned by jointplot
g = sns.jointplot(data=df, x="x", y="y", kind="kde", color="g")
# Overlay the raw observations as white "+" markers on the joint axes
marker_style = dict(c="w", s=30, linewidth=1, marker="+")
g.plot_joint(plt.scatter, **marker_style)
# Make the first collection on the joint axes fully transparent
g.ax_joint.collections[0].set_alpha(0)
g.set_axis_labels("$X$", "$Y$")

In [None]:
iris = sns.load_dataset("iris")
# Pass the column names as x=/y= keywords (as the earlier jointplot cells do):
# recent seaborn releases no longer accept them positionally.
g = sns.jointplot(x="sepal_width", y="petal_length", data=iris,
                  kind="kde", space=0, color="g")

In [None]:
# Joint plot of petal length vs. sepal length with customised marginals.
# x/y are passed as keywords (as the earlier jointplot cells do) because
# recent seaborn releases no longer accept them positionally.
# NOTE(review): `annot_kws` and the `rug` entry in `marginal_kws` look specific
# to older seaborn releases -- confirm they are still accepted by the installed
# version.
g = sns.jointplot(x="petal_length", y="sepal_length", data=iris,
                  marginal_kws=dict(bins=15, rug=True),
                  annot_kws=dict(stat="r"),
                  s=40, edgecolor="w", linewidth=1)

### Scatter plot and Joint Density Estimate Plot:

In [None]:
# Joint plot of the sepal measurements with a 6-level KDE layered on the
# joint axes (zorder=0).
# x/y are passed as keywords (as the earlier jointplot cells do) because
# recent seaborn releases no longer accept them positionally.
g = (sns.jointplot(x="sepal_length", y="sepal_width",
                   data=iris, color="k")
     .plot_joint(sns.kdeplot, zorder=0, n_levels=6))

### Visualizing pairwise relationships in a dataset:
The pairplot() function creates a matrix of axes and shows the relationship for each pair of columns in a DataFrame.

In [None]:
iris = sns.load_dataset("iris")
# Pairwise plots coloured by species, with one marker shape per species
sns.pairplot(iris, hue="species", palette="husl", markers=["o", "s", "D"])

#### Fit Linear Regression to data:

In [None]:
# Pairwise plots of the iris columns using the regression plot kind
g = sns.pairplot(data=iris, kind="reg")

## Heat Map:

In [None]:
# Load the example flights dataset
flights_long = sns.load_dataset("flights")
flights_long.head()

### Plotting a correlation matrix:

In [None]:
# Correlate only the numeric columns: recent pandas releases raise an error
# when corr() meets a non-numeric column instead of silently dropping it.
flights_long.select_dtypes(include="number").corr()

In [None]:
# Annotated heatmap of the correlation matrix. Only numeric columns are
# correlated: recent pandas releases raise an error when corr() meets a
# non-numeric column instead of silently dropping it.
sns.heatmap(flights_long.select_dtypes(include="number").corr(),
            annot=True, linewidths=.5)

In [None]:
# Reshape long -> wide: one row per month, one column per year, passenger
# counts as the cell values. The arguments are named because recent pandas
# releases only accept them as keywords.
flights = flights_long.pivot(index="month", columns="year", values="passengers")
# Draw a heatmap with the numeric values in each cell
sns.heatmap(flights, annot=True, fmt="d", linewidths=.5)  # fmt="d": integer formatting

### Changing the color map:

In [None]:
# Same annotated heatmap, drawn with the "YlGnBu" colormap.
# If you don't need the colorbar, pass cbar=False.
sns.heatmap(flights, cmap="YlGnBu", annot=True, fmt="d", linewidths=.5)

### Using a mask to plot only half of the matrix:

In [None]:
# Correlation matrix of 10 random variables, each observed 200 times
random_rows = np.random.randn(10, 200)
corr = np.corrcoef(random_rows)
corr.shape

In [None]:
# Build a boolean mask (rather than a float array holding 0.0/1.0) that is
# True on the upper triangle, diagonal included.
mask = np.zeros_like(corr, dtype=bool)
# np.triu_indices_from(arr) returns the indices of the upper triangle of arr
mask[np.triu_indices_from(mask)] = True
# Draw the heatmap with the mask applied, under the "white" axes style
with sns.axes_style("white"):
    ax = sns.heatmap(corr, mask=mask, vmax=.5, square=True, annot=True)

## Cluster Map:

In [None]:
# Cubehelix palette returned as a matplotlib colormap
cmap = sns.cubehelix_palette(rot=-.3, light=1, as_cmap=True)
# Cluster map of the flights table, drawn with that colormap
g = sns.clustermap(flights, figsize=(8, 8), linewidths=.5, cmap=cmap)

### Normalize data across the rows:

In [None]:
# z_score=0 selects axis 0 (the rows) for the z-score normalisation
g = sns.clustermap(flights, figsize=(8, 8), z_score=0)

### Standardize the data across the columns:

In [None]:
# standard_scale=1 selects axis 1 (the columns) for the scaling
g = sns.clustermap(
    flights,
    figsize=(8, 8),
    cmap="coolwarm",
    standard_scale=1,
)

## Reference:

https://seaborn.pydata.org/