# WORKING WITH SEABORN 

In [None]:
import seaborn as sns
import pandas as pd

In [None]:
# Apply seaborn's default theme.
sns.set_theme()

In [None]:
# Load the "tips" example dataset that ships with the seaborn library.
# To work with our own data instead, we would read it with pandas.
tips = sns.load_dataset(name="tips")

In [None]:
# Preview the first five rows of the dataset.
tips.head(n=5)

In [None]:
# Summary statistics of the dataset.
tips.describe()

In [None]:
# ERROR (intentional): this call fails.
# lmplot fits a regression, which needs numerical values on the x axis,
# but the "sex" column contains strings.
sns.lmplot(data=tips, x="sex", y="tip")

## Convert Gender (string) to numerical value

In [None]:
# One-hot encode the "sex" column into indicator columns.
# dtype=int is passed explicitly: newer pandas versions produce boolean
# (True/False) dummies by default, which would not give the 0/1 coding
# this notebook wants for Gender.
df_one = pd.get_dummies(tips["sex"], dtype=int)

In [None]:
# Display the indicator columns produced by get_dummies.
df_one

In [None]:
# Put the indicator columns side by side with the original tips columns.
df_two = pd.concat([df_one, tips], axis="columns")


In [None]:
# Display the concatenated frame.
df_two

In [None]:
# The original string-valued "sex" column is dropped.
df_two = df_two.drop(columns=["sex"])

In [None]:
# We want Male = 0 and Female = 1, so only the "Female" indicator is kept
# and the "Male" column is dropped here.
df_two = df_two.drop(columns=["Male"])

In [None]:
# Rename the "Female" indicator to "Gender" and rebind `tips` to the result.
tips = df_two.rename({"Female": "Gender"}, axis="columns")

In [None]:
# Display the dataset with the new "Gender" column.
tips


In [None]:
# regplot plots the data together with a linear regression model fit.
# It offers a number of mutually exclusive options for estimating the model.
sns.regplot(data=tips, x="total_bill", y="tip", marker="+")

In [None]:
# Scatter plot of total_bill grouped by a categorical variable (day).
sns.stripplot(data=tips, x="day", y="total_bill")


In [None]:
# Very similar to stripplot; the only difference is that markers are not
# allowed to overlap. The points are shifted sideways so the graph can be
# read easily without losing information.
sns.swarmplot(data=tips, x="day", y="total_bill")

In [None]:
# x: categorical column for the x-axis.
# y: numerical column for the y-axis.
sns.barplot(data=tips, x="Gender", y="tip")

In [None]:
# Count of rows for each Gender value.
sns.countplot(data=tips, x="Gender")

In [None]:
# x takes the categorical column and y the numerical column.
# The "hue" parameter adds a further categorical separation (here: smoker).
# Reading the plot, people who do not smoke had a higher bill on Friday
# than people who smoked.
sns.boxplot(data=tips, x="day", y="total_bill", hue="smoker")

In [None]:
# The same box plot without the hue separation.
sns.boxplot(data=tips, x="day", y="total_bill")

In [None]:
# A violin plot is similar to a box plot, but it uses kernel density
# estimation to give a richer description of the data distribution.
sns.violinplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="Gender",
    split=True,
)

In [None]:
# Scatter plot of tip against total_bill, drawn with "+" markers.
sns.scatterplot(data=tips, x="total_bill", y="tip", marker="+")

In [None]:
# Scatter plot of tip per day, coloured by time using the pastel palette.
sns.scatterplot(
    data=tips,
    x="day",
    y="tip",
    hue="time",
    palette="pastel",
    marker="+",
)

In [None]:
# Relational plot of tip against total_bill, coloured by day.
sns.relplot(data=tips, x="total_bill", y="tip", hue="day", marker="+")

In [None]:
# lmplot shows a line representing a linear regression model together with
# the data points in 2D space; x and y set the horizontal and vertical
# variables respectively.
sns.lmplot(data=tips, x="tip", y="total_bill", ci=None)

In [None]:
# Joint plot of total_bill and tip.
sns.jointplot(
    data=tips,
    x="total_bill",
    y="tip",
    marker="+",
)

In [None]:
# With kind="reg" the joint plot adds a linear regression fit and
# univariate KDE curves.
sns.jointplot(
    data=tips,
    x="total_bill",
    y="tip",
    kind="reg",
    marker="+",
)

In [None]:
# Joint plot of total_bill and tip, separated by time through hue.
sns.jointplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="time",
    marker="+",
)

In [None]:
# Probability density function of total_bill.
# bw_adjust changes the bandwidth:
#   less bandwidth -> more peaks,
#   more bandwidth -> smoother curve.
sns.kdeplot(data=tips, x="total_bill", bw_adjust=1)

In [None]:
# KDE represents the data using a continuous probability density curve;
# here the cumulative form is drawn.
sns.kdeplot(
    data=tips,
    x="total_bill",
    cumulative=True,
)


In [None]:
# Bivariate KDE plot of total_bill against tip, with a colour bar.
# `levels` is the documented parameter for the number of contour levels;
# the older `n_levels` spelling is only an undocumented legacy alias.
sns.kdeplot(data=tips, x="total_bill", y="tip", levels=20, cbar=True)

In [None]:
# Line plot of total_bill per day, one line per Gender value.
sns.lineplot(data=tips, x="day", y="total_bill", hue="Gender")

In [None]:
# Pairwise relationships in the dataset, coloured by Gender.
sns.pairplot(data=tips, hue="Gender")

In [None]:
# Heatmap of the correlation matrix, annotated with the coefficients.
# Correlation is only defined for numeric data, and recent pandas versions
# raise instead of silently skipping non-numeric columns (such as smoker,
# day and time), so the frame is restricted to numeric/boolean columns first.
# Reading the plot: total_bill and Gender have very little correlation,
# while tip and total_bill are correlated.
# heatmap() takes a 2D dataset that can be coerced into an ndarray.
sns.heatmap(
    tips.select_dtypes(include=["number", "bool"]).corr(),
    center=0,
    cmap="BrBG",
    annot=True,
)

In [None]:
# Grid of scatter plots: one row per Gender value, one column per time value.
fg = sns.FacetGrid(data=tips, row="Gender", col="time")
fg.map(sns.scatterplot, "total_bill", "tip")

In [None]:
# One histogram of total_bill per day, laid out as columns.
fg = sns.FacetGrid(data=tips, col="day")
fg.map(sns.histplot, "total_bill")

regplot() fits a simple linear regression model and plots it.
lmplot() combines regplot() with FacetGrid.


In [None]:
# Regression plot of tip against total_bill, split by smoker,
# with a different marker for each hue level.
sns.lmplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="smoker",
    markers=["+", "*"],
)