# DATA VISUALIZATION USING SEABORN


RELATIONAL PLOTS (numerical x numerical)
relplot	    Figure-level interface for drawing relational plots onto a FacetGrid.
scatterplot	Draw a scatter plot with possibility of several semantic groupings.
lineplot	Draw a line plot with possibility of several semantic groupings.

CATEGORICAL PLOTS (categorical x numerical)
stripplot	Draw a scatterplot where one variable is categorical.
swarmplot	Draw a categorical scatterplot with non-overlapping points.
boxplot	    Draw a box plot to show distributions with respect to categories.
boxenplot	Draw an enhanced box plot for larger datasets.
violinplot	Draw a combination of boxplot and kernel density estimate.

REGRESSION PLOTS 
lmplot	    Plot data and regression model fits across a FacetGrid.
regplot	    Plot data and a linear regression model fit.
residplot	Plot the residuals of a linear regression.

MATRIX PLOTS
heatmap	Plot rectangular data as a color-encoded matrix.
clustermap	Plot a matrix dataset as a hierarchically-clustered heatmap.

MULTI-PLOT GRID
FacetGrid	Multi-plot grid for plotting conditional relationships.
pairplot	Plot pairwise relationships in a dataset.
jointplot	Draw a plot of two variables with bivariate and univariate graphs.

## Importing python libraries

In [None]:
#Data processing libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

#Visualization libraries
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Switch the working directory to the project folder on the mounted Drive.
# This line relies on IPython's `cd` automagic, so it only runs inside a
# notebook/IPython session (it is not valid plain Python).
# NOTE(review): the read_csv calls in this notebook use relative '../input/...'
# paths -- confirm they resolve correctly from this folder.
cd /content/drive/MyDrive/Colab Notebooks/DATASCIENCE/DATA_VISUALIZATION

## Importing datasets

In [None]:
# Load each CSV used in this notebook into its own DataFrame.
# All paths are relative to the current working directory.
# NOTE(review): 'bank' is read from the 'telco-customer-churn' folder while
# 'telco' is read from 'telcom-customer-churn' -- confirm both folder names.
bank = pd.read_csv('../input/telco-customer-churn/Churn_Modelling.csv')
telco = pd.read_csv('../input/telcom-customer-churn/customer_churn_data.csv')
stroke = pd.read_csv('../input/strokes-data/strokes_data.csv')
house = pd.read_csv('../input/house-price/kc_house_data.csv')
insurance = pd.read_csv('../input/insurance-charges/insurance.csv')
tips = pd.read_csv('../input/tips-data/tips_data.csv')

## Display few lines of data

**Display first 5 rows of 'bank' dataset**

In [None]:
bank.head()
#Target = Exited (0,1)

**Display first 5 rows of 'telco' dataset**

In [None]:
telco.head()
#Target = Churn (0,1)

**Display first 5 rows of 'stroke' dataset**

In [None]:
stroke.head()
#Target = stroke (0,1)

**Display first 5 rows of 'house' dataset**

In [None]:
house.head()
#Target = price (continuous)

**Display first 5 rows of 'insurance' dataset**

In [None]:
insurance.head()
#Target = charges (continuous)

## Display size of the data

**Display the size of 'bank','telco','stroke','house' and 'insurance' datasets**

In [None]:
# Report the (rows, columns) shape of each dataset, one line per dataset,
# in the same order and format as the individual datasets were introduced.
labelled_frames = (
    ("bank", bank),
    ("telco", telco),
    ("stroke", stroke),
    ("house", house),
    ("insurance", insurance),
)
for label, frame in labelled_frames:
    print(f"{label} :", frame.shape)

## Display Information

**Display information (fields, datatypes, null values etc) about 'bank' dataset**

In [None]:
bank.info()

**Display information (fields, datatypes, null values etc) about 'telco' dataset**

In [None]:
telco.info()

**Display information (fields, datatypes, null values etc) about 'stroke' dataset**

In [None]:
stroke.info()

**Display information (fields, datatypes, null values etc) about 'house' dataset**

In [None]:
house.info()

**Display information (fields, datatypes, null values etc) about 'insurance' dataset**

In [None]:
insurance.info()

# RELATIONAL PLOTS (numerical x numerical)

## tips | relplot | total_bill vs tip - day

In [None]:
sns.relplot(data=tips, x="total_bill", y="tip", hue="day")

## tips | scatterplot | total_bill vs tip - day

In [None]:
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="day")

## tips | lineplot | total_bill vs tip - day

In [None]:
sns.lineplot(data=tips, x="total_bill", y="tip", hue="day")

## bank | scatterplot | NumOfProducts vs Age   -   Exited

In [None]:
sns.scatterplot(data=bank, x="NumOfProducts", y="Age", hue="Exited")

## stroke | scatterplot | bmi vs age   -   stroke

In [None]:
sns.scatterplot(data=stroke, x="bmi", y="age", hue="stroke")

## house | scatterplot | sqft_living vs price

In [None]:
sns.scatterplot(data=house, x="sqft_living", y="price")

## insurance | scatterplot | age vs charges   -   smoker

In [None]:
sns.scatterplot(data=insurance, x="age", y="charges", hue="smoker")

# CATEGORICAL PLOTS (categorical x numerical)

## stroke | stripplot | stroke vs age

In [None]:
sns.stripplot(x='stroke', y='age', data=stroke)

## stroke | swarmplot | stroke vs age

In [None]:
sns.swarmplot(x='stroke', y='age', data=stroke)

## stroke | boxplot | stroke vs age

In [None]:
sns.boxplot(x='stroke', y='age', data=stroke)

## stroke | boxenplot | stroke vs age

In [None]:
sns.boxenplot(x='stroke', y='age', data=stroke)

## stroke | violinplot | stroke vs age

In [None]:
sns.violinplot(x='stroke', y='age', data=stroke)

# REGRESSION PLOTS (numerical x numerical)

## stroke | lmplot | age vs bmi

In [None]:
sns.lmplot(data=stroke, x="age", y="bmi", hue="stroke")

In [None]:
sns.regplot(data=stroke, x="age", y="bmi")

In [None]:
sns.residplot(data=stroke, x="age", y="bmi")

## tips | lmplot | total_bill vs tip

In [None]:
sns.lmplot(data=tips, x="total_bill", y="tip", hue="day")

## tips | regplot | total_bill vs tip

In [None]:
sns.regplot(data=tips, x="total_bill", y="tip")

## tips | residplot | total_bill vs tip

In [None]:
sns.residplot(data=tips, x="total_bill", y="tip")

## insurance | lmplot | age vs charges   -  smoker

In [None]:
sns.lmplot(data=insurance, x="age", y="charges", hue="smoker")

## insurance | regplot | age vs charges

In [None]:
sns.regplot(data=insurance, x="age", y="charges")

## insurance | residplot | age vs charges

In [None]:
sns.residplot(data=insurance, x="age", y="charges")

# MATRIX PLOTS (table)

## bank | heatmap 

In [None]:
# Correlate only the numeric/boolean columns. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# if the frame contains text columns.
bank_corr = bank.select_dtypes(include=["number", "bool"]).corr()

# plt.subplots returns (figure, axes); unpack both and draw the annotated
# correlation matrix explicitly on the axes that was just created.
fig, ax = plt.subplots(figsize=(12, 5))
sns.heatmap(bank_corr, annot=True, ax=ax);

## stroke | heatmap 

In [None]:
# Correlate only the numeric/boolean columns. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# if the frame contains text columns.
stroke_corr = stroke.select_dtypes(include=["number", "bool"]).corr()

# plt.subplots returns (figure, axes); unpack both and draw the annotated
# correlation matrix explicitly on the axes that was just created.
fig, ax = plt.subplots(figsize=(16, 8))
sns.heatmap(stroke_corr, annot=True, ax=ax);

## telco | heatmap 

In [None]:
# Correlate only the numeric/boolean columns. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# if the frame contains text columns.
telco_corr = telco.select_dtypes(include=["number", "bool"]).corr()

# plt.subplots returns (figure, axes); unpack both and draw the annotated
# correlation matrix explicitly on the axes that was just created.
fig, ax = plt.subplots(figsize=(16, 8))
sns.heatmap(telco_corr, annot=True, ax=ax);

## house | heatmap 

In [None]:
# Correlate only the numeric/boolean columns. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# if the frame contains text columns.
house_corr = house.select_dtypes(include=["number", "bool"]).corr()

# plt.subplots returns (figure, axes); unpack both and draw the annotated
# correlation matrix explicitly on the axes that was just created.
fig, ax = plt.subplots(figsize=(16, 8))
sns.heatmap(house_corr, annot=True, ax=ax);

## insurance | heatmap 

In [None]:
# Correlate only the numeric/boolean columns. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# if the frame contains text columns.
insurance_corr = insurance.select_dtypes(include=["number", "bool"]).corr()

# plt.subplots returns (figure, axes); unpack both and draw the annotated
# correlation matrix explicitly on the axes that was just created.
fig, ax = plt.subplots(figsize=(16, 8))
sns.heatmap(insurance_corr, annot=True, ax=ax);

## bank | clustermap 

In [None]:
# Cluster the correlation matrix of the numeric/boolean columns only. Since
# pandas 2.0, DataFrame.corr() no longer drops non-numeric columns silently
# and raises if the frame contains text columns.
bank_corr = bank.select_dtypes(include=["number", "bool"]).corr()
sns.clustermap(bank_corr, annot=True);

## data = tips | plot = FacetGrid | x = total_bill | y = tip | row = sex | column = time 

In [None]:
g = sns.FacetGrid(tips, col="time",  row="sex")
g.map(sns.scatterplot, "total_bill", "tip")

## data = insurance | plot = FacetGrid | x = age | y = charges | row = gender | column = smoker 

In [None]:
g = sns.FacetGrid(insurance, col="smoker",  row="gender")
g.map(sns.scatterplot, "age", "charges")

## data = stroke | plot = FacetGrid | x = age | y = bmi | row = stroke | column = ever_married 

In [None]:
g = sns.FacetGrid(stroke, col="ever_married",  row="stroke")
g.map(sns.scatterplot, "age", "bmi")

## data = stroke | plot = jointplot | x = age | y = bmi

In [None]:
sns.jointplot(data=stroke, x="age", y="bmi")

## data = insurance | plot = jointplot | x = age | y = charges

In [None]:
sns.jointplot(data=insurance, x="age", y="charges")

## data = bank | plot = pairplot | x = 'CreditScore','Age','Tenure','Exited' | hue = Exited

In [None]:
sns.pairplot(data=bank[['CreditScore','Age','Tenure','Exited']],hue='Exited',height=3, kind='scatter')

### data = bank | plot = pairplot | x = 'Balance','NumOfProducts','HasCrCard','Exited' | hue = Exited

In [None]:
sns.pairplot(data=bank[['Balance','NumOfProducts','HasCrCard','Exited']],hue='Exited',height=3, kind='scatter')

### data = bank | plot = pairplot | x = 'IsActiveMember','EstimatedSalary','Exited' | hue = Exited

In [None]:
sns.pairplot(data=bank[['IsActiveMember','EstimatedSalary','Exited']],hue='Exited',height=3, kind='scatter')

### data = stroke | plot = pairplot | x = 'age','hypertension','heart_disease','stroke' | hue = stroke

In [None]:
sns.pairplot(data=stroke[['age','hypertension','heart_disease','stroke']],hue='stroke',height=3, kind='scatter')

### data = stroke | plot = pairplot | x = 'avg_glucose_level','bmi','stroke' | hue = stroke

In [None]:
sns.pairplot(data=stroke[['avg_glucose_level','bmi','stroke']],hue='stroke',height=3, kind='scatter')

### data = stroke | plot = histplot | x = 'bmi' | hue = stroke

In [None]:
sns.histplot(data=stroke, x="bmi", kde=True,hue='stroke')

# SAMPLE DATASETS

In [None]:
sns.get_dataset_names()

In [None]:
anagrams = sns.load_dataset('anagrams')
anagrams.head()

In [None]:
planets = sns.load_dataset('planets')
planets.head()

In [None]:
titanic = sns.load_dataset('titanic')
titanic.head()