# Introduction to Exploratory Data Analysis with Matplotlib and Seaborn

In [None]:
!pip install --upgrade matplotlib seaborn

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Monovariate Categorical

In [None]:
# Load the Titanic dataset and preview its first five rows.
titanic_csv = "datasets/titanic.csv"
titanic = pd.read_csv(titanic_csv)
titanic.head(n=5)

In [None]:
# Pull out the Pclass column and show the dtype pandas inferred for it.
pclass = titanic["Pclass"]
pclass.dtype

In [None]:
# List the distinct values that occur in the column.
pd.unique(pclass)

In [None]:
# Re-encode the column as a pandas categorical and confirm the new dtype.
pclass = pclass.astype(dtype="category")
pclass.dtype

In [None]:
# Show the category labels stored on the categorical dtype.
pclass.dtype.categories

In [None]:
# Bar chart of how many rows fall into each passenger class.
# Pass the Series as `x`: the first positional parameter of countplot is `data`,
# so a bare Series would be read as wide-form data instead of the variable to count.
sns.countplot(x=pclass)

## Monovariate Continuous

In [None]:
# Load the California Pb measurements and summarise every column,
# non-numeric ones included.
# NOTE(review): the Titanic file is read from datasets/ while this one is read
# from the working directory — confirm the path.
california_csv = "california_pb_2023.csv"
california = pd.read_csv(california_csv)
california.describe(include="all")

In [None]:
# Histogram of the daily mean Pb concentration.
sns.histplot(data=california, x="Daily Mean Pb Concentration")

In [None]:
from scipy import stats

# Probability plot of the concentrations against probplot's default
# distribution (normal), drawn through the pyplot module imported as `plt`.
pb_concentration = california["Daily Mean Pb Concentration"]
stats.probplot(pb_concentration, plot=plt)

In [None]:
# Same probability plot, this time against a log-normal distribution
# frozen with shape parameter s=1.
lognormal = stats.lognorm(s=1)
stats.probplot(california["Daily Mean Pb Concentration"], dist=lognormal, plot=plt)

## Multivariate Categorical

In [None]:
# Add lowercase categorical copies of the three grouping columns, leaving the
# original columns untouched.
for source_column in ("Survived", "Sex", "Pclass"):
    titanic[source_column.lower()] = titanic[source_column].astype("category")

In [None]:
# Contingency table: one row per survival outcome, one column per
# (sex, pclass) combination. crosstab takes the row and column factors as
# separate Series rather than a single multi-column selection.
ct = pd.crosstab(
    index=titanic["survived"],
    columns=[titanic["sex"], titanic["pclass"]],
)
ct

In [None]:
# One stacked bar per row of the contingency table, with a segment for each
# (sex, pclass) column.
ct.plot(kind="bar", stacked=True)

## Multivariate Continuous

In [None]:
# Load the iris dataset and preview its first five rows.
iris_csv = "iris.csv"
iris = pd.read_csv(iris_csv)
iris.head(n=5)

In [None]:
# Grid of pairwise plots across the numeric columns of the iris table.
sns.pairplot(data=iris)

## Multivariate Mixed

### One continuous variable against one categorical variable

In [None]:
# Box plot of Age for each passenger class.
sns.boxplot(data=titanic, x="pclass", y="Age")

In [None]:
# Violin plot of Age for each passenger class.
sns.violinplot(data=titanic, x="pclass", y="Age")

### Many continuous variables against one categorical variable

In [None]:
# Pairwise plots of the iris columns, coloured by the "variety" column.
sns.pairplot(data=iris, hue="variety")

### Many categorical variables against one or more continuous variables

In [None]:
# Grid of panels: one row per passenger class, one column per sex.
g = sns.FacetGrid(data=titanic, row="pclass", col="sex")
# Draw an Age histogram inside every panel.
g.map(sns.histplot, "Age")

In [None]:
# Histogram of Age across all passengers, without any grouping.
sns.histplot(data=titanic, x="Age")