# Seaborn Cheatsheet

## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Seed NumPy's global random generator so the np.random draws below repeat
# across runs when the cells are executed in order.
np.random.seed(42)

## Basic Seaborn

### Basic Data Structure

In [None]:
# Synthetic frame with 100 rows: the four category letters repeated 25 times,
# plus two normally distributed columns. nilai_x is drawn before nilai_y so
# the random stream is consumed in the same order.
data_sintetis = pd.DataFrame(dict(
    kategori=list('ABCD') * 25,
    nilai_x=np.random.normal(size=100),
    nilai_y=np.random.normal(loc=2, scale=1.5, size=100),
))

# Bare expression: the notebook renders the frame as the cell output.
data_sintetis

In [None]:
# Scatter of the two numeric columns, with points colored by category.
sns.scatterplot(x='nilai_x', y='nilai_y', hue='kategori', data=data_sintetis)
plt.show()

In [None]:
# One bar per category summarizing nilai_x.
sns.barplot(x='kategori', y='nilai_x', data=data_sintetis)
plt.show()

In [None]:
# Configure the global look: theme/style first, then palette, then context.
# The three setters are kept as separate calls, exactly as before.
sns.set_theme(style='darkgrid')
sns.set_palette('pastel')
sns.set_context('paper')

# Same bar plot as before, now drawn with the aesthetics configured above.
sns.barplot(x='kategori', y='nilai_x', data=data_sintetis)
plt.show()

## Univariate Data Distribution Plot

In [None]:
# Single-column frame: 1000 draws from a normal distribution (mean 0, std 10).
# Rebinds data_sintetis for the univariate examples that follow.
data_sintetis = pd.DataFrame(
    dict(nilai=np.random.normal(loc=0, scale=10, size=1000))
)

data_sintetis

### Histogram

In [None]:
# Histogram of 'nilai' split into 30 bins.
sns.histplot(x='nilai', bins=30, data=data_sintetis)
plt.show()

### Kernel Density Plot

In [None]:
# Kernel density estimate of 'nilai'.
sns.kdeplot(x='nilai', data=data_sintetis)
plt.show()

### Rug Plot

In [None]:
# Rug plot: one tick along the x axis per observation of 'nilai'.
sns.rugplot(x='nilai', data=data_sintetis)
plt.show()

### Box Plot

In [None]:
# Horizontal box plot of 'nilai'.
sns.boxplot(x='nilai', data=data_sintetis)
plt.show()

### Violin Plot

In [None]:
# Violin plot of 'nilai'.
sns.violinplot(x='nilai', data=data_sintetis)
plt.show()

In [None]:
# Swarm plot drawn on a random sample of 100 rows rather than all 1000.
sns.swarmplot(x='nilai', data=data_sintetis.sample(n=100))
plt.show()

## Relationship Between Variables Plot

### Scatter Plot

In [None]:
# y follows x linearly (slope 2) plus uniform noise.
x = np.random.rand(50)
y = x * 2 + np.random.rand(50)

# scatterplot returns the Axes it drew on; set both labels and the title on it
# in a single call.
sns.scatterplot(x=x, y=y).set(
    xlabel='Variabel X',
    ylabel='Variabel Y',
    title='Scatter Plot Variable X dan Y',
)
plt.show()

### Line Plot

In [None]:
# Time axis: 50 consecutive integer steps (0..49).
# It is bound to lowercase `x` because that is the name the plot call reads.
# Assigning it to `X` left the call using the stale random `x` from the
# scatter-plot cell, so the series was not plotted against time.
x = np.arange(50)
# Random walk: cumulative sum of 50 standard-normal increments.
y = np.cumsum(np.random.randn(50))

sns.lineplot(x=x, y=y)
plt.xlabel('Waktu')
plt.ylabel('Variable Y')
plt.title('Line Plot Waktu dan Variabel Y')
plt.show()

### Regression Plot

In [None]:
# Linear relationship (slope 2) with uniform noise, as in the scatter example.
x = np.random.rand(50)
y = x * 2 + np.random.rand(50)

# Scatter with a fitted regression line; title and labels are applied to the
# current axes through pyplot.
sns.regplot(x=x, y=y)
plt.title('Regresi Plot Variable X dan Y')
plt.xlabel('Variabel X')
plt.ylabel('Variabel Y')
plt.show()

### Joint Plot

In [None]:
# Linear relationship (slope 2) with uniform noise.
x = np.random.rand(50)
y = x * 2 + np.random.rand(50)

# Joint plot of x against y using the scatter kind.
sns.jointplot(kind='scatter', x=x, y=y)
plt.show()

### Pair Plot

In [None]:
# 50 rows of uniform random values, one column per letter A-D.
columns = list('ABCD')
data = np.random.rand(50, 4)
df = pd.DataFrame(data=data, columns=columns)

# Grid of pairwise plots across all four columns.
sns.pairplot(df)
plt.show()

### Heat Map

In [None]:
# 10x10 matrix of uniform random values.
data = np.random.rand(10, 10)

# Annotate every cell with its value and use the 'coolwarm' colormap.
sns.heatmap(data, cmap='coolwarm', annot=True)
plt.title('Heatmap Data Sintetis')
plt.xlabel('Kolom')
plt.ylabel('Baris')
plt.show()

## Categorical Plot

In [None]:
# Five categories, each with one fixed value.
category = list('ABCDE')
values = [23, 45, 12, 67, 34]

data_df = pd.DataFrame(dict(category=category, values=values))

# One bar per category.
sns.barplot(data=data_df, x='category', y='values')
plt.show()

### Count Plot

In [None]:
# Draw 100 category labels at random from the five letters.
categories = list('ABCDE')
data = np.random.choice(categories, size=100)

data_df = pd.DataFrame(dict(category=data))

# Count how many times each category occurs.
sns.countplot(x='category', data=data_df)
plt.show()

### Point Plot

In [None]:
# Years 2010..2020, each paired with one random integer in [10, 100).
years = np.arange(2010, 2021)
values = np.random.randint(10, 100, size=years.size)

data_df = pd.DataFrame(dict(year=years, values=values))

# Point plot of the value per year.
sns.pointplot(data=data_df, x='year', y='values')
plt.show()

### Strip Plot

In [None]:
# 100 random integer values, each assigned to a randomly chosen category.
# The values are drawn before the categories, matching the original order.
categories = list('ABCDE')
values = np.random.randint(10, 100, size=100)
categories_data = np.random.choice(categories, size=100)

data_df = pd.DataFrame(dict(category=categories_data, values=values))

# Strip plot: individual observations per category.
sns.stripplot(data=data_df, x='category', y='values')
plt.show()

## Multivariate Data Visualization

### Scatter Plot With Hue

In [None]:
# Three independent normal(0, 1) vectors of length 100, drawn in x, y, z
# order so the random stream is consumed as before.
x, y, z = (np.random.normal(0, 1, 100) for _ in range(3))

# One random label out of A/B/C per row.
labels = np.random.choice(['A', 'B', 'C'], size=100)

data_df = pd.DataFrame(dict(x=x, y=y, z=z, label=labels))
data_df

In [None]:
# Plot the frame's own 'x' and 'y' columns, colored by 'label'.
# Columns are referenced by name instead of passing the notebook-level x/y
# arrays, so the plot cannot pick up arrays rebound by another cell and the
# axes are labeled with the column names. `data` is passed by keyword to match
# the other cells and to avoid relying on its positional slot.
sns.scatterplot(data=data_df, x='x', y='y', hue='label')
plt.show()

### Line Plot With Hue

In [None]:
# Shared x axis 1..10 and three series of random integers in [1, 100),
# drawn in y1, y2, y3 order.
x = np.arange(1, 11)
y1, y2, y3 = (np.random.randint(1, 100, 10) for _ in range(3))

data_df = pd.DataFrame(dict(x=x, y1=y1, y2=y2, y3=y3))
data_df

In [None]:
# Reshape wide -> long: keep 'x' as the identifier, put the former column
# names in 'label' and their values in 'y'.
data_df_melted = pd.melt(
    data_df,
    id_vars=['x'],
    var_name='label',
    value_name='y',
)
data_df_melted

In [None]:
# One line per 'label' value from the long-format frame.
sns.lineplot(data=data_df_melted, x='x', y='y', hue='label')
plt.show()

### Facet Grid

In [None]:
# Three normal(0, 1) vectors (drawn in x, y, z order) plus two independent
# binary labels per row.
x, y, z = (np.random.normal(0, 1, 100) for _ in range(3))
labels_1 = np.random.choice(['A', 'B'], size=100)
labels_2 = np.random.choice(['X', 'Y'], size=100)

data_df = pd.DataFrame(dict(
    x=x,
    y=y,
    z=z,
    label_1=labels_1,
    label_2=labels_2,
))

# Grid with label_1 across columns and label_2 down rows; each panel shows a
# scatter of x against y for its subset.
g = sns.FacetGrid(data_df, row='label_2', col='label_1')
g.map(sns.scatterplot, 'x', 'y')
plt.show()

### Pair Grid

In [None]:
# Four independent normal(0, 1) vectors, drawn in x1..x4 order.
x1, x2, x3, x4 = (np.random.normal(0, 1, 100) for _ in range(4))

data_df = pd.DataFrame(dict(x1=x1, x2=x2, x3=x3, x4=x4))

# Histograms on the diagonal, scatter plots everywhere else.
g = sns.PairGrid(data_df)
g.map_diag(sns.histplot)
g.map_offdiag(sns.scatterplot)
plt.show()

### Cluster Map

In [None]:
# 10x4 table of random integers in [1, 100), with columns named A-D.
data = np.random.randint(1, 100, size=(10, 4))
columns = list('ABCD')
data_df = pd.DataFrame(data=data, columns=columns)

# Clustered heatmap with each cell annotated by its value.
sns.clustermap(data_df, annot=True, cmap='viridis')
plt.show()

## Seaborn Plot Customization

### Using Seaborn With Matplotlib

In [None]:
# Standard-normal x; y follows x with slope 2 plus standard-normal noise.
x = np.random.randn(50)
y = x * 2 + np.random.randn(50)

# Seaborn draws the plot; pyplot then decorates the same current axes.
sns.regplot(x=x, y=y)
plt.title('Scatter Plot dengan Garis Regresi')
plt.xlabel('X Label')
plt.ylabel('Y Label')
plt.grid(True)
plt.show()

### Add Title and Axis Label With Seaborn

In [None]:
# 100 uniform random values.
data = np.random.rand(100)

# histplot returns the Axes; set the title and both axis labels on it in one call.
ax = sns.histplot(data)
ax.set(
    title='Histogram Data Sintetis',
    xlabel='Nilai Data',
    ylabel='Frekuensi',
)
plt.show()

### Set Axis Limits

In [None]:
# 100 standard-normal values.
data = np.random.randn(100)

# Fix the visible range of both axes on the Axes returned by histplot.
ax = sns.histplot(data)
ax.set(xlim=(-4, 4), ylim=(0, 25))
plt.show()

### Setting Legend

In [None]:
# Six rows: a random category out of A/B/C paired with the values 1..6.
categories = np.random.choice(['A', 'B', 'C'], size=6)
values = np.arange(1, 7)
df = pd.DataFrame(dict(value=values, category=categories))

ax = sns.barplot(data=df, x='category', y='value')
# Framed legend in the upper-right corner with a single explicit label.
ax.legend(['Contoh Legend'], frameon=True, loc='upper right')
plt.show()

## How To Save Image
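# Call savefig() before plt.show(); once the figure has been shown, the saved file may come out blank.
# plt.savefig('Bar Plot Data Sintetis.png', dpi=300)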

## Simple Study Case

### Titanic

In [None]:
# Load the 'titanic' dataset through seaborn's load_dataset helper and
# display it as the cell output.
titanic_df = sns.load_dataset('titanic')
titanic_df

In [None]:
# Print a concise summary of the frame (columns, dtypes, non-null counts).
titanic_df.info()

In [None]:
# Percentage of missing values per column: the mean of the boolean
# missing-value mask, scaled to 0-100.
titanic_df.isna().mean().mul(100)

In [None]:
# Descriptive statistics for the frame's columns (pandas' default selection).
titanic_df.describe()

### Relationship Between Survived and Class

In [None]:
# Bar per passenger class summarizing the 'survived' column; the axis labels
# are set on the Axes returned by barplot.
sns.barplot(data=titanic_df, x='class', y='survived').set(
    xlabel='Class',
    ylabel='Survived',
)
plt.show()

### Relationship Between Gender and Survived

In [None]:
# Bar per sex summarizing the 'survived' column; the axis labels are set on
# the Axes returned by barplot.
sns.barplot(data=titanic_df, x='sex', y='survived').set(
    xlabel='Sex',
    ylabel='Survived',
)
plt.show()

### Relationship Between Age and Survived

In [None]:
# Age histogram split by the 'survived' column, with a KDE curve overlaid.
# `data` is passed by keyword, matching the other cells.
sns.histplot(data=titanic_df, x='age', hue='survived', kde=True)
plt.show()

### Tips

In [None]:
# Load the 'tips' dataset through seaborn's load_dataset helper and display
# it as the cell output.
tips_df = sns.load_dataset('tips')
tips_df

In [None]:
# Print a concise summary of the frame (columns, dtypes, non-null counts).
tips_df.info()

In [None]:
# Descriptive statistics for the frame's columns (pandas' default selection).
tips_df.describe()

In [None]:
# Percentage of missing values per column: the mean of the boolean
# missing-value mask, scaled to 0-100.
tips_df.isna().mean().mul(100)

### Relationship Between Total Bill and Tip

In [None]:
# Tip against total bill, with points colored by the 'sex' column.
# `data` is passed by keyword, matching the other cells.
sns.scatterplot(data=tips_df, x='total_bill', y='tip', hue='sex')
plt.show()

### Tip Distribution by Time

In [None]:
# Distribution of tips for each value of the 'time' column.
# `data` is passed by keyword, matching the other cells.
sns.boxplot(data=tips_df, x='time', y='tip')
plt.show()

### Pima Indian Diabetes

In [None]:
# Read the CSV straight from its URL. Column names are supplied here, and
# header=None states explicitly that the first row is data, not a header.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = 'preg plas pres skin test mass pedi age class'.split()
pima_df = pd.read_csv(url, header=None, names=names)
pima_df

### Relationship Between Age and Class

In [None]:
# Age histogram split by the 'class' column, with a KDE curve overlaid.
sns.histplot(x='age', hue='class', kde=True, data=pima_df)
plt.show()