## 2. Bar Plot & Histogram

### Bar Plot

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine

# Build a DataFrame of the wine features and attach a readable class label.
wine_load = load_wine()
wine = pd.DataFrame(data=wine_load.data, columns=wine_load.feature_names)
wine['Class'] = pd.Series(wine_load.target, index=wine.index).map(
    {0: 'class_0', 1: 'class_1', 2: 'class_2'}
)

# Number of samples per class; the cell displays this Series as its output.
wine_type = wine['Class'].value_counts()
wine_type

In [None]:
# Vertical bar chart of the per-class sample counts.
plt.bar(
    x=wine_type.index,        # x positions of the bars (class labels)
    height=wine_type.values,  # bar heights (sample counts)
    width=0.8,                # bar width
    bottom=None,              # y coordinate of the bar bases
    align='edge',             # bar alignment relative to its x position
)
plt.show()

In [None]:
# Horizontal bar chart of the same per-class sample counts.
plt.barh(
    y=wine_type.index,       # y positions of the bars (class labels)
    width=wine_type.values,  # bar lengths (sample counts)
    height=0.8,              # bar thickness
    left=None,               # x coordinate of the bar starts
    align='edge',            # bar alignment relative to its y position
)
plt.show()

<br>

### Histogram

In [None]:
# Histogram of the 'alcohol' column of `wine`: 8 bins over the range 11 to 15.
plt.hist('alcohol', data=wine, bins=8, range=(11, 15), color='purple')
plt.title('Wine alcohol histogram')
plt.show()

## 3. Box Plot

In [None]:
import pandas as pd
from sklearn.datasets import load_iris

# Load the dataset once and keep the loader's result under its own name
# (mirroring `wine_load`), so `iris` only ever refers to the DataFrame and
# the dataset does not have to be loaded a second time for the targets.
iris_load = load_iris()
iris = pd.DataFrame(iris_load.data, columns=iris_load.feature_names)

# Attach the target codes and convert them to species names.
iris['Class'] = iris_load.target
iris['Class'] = iris['Class'].map({0: 'Setosa', 1: 'Versicolour', 2: 'Virginica'})

In [None]:
# Box plot of every feature column (the 'Class' label column is excluded).
iris.drop(columns='Class').plot.box()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Same box plot drawn through DataFrame.boxplot().
iris_without_class = iris.drop(columns='Class')
iris_without_class.boxplot()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Box plot of a single column drawn directly with matplotlib;
# the column name doubles as the figure title.
box_column = 'sepal width (cm)'
plt.boxplot(iris[box_column], whis=1.5)
plt.title(box_column)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One sepal-width box per value of the 'Class' column.
sns.boxplot(data=iris, x="Class", y="sepal width (cm)")
plt.show()

<br><br>

## 4. Scatter Plot

In [None]:
# Preview the first five rows of the iris DataFrame.
iris.head(n=5)

In [None]:
# Scatter plot of sepal width against sepal length; the column names are
# reused as the axis labels.
scatter_x, scatter_y = 'sepal length (cm)', 'sepal width (cm)'

plt.scatter(x=iris[scatter_x], y=iris[scatter_y], alpha=0.5)
plt.title('iris scatter')
plt.xlabel(scatter_x)
plt.ylabel(scatter_y)
plt.show()

In [None]:
import seaborn as sns

# Same scatter plot, with marker colour and marker shape both keyed on 'Class'.
sns.scatterplot(
    data=iris,
    x='sepal length (cm)',
    y='sepal width (cm)',
    hue='Class',
    style='Class',
)
plt.show()

<br><br>

## 5. Line Plot

### 직선 그래프

In [None]:
import matplotlib.pyplot as plt

# Four horizontal lines from x=-10 to x=10, each in a different colour.
for line_y, line_color in zip((-6, -2, 2, 6), ('grey', 'green', 'orange', 'red')):
    plt.hlines(line_y, -10, 10, color=line_color)

# Four vertical lines from y=-10 to y=10, each in a different line style.
for line_x, line_style in zip((-6, -2, 2, 6), ('solid', 'dashed', 'dashdot', 'dotted')):
    plt.vlines(line_x, -10, 10, linestyles=line_style)

plt.show()

### 함수식 그래프

In [None]:
def linear_func(x):
    """Return the value of the line y = 2x + 1 at ``x``."""
    return 2 * x + 1

# Evaluate linear_func at every sepal length and draw the resulting line.
X = iris['sepal length (cm)']
line_values = linear_func(X)
plt.plot(X, line_values, c='#789395')
plt.show()

### 회귀선 그래프

#### 일차 함수 회귀식


In [None]:
import numpy as np

# Fit a degree-1 polynomial to petal length as a function of sepal length.
X = iris['sepal length (cm)']
Y = iris['petal length (cm)']
b1, b0 = np.polyfit(X, Y, deg=1)  # b1 multiplies X, b0 is the constant term

# Data points with the fitted line drawn on top.
plt.scatter(x=X, y=Y, alpha=0.5)
plt.plot(X, b0 + b1 * X, color='red')
plt.show()

#### n차 함수 회귀식

In [None]:
import numpy as np

# Work on a copy sorted by sepal length so the fitted curve is drawn
# in increasing x order.
iris2 = iris.sort_values(by='sepal length (cm)')
X = iris2['sepal length (cm)']
Y = iris2['petal length (cm)']

# Fit a degree-2 polynomial: b2 multiplies X**2, b1 multiplies X, b0 is the constant.
b2, b1, b0 = np.polyfit(X, Y, deg=2)

# Data points with the fitted curve drawn on top.
plt.scatter(x=X, y=Y, alpha=0.5)
plt.plot(X, b0 + b1*X + b2*X**2, color='red')
plt.show()

### 꺾은선 그래프

In [None]:
import matplotlib.pyplot as plt

# Connect the points in order of increasing sepal length.
iris2 = iris.sort_values('sepal length (cm)')
line_x, line_y = 'sepal length (cm)', 'petal length (cm)'
plt.plot(line_x, line_y, data=iris2)
plt.show()

In [None]:
# To overlay one line per category, draw a separate plot for each category
# and show a legend.
# Each line is given its own label at plot time, so the legend entries are
# guaranteed to match the lines. Deriving the labels from
# iris2.Class.unique() would order them by first appearance in the
# sepal-length-sorted frame, which need not equal the plotting order.
for class_name in ('Setosa', 'Versicolour', 'Virginica'):
    plt.plot(
        'sepal length (cm)',
        'petal length (cm)',
        data=iris2.loc[iris2['Class'] == class_name],
        label=class_name,
    )
plt.legend()
plt.show()

## 6. 상관관계 시각화

### 산점도 행렬

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
from sklearn.datasets import load_iris

# Load the dataset once and keep the loader's result under its own name,
# so `iris` only ever refers to the DataFrame and the dataset does not have
# to be loaded a second time for the targets.
iris_load = load_iris()
iris = pd.DataFrame(iris_load.data, columns=iris_load.feature_names)
iris['Class'] = iris_load.target
iris['Class'] = iris['Class'].map({0: 'Setosa', 1: 'Versicolour', 2: 'Virginica'})

# Scatter-plot matrix with kernel density estimates on the diagonal.
scatter_matrix(iris, alpha = 0.5, figsize = (8, 8), diagonal = 'kde')
plt.show()

In [None]:
import seaborn as sns

# Pairwise relationships between the columns, coloured by 'Class'.
sns.pairplot(data=iris, hue='Class', diag_kind='auto')
plt.show()

In [None]:
# NOTE(review): `iris_feature` is not assigned in any cell visible in this
# part of the notebook; unless an earlier cell defines it, this raises
# NameError on a fresh kernel -- confirm and define or remove.
iris_feature

### 상관행렬 그래프

In [None]:
# Pearson correlation matrix of the feature columns ('Class' excluded),
# drawn as an annotated heatmap.
iris_corr = iris.drop(columns='Class').corr(method='pearson')
sns.heatmap(
    iris_corr,
    annot=True,
    cmap='RdBu_r',
    xticklabels=iris_corr.columns,
    yticklabels=iris_corr.columns,
)
plt.show()

## Pandas Profiling

Colab에서 수행하는 경우 pandas_profiling 재설치

In [None]:
# !pip uninstall pandas_profiling
# !pip install pandas-profiling[notebook,html]

In [None]:
import pandas as pd
from sklearn.datasets import load_iris
from pandas_profiling import ProfileReport

# Load the dataset once and keep the loader's result under its own name,
# so `iris` only ever refers to the DataFrame and the dataset does not have
# to be loaded a second time for the targets.
iris_load = load_iris()
iris = pd.DataFrame(iris_load.data, columns=iris_load.feature_names)
iris['Class'] = iris_load.target
iris['Class'] = iris['Class'].map({0: 'Setosa', 1: 'Versicolour', 2: 'Virginica'})

# The report object is the cell's last expression, so it is the cell output.
ProfileReport(iris)

In [None]:
# Pearson correlation matrix of the feature columns ('Class' excluded).
iris_features_only = iris.drop(columns='Class')
iris_corr = iris_features_only.corr(method='pearson')

In [None]:
# Display the correlation matrix.
iris_corr

In [None]:
# Display the DataFrame without the 'Class' column (`iris` itself is unchanged).
iris.drop('Class', axis=1)

In [None]:
# Display the iris DataFrame.
iris

In [None]:
# Convert numeric class codes to species names. Values that are not codes
# (for example names produced by an earlier run of this mapping) are kept
# unchanged, so running the mapping again does not turn the column into NaN.
class_names = {0: 'Setosa', 1: 'Versicolour', 2: 'Virginica'}
iris['Class'] = iris['Class'].map(lambda label: class_names.get(label, label))

In [None]:
# Display the 'Class' column.
iris.loc[:, 'Class']

In [None]:
import pandas as pd
from sklearn.datasets import load_iris

# Reload the dataset, this time keeping 'Class' as the numeric target codes.
# The loader's result is kept under its own name so `iris` only ever refers
# to the DataFrame and the dataset is loaded a single time.
iris_load = load_iris()
iris = pd.DataFrame(iris_load.data, columns=iris_load.feature_names)
iris['Class'] = iris_load.target

In [None]:
# Preview the first five rows of the reloaded DataFrame.
iris.head(n=5)

In [None]:
# One-hot encode the 'Class' column; the other columns are passed through.
iris_dummy = pd.get_dummies(data=iris, columns=['Class'])
iris_dummy

In [None]:
# Line plot with circle markers ('o-') through four (x, y) points.
plt.plot([1, 2, 3, 4], [0.72, 0.22, 0.03, 0.01], 'o-')
# Render the figure explicitly, as every other plotting cell does, so the
# cell output is the figure rather than the repr of the returned line list.
plt.show()