# 第8章 : 行列形式のマトリックスプロットと全般の詳細な表示設定

In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [19]:
df = sns.load_dataset('iris')
df.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


In [20]:
df.shape

(150, 5)

In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [22]:
df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [23]:
df['species'].value_counts()

species
setosa        50
versicolor    50
virginica     50
Name: count, dtype: int64

## Category型について

In [24]:
# speciesのobject型をcategory型に変換
df['species'].astype('category')

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: category
Categories (3, object): ['setosa', 'versicolor', 'virginica']

In [25]:
# Categoricalでも変更可能
pd.Categorical(df['species'])

['setosa', 'setosa', 'setosa', 'setosa', 'setosa', ..., 'virginica', 'virginica', 'virginica', 'virginica', 'virginica']
Length: 150
Categories (3, object): ['setosa', 'versicolor', 'virginica']

In [26]:
# Categoryに順序を付ける
pd.Categorical(df['species'], ordered=True, categories=['versicolor', 'virginica', 'setosa'])

['setosa', 'setosa', 'setosa', 'setosa', 'setosa', ..., 'virginica', 'virginica', 'virginica', 'virginica', 'virginica']
Length: 150
Categories (3, object): ['versicolor' < 'virginica' < 'setosa']

In [27]:
df['cat'] = pd.Categorical(df['species'], ordered=True, categories=['versicolor', 'virginica', 'setosa'])
df.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,cat
0,5.1,3.5,1.4,0.2,setosa,setosa
1,4.9,3.0,1.4,0.2,setosa,setosa
2,4.7,3.2,1.3,0.2,setosa,setosa


In [28]:
df['cat'].cat.categories

Index(['versicolor', 'virginica', 'setosa'], dtype='object')

In [29]:
df['cat'].cat.codes # 順序の確認

0      2
1      2
2      2
3      2
4      2
      ..
145    1
146    1
147    1
148    1
149    1
Length: 150, dtype: int8

In [30]:
df.drop('cat', axis=1, inplace=True)
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


## Numpyのwhere, selectを用いたレコード抽出と置換

In [31]:
np.where(df['petal_width'] > 1.3) # 条件を満たすindexを取得

(array([ 50,  51,  52,  54,  56,  59,  61,  63,  65,  66,  68,  70,  72,
         75,  76,  77,  78,  83,  84,  85,  86,  91, 100, 101, 102, 103,
        104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
        117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
        130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
        143, 144, 145, 146, 147, 148, 149]),)

In [32]:
df[df['petal_width'] > 1.3].index # これと同じ

Index([ 50,  51,  52,  54,  56,  59,  61,  63,  65,  66,  68,  70,  72,  75,
        76,  77,  78,  83,  84,  85,  86,  91, 100, 101, 102, 103, 104, 105,
       106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
       120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
       134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147,
       148, 149],
      dtype='int64')

In [33]:
# Trueの場合, Falseの場合に格納する文字列を指定できる
np.where(df['petal_width'] > 1.3, 'wide', 'narrow') # Trueならwide, Falseならnarrow

array(['narrow', 'narrow', 'narrow', 'narrow', 'narrow', 'narrow',
       'narrow', 'narrow', 'narrow', 'narrow', 'narrow', 'narrow',
       'narrow', 'narrow', 'narrow', 'narrow', 'narrow', 'narrow',
       'narrow', 'narrow', 'narrow', 'narrow', 'narrow', 'narrow',
       'narrow', 'narrow', 'narrow', 'narrow', 'narrow', 'narrow',
       'narrow', 'narrow', 'narrow', 'narrow', 'narrow', 'narrow',
       'narrow', 'narrow', 'narrow', 'narrow', 'narrow', 'narrow',
       'narrow', 'narrow', 'narrow', 'narrow', 'narrow', 'narrow',
       'narrow', 'narrow', 'wide', 'wide', 'wide', 'narrow', 'wide',
       'narrow', 'wide', 'narrow', 'narrow', 'wide', 'narrow', 'wide',
       'narrow', 'wide', 'narrow', 'wide', 'wide', 'narrow', 'wide',
       'narrow', 'wide', 'narrow', 'wide', 'narrow', 'narrow', 'wide',
       'wide', 'wide', 'wide', 'narrow', 'narrow', 'narrow', 'narrow',
       'wide', 'wide', 'wide', 'wide', 'narrow', 'narrow', 'narrow',
       'narrow', 'wide', 'narrow', 'narrow',

In [34]:
['wide' if df.loc[i, 'petal_width'] > 1.3 else 'narrow' for i in df.index] # 一応これと同じ

['narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'wide',
 'wide',
 'wide',
 'narrow',
 'wide',
 'narrow',
 'wide',
 'narrow',
 'narrow',
 'wide',
 'narrow',
 'wide',
 'narrow',
 'wide',
 'narrow',
 'wide',
 'wide',
 'narrow',
 'wide',
 'narrow',
 'wide',
 'narrow',
 'wide',
 'narrow',
 'narrow',
 'wide',
 'wide',
 'wide',
 'wide',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'wide',
 'wide',
 'wide',
 'wide',
 'narrow',
 'narrow',
 'narrow',
 'narrow',
 'wide',
 'narrow',
 'narrow',
 'narrow',

In [41]:
# np.whereと異なりnp.selectでは複数の条件式を使える
conditions = [df['petal_width'] < 1,
              (1 < df['petal_width']) & (df['petal_width'] < 2),
              2 < df['petal_width']]
values = ['narrow', 'medium', 'wide']

In [43]:
np.select(conditions, values)

TypeError: Choicelist and default value do not have a common dtype: The DType <class 'numpy.dtypes._PyLongDType'> could not be promoted by <class 'numpy.dtypes.StrDType'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtypes.StrDType'>, <class 'numpy.dtypes.StrDType'>, <class 'numpy.dtypes.StrDType'>, <class 'numpy.dtypes._PyLongDType'>)

In [46]:
df['pw_cat'] = np.select(conditions, values)
df.head(3)

TypeError: Choicelist and default value do not have a common dtype: The DType <class 'numpy.dtypes._PyLongDType'> could not be promoted by <class 'numpy.dtypes.StrDType'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtypes.StrDType'>, <class 'numpy.dtypes.StrDType'>, <class 'numpy.dtypes.StrDType'>, <class 'numpy.dtypes._PyLongDType'>)

In [47]:
np.random.seed(17)
arr = ['red', 'green', 'blue']
df['color'] = np.random.choice(arr, len(df))
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,color
0,5.1,3.5,1.4,0.2,setosa,green
1,4.9,3.0,1.4,0.2,setosa,blue
2,4.7,3.2,1.3,0.2,setosa,blue
3,4.6,3.1,1.5,0.2,setosa,green
4,5.0,3.6,1.4,0.2,setosa,red


## Matrix Plot

In [49]:
# Seabornテーマの設定
sns.set_theme(context='talk', style='darkgrid', font='MS GOTHIC')