In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
# 1. 2-D table: a 4x5 grid of the integers 0..19 with labelled rows and columns.
df = pd.DataFrame(
    np.arange(20).reshape(4, 5),
    index=['a', 'b', 'c', 'd'],
    columns=['v', 'w', 'x', 'y', 'z'],
)
df

Unnamed: 0,v,w,x,y,z
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19


In [3]:
# 2. List of dicts: each dict is one row, and its keys become the column names.
dl = [
    dict(name='James', age=24, job='student'),
    dict(name='Maria', age=36, job='teacher'),
    dict(name='Brian', age=30, job='programmer'),
]
df_l = pd.DataFrame(dl)
df_l

Unnamed: 0,name,age,job
0,James,24,student
1,Maria,36,teacher
2,Brian,30,programmer


In [4]:
# Dict of lists: each key is a column name and each list holds that column's values.
ld = dict(
    name=['James', 'Maria', 'Brian'],
    age=[24, 36, 30],
    job=['student', 'teacher', 'programmer'],
)
df_d = pd.DataFrame(ld)
df_d

Unnamed: 0,name,age,job
0,James,24,student
1,Maria,36,teacher
2,Brian,30,programmer


In [5]:
# Check that the list-of-dicts and dict-of-lists constructions yield the same
# frame; DataFrame.equals compares shape and elements and returns a single bool.
df_l.equals(df_d)

True

In [6]:
# Load seaborn's 'iris' example dataset as a DataFrame and preview the first
# five rows.
# NOTE: load_dataset fetches from seaborn's online example-data repository
# (cached locally afterwards), so a first run needs network access.
iris = sns.load_dataset('iris')
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [7]:
# Print a structural summary: index range, per-column non-null counts, dtypes
# and approximate memory usage.
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [8]:
# Number of rows per species label (the recorded output shows 50 rows for
# each of the three species).
iris.species.value_counts()

setosa        50
virginica     50
versicolor    50
Name: species, dtype: int64

In [11]:
# Mean and sample standard deviation of sepal_length for the setosa rows only,
# each rounded to 3 decimals. Row filter and column pick are done in one .loc call.
s = iris.loc[iris['species'] == 'setosa', 'sepal_length']
(round(s.mean(), 3), round(s.std(), 3))

(5.006, 0.352)

In [14]:
# NumPy counterpart of value_counts(): returns the sorted unique labels and,
# because return_counts=True, a parallel array with each label's frequency.
np.unique(iris.species, return_counts=True)

(array(['setosa', 'versicolor', 'virginica'], dtype=object),
 array([50, 50, 50], dtype=int64))

In [15]:
# Column labels of the frame: the four measurement columns followed by
# 'species' as the last column.
iris.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

In [16]:
# Hand-rolled summary: for every (species, feature) pair, record the pair plus
# the mean (rounded to 2 decimals) and sample std (rounded to 4 decimals) of
# that feature within that species. The four lists stay aligned by position.
species_list, feature_list, mean_list, std_list = [], [], [], []
feature_names = iris.columns[:-1]  # every column except the trailing 'species'
for species in iris['species'].unique():
    # Select this species' rows once, then walk over the measurement columns.
    species_rows = iris.loc[iris['species'] == species]
    for feature in feature_names:
        s = species_rows[feature]
        species_list.append(species)
        feature_list.append(feature)
        mean_list.append(np.round(s.mean(), 2))
        std_list.append(np.round(s.std(), 4))

In [17]:
# Combine the four parallel lists into one long-format summary table with one
# row per (species, feature) pair.
# NOTE: this rebinds `df`, which earlier held the 4x5 demo table.
df = pd.DataFrame({
    'species': species_list,
    'feature': feature_list,
    'mean': mean_list,
    'std': std_list,
})
df

Unnamed: 0,species,feature,mean,std
0,setosa,sepal_length,5.01,0.3525
1,setosa,sepal_width,3.43,0.3791
2,setosa,petal_length,1.46,0.1737
3,setosa,petal_width,0.25,0.1054
4,versicolor,sepal_length,5.94,0.5162
5,versicolor,sepal_width,2.77,0.3138
6,versicolor,petal_length,4.26,0.4699
7,versicolor,petal_width,1.33,0.1978
8,virginica,sepal_length,6.59,0.6359
9,virginica,sepal_width,2.97,0.3225


In [18]:
# Promote (species, feature) from columns to a two-level row index.
# The result is reassigned rather than mutated with inplace=True, and the step
# is skipped when `df` is already indexed this way, so re-running this cell is
# a no-op instead of a KeyError (the columns no longer exist after the first run).
if list(df.index.names) != ['species', 'feature']:
    df = df.set_index(['species', 'feature'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
species,feature,Unnamed: 2_level_1,Unnamed: 3_level_1
setosa,sepal_length,5.01,0.3525
setosa,sepal_width,3.43,0.3791
setosa,petal_length,1.46,0.1737
setosa,petal_width,0.25,0.1054
versicolor,sepal_length,5.94,0.5162
versicolor,sepal_width,2.77,0.3138
versicolor,petal_length,4.26,0.4699
versicolor,petal_width,1.33,0.1978
virginica,sepal_length,6.59,0.6359
virginica,sepal_width,2.97,0.3225


In [23]:
# Reshape iris from wide to long format: 'species' is kept as the identifier,
# and each of the four measurement columns is unpivoted into a
# (feature, value) pair, giving one row per original row per measurement.
iris_melt = iris.melt(
    id_vars='species',
    value_vars=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
    var_name='feature',
    value_name='value',
)

iris_melt

Unnamed: 0,species,feature,value
0,setosa,sepal_length,5.1
1,setosa,sepal_length,4.9
2,setosa,sepal_length,4.7
3,setosa,sepal_length,4.6
4,setosa,sepal_length,5.0
...,...,...,...
595,virginica,petal_width,2.3
596,virginica,petal_width,1.9
597,virginica,petal_width,2.0
598,virginica,petal_width,2.3


In [28]:
# Mean and sample std of 'value' for every (species, feature) group, rounded
# to 3 decimals. groupby sorts its keys by default, so the features appear in
# alphabetical order within each species.
iris_group = (
    iris_melt
    .groupby(['species', 'feature'])['value']
    .agg(['mean', 'std'])
    .round(3)
)
iris_group

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
species,feature,Unnamed: 2_level_1,Unnamed: 3_level_1
setosa,petal_length,1.462,0.174
setosa,petal_width,0.246,0.105
setosa,sepal_length,5.006,0.352
setosa,sepal_width,3.428,0.379
versicolor,petal_length,4.26,0.47
versicolor,petal_width,1.326,0.198
versicolor,sepal_length,5.936,0.516
versicolor,sepal_width,2.77,0.314
virginica,petal_length,5.552,0.552
virginica,petal_width,2.026,0.275
