In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [3]:
# 1. 2차원 표
df = pd.DataFrame(data=np.arange(20).reshape(4,-1),
                  index=list('abcd'), columns=list('vwxyz'))
df

Unnamed: 0,v,w,x,y,z
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19


In [4]:
# 2. Dictionary list
dl = [
      {'name':'James', 'age':24, 'job':'student'}, 
      {'name':'Maria', 'age':36,'job':'teacher'},
      {'name':'Bryan', 'age':30,'job':'programmer'}
]
df = pd.DataFrame(dl)
df

Unnamed: 0,name,age,job
0,James,24,student
1,Maria,36,teacher
2,Bryan,30,programmer


In [5]:
# 3. 여러개의 리스트
names = ['James', 'Maria', 'Bryan']
age = [24, 36, 30]
job = ['student', 'teacher', 'programmer']

df = pd.DataFrame(dict(name=names, age=age, job=job))
df

Unnamed: 0,name,age,job
0,James,24,student
1,Maria,36,teacher
2,Bryan,30,programmer


In [9]:
james_dict = dl[0]
maria_dict = dl[1]
bryan_dict = dl[2]

for d in dl:
    for key, value in d.items():
        print(key, value)
    print()

name James
age 24
job student

name Maria
age 36
job teacher

name Bryan
age 30
job programmer



In [11]:
df = pd.DataFrame()
for d in dl:
    for i, (key, value) in enumerate(d.items()):
        print(i, key, value)
    print()

0 name James
1 age 24
2 job student

0 name Maria
1 age 36
2 job teacher

0 name Bryan
1 age 30
2 job programmer



In [10]:
df = pd.DataFrame()
for d in dl:
    for i, (key, value) in enumerate(d.items()):
        df.loc[i, key] = value 
df

Unnamed: 0,name,age,job
0,Bryan,,
1,,30.0,
2,,,programmer


In [14]:
df = pd.DataFrame()
for d in dl:
    df.append(d, ignore_index=True)
df

## 아리리스

In [17]:
iris = sns.load_dataset('iris')
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [19]:
iris.species.value_counts()

setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64

In [21]:
species, count = np.unique(iris.species, return_counts=True)
pd.DataFrame(zip(species, count))

Unnamed: 0,0,1
0,setosa,50
1,versicolor,50
2,virginica,50


- species, feature mean, std

In [22]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [26]:
iris.groupby('species')[['sepal_length']].agg([np.mean, np.std])

Unnamed: 0_level_0,sepal_length,sepal_length
Unnamed: 0_level_1,mean,std
species,Unnamed: 1_level_2,Unnamed: 2_level_2
setosa,5.006,0.35249
versicolor,5.936,0.516171
virginica,6.588,0.63588


In [50]:
iris_melt = pd.melt(iris, id_vars=['species'], value_vars=['sepal_length','sepal_width','petal_length', 'petal_width'])
iris_mean_std = iris_melt.groupby(['species', 'variable']).agg([np.mean, np.std]).reset_index().rename(columns={'variable':'feature'})
iris_mean_std = iris_mean_std.droplevel(-1, axis=1)
iris_mean_std.columns = ['species','feature','mean', 'std']
iris_mean_std

Unnamed: 0,species,feature,mean,std
0,setosa,petal_length,1.462,0.173664
1,setosa,petal_width,0.246,0.105386
2,setosa,sepal_length,5.006,0.35249
3,setosa,sepal_width,3.428,0.379064
4,versicolor,petal_length,4.26,0.469911
5,versicolor,petal_width,1.326,0.197753
6,versicolor,sepal_length,5.936,0.516171
7,versicolor,sepal_width,2.77,0.313798
8,virginica,petal_length,5.552,0.551895
9,virginica,petal_width,2.026,0.27465


In [45]:
iris_mean_std.columns

MultiIndex([('species',     ''),
            ('feature',     ''),
            (  'value', 'mean'),
            (  'value',  'std')],
           )

In [49]:
iris_mean_std.droplevel(-1, axis=1)

Unnamed: 0,Unnamed: 1,Unnamed: 2,mean,std
0,setosa,petal_length,1.462,0.173664
1,setosa,petal_width,0.246,0.105386
2,setosa,sepal_length,5.006,0.35249
3,setosa,sepal_width,3.428,0.379064
4,versicolor,petal_length,4.26,0.469911
5,versicolor,petal_width,1.326,0.197753
6,versicolor,sepal_length,5.936,0.516171
7,versicolor,sepal_width,2.77,0.313798
8,virginica,petal_length,5.552,0.551895
9,virginica,petal_width,2.026,0.27465


In [38]:
iris_mean_std

Unnamed: 0_level_0,Unnamed: 1_level_0,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
species,variable,Unnamed: 2_level_2,Unnamed: 3_level_2
setosa,petal_length,1.462,0.173664
setosa,petal_width,0.246,0.105386
setosa,sepal_length,5.006,0.35249
setosa,sepal_width,3.428,0.379064
versicolor,petal_length,4.26,0.469911
versicolor,petal_width,1.326,0.197753
versicolor,sepal_length,5.936,0.516171
versicolor,sepal_width,2.77,0.313798
virginica,petal_length,5.552,0.551895
virginica,petal_width,2.026,0.27465


In [51]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [57]:
species_list, feature_list ,mean_list, std_list = [],[],[],[]

for species in iris.species.unique():
    for feature in iris.columns[:-1]:
        s = iris[iris.species==species][feature]
        species_list.append(species)
        feature_list.append(feature)
        mean_list.append(np.round(s.mean(), 2))
        std_list.append(np.round(s.std(), 4))

In [58]:
df = pd.DataFrame(dict(species=species_list, feature=feature_list, mean=mean_list, std=std_list))
df

Unnamed: 0,species,feature,mean,std
0,setosa,sepal_length,5.01,0.3525
1,setosa,sepal_width,3.43,0.3791
2,setosa,petal_length,1.46,0.1737
3,setosa,petal_width,0.25,0.1054
4,versicolor,sepal_length,5.94,0.5162
5,versicolor,sepal_width,2.77,0.3138
6,versicolor,petal_length,4.26,0.4699
7,versicolor,petal_width,1.33,0.1978
8,virginica,sepal_length,6.59,0.6359
9,virginica,sepal_width,2.97,0.3225


In [59]:
df.set_index(['species', 'feature'], inplace=True)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
species,feature,Unnamed: 2_level_1,Unnamed: 3_level_1
setosa,sepal_length,5.01,0.3525
setosa,sepal_width,3.43,0.3791
setosa,petal_length,1.46,0.1737
setosa,petal_width,0.25,0.1054
versicolor,sepal_length,5.94,0.5162
versicolor,sepal_width,2.77,0.3138
versicolor,petal_length,4.26,0.4699
versicolor,petal_width,1.33,0.1978
virginica,sepal_length,6.59,0.6359
virginica,sepal_width,2.97,0.3225
