# pandas part 2 

## DataFrame 불러오기

In [1]:
import pandas as pd

In [2]:
# Load the sample CSV; its first line supplies the column names.
df = pd.read_csv('./data/data-header.csv')
# Swap the default integer index for string labels so rows can be addressed
# by name below (the list length must equal the number of rows).
df.index = ['a','b','c','d']
df

Unnamed: 0,name,age,money,sex
a,둘리,100,100.0,M
b,길동,40,500000.0,M
c,또치,15,50.0,F
d,희동,3,,M


## 행 삭제 

In [3]:
# drop() returns a new DataFrame without row 'a'; df itself is not modified
# (the next cell shows df still has all four rows).
df.drop(['a'])

Unnamed: 0,name,age,money,sex
b,길동,40,500000.0,M
c,또치,15,50.0,F
d,희동,3,,M


In [4]:
df

Unnamed: 0,name,age,money,sex
a,둘리,100,100.0,M
b,길동,40,500000.0,M
c,또치,15,50.0,F
d,희동,3,,M


## `df.index`를 이용한 삭제 

In [5]:
df.columns.values

array(['name', 'age', 'money', 'sex'], dtype=object)

In [6]:
df.index.values

array(['a', 'b', 'c', 'd'], dtype=object)

In [7]:
df.index[0]

'a'

In [8]:
df.drop([df.index[0]])

Unnamed: 0,name,age,money,sex
b,길동,40,500000.0,M
c,또치,15,50.0,F
d,희동,3,,M


In [9]:
df.columns[2]

'money'

In [10]:
df.columns[1:3]

Index(['age', 'money'], dtype='object')

In [11]:
df[df.columns[1:3]]

Unnamed: 0,age,money
a,100,100.0
b,40,500000.0
c,15,50.0
d,3,


In [12]:
df[['name','age','sex']]

Unnamed: 0,name,age,sex
a,둘리,100,M
b,길동,40,M
c,또치,15,F
d,희동,3,M


In [13]:
# df.drop(['money'])  # KeyError: drop() searches the row index by default (axis=0); pass axis=1 to drop a column

In [14]:
df.drop(['money'], axis=1)

Unnamed: 0,name,age,sex
a,둘리,100,M
b,길동,40,M
c,또치,15,F
d,희동,3,M


In [None]:
df.drop([df.columns[2]], axis=1)

## 컬럼이 많을 경우 특정 컬럼을 제거하고 싶다면 

In [None]:
features = set(df.columns.values) - set(['age', 'money'])

In [None]:
# features is a set
features

In [None]:
# Rebuild the list from df.columns so the original column order is kept;
# list(features) would follow the set's arbitrary iteration order.
features = [col for col in df.columns if col in features]

In [None]:
df[features]

## 조건에 의한 삭제 
- 필터링과 같다 

In [None]:
df

In [None]:
df['money'] > 1500

In [None]:
# Keep only the rows whose money exceeds 1500; a NaN money compares False,
# so that row is filtered out as well.
df[df['money'] > 1500]

## 새로운 컬럼 생성 

In [None]:
df['salary'] = 100
df

## 조건부 업데이트 

In [None]:
import numpy as np
df

In [None]:
df['Young'] = np.where(df['age'] < 20, '젊은이', '노인') 
df

## 리스트를 이용한 업데이트 

In [None]:
df['fname'] = ['공', '고', '김','김']
df

In [None]:
df['fullname'] = df['fname'] + ' ' + df['name']
df

## apply 사용법 

In [None]:
import random
random.random()

In [None]:
import random 

def bonus(salary, max_rate=0.4):
    """Return a randomly sized bonus for ``salary``.

    A rate is drawn as ``max_rate * random.random()`` and applied to the
    salary; the product is truncated toward zero by ``int``.

    Args:
        salary: Base salary the bonus is computed from.
        max_rate: Upper bound of the random bonus rate. Defaults to 0.4,
            the value that used to be hard-coded.

    Returns:
        The bonus as an ``int``.
    """
    # Named ``rate`` so the local no longer shadows the function itself.
    rate = max_rate * random.random()
    return int(salary * rate)

In [None]:
bonus(100)

In [None]:
df['bonus'] = df['salary'].apply(bonus)
df

## apply + lambda 

In [None]:
df['salary2019'] = df['salary'].apply(lambda x: x * 1.1)
df

In [None]:
df['salary2019'] = df['salary2019'].apply(int)
df

## row 추가하기

In [None]:
df2 = pd.DataFrame([{'name': 'snoopy', 'age': 66}], index = ['e'])
df2

In [None]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the replacement and likewise returns a new DataFrame.
pd.concat([df, df2])

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat accepts the same
# sort flag for the non-concatenation axis.
pd.concat([df, df2], sort=False)

## group by 
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html
- https://pandas.pydata.org/docs/user_guide/groupby.html
- 데이터를 나누고(split), 조작하고(apply), 합치는(combining) 작업을 수행

In [None]:
df

In [None]:
young_group = df.groupby('Young')

In [None]:
young_group

In [None]:
print(young_group.groups)
print(type(young_group.groups))

In [None]:
young_group.groups['노인']

In [None]:
df.loc[young_group.groups['노인']]

In [None]:
# Walk the groups one at a time: each iteration yields the group label and
# the sub-DataFrame holding that group's rows.
for label, members in young_group:
    print(label, len(members))
    display(members)
    # print(members)

### count 

In [None]:
young_group.size()

In [None]:
pd.DataFrame({'나이별 통계': young_group.size()})

In [None]:
# Average only the numeric columns: the frame also holds string columns
# (name, sex, fname, ...), which make mean() raise TypeError on pandas >= 2.0.
young_group.mean(numeric_only=True)