## 1 나만의 데이터 만들기

#### 시리즈와 데이터프레임 직접 만들기

In [1]:
import pandas as pd

In [2]:
# A Series built from a plain list gets the default integer labels 0, 1, ...
# Mixing a string with an integer makes pandas fall back to the `object` dtype.
s = pd.Series(data=['banana', 42])
print(s)

0    banana
1        42
dtype: object


In [3]:
# Two strings, no explicit index: the rows are labelled 0 and 1.
s = pd.Series(
    data=['Wes McKinney', 'Creator of Pandas'],
)
print(s)

0         Wes McKinney
1    Creator of Pandas
dtype: object


In [4]:
# Same values as before, but `index` replaces the default 0/1 labels with
# custom string labels, one per value.
s = pd.Series(
    data=['Wes McKinney', 'Creator of Pandas'],
    index=['Person', 'Who'],
)
print(s)

Person         Wes McKinney
Who       Creator of Pandas
dtype: object


In [5]:
# Build a DataFrame from a dict: every key becomes a column name and every
# list supplies that column's values. No index is given, so the rows are
# labelled 0 and 1.
scientists = pd.DataFrame(
    data={
        'Name': ['Rosaline Franklin', 'William Gosset'],
        'Occupation': ['Chemist', 'Statistician'],
        'Born': ['1920-07-25', '1876-06-13'],
        'Died': ['1958-04-16', '1937-10-16'],
        'Age': [37, 61],
    },
)

print(scientists)

                Name    Occupation        Born        Died  Age
0  Rosaline Franklin       Chemist  1920-07-25  1958-04-16   37
1     William Gosset  Statistician  1876-06-13  1937-10-16   61


In [6]:
# `index` labels the rows with the scientists' names instead of 0 and 1.
# `columns` fixes the column order explicitly (Age is placed before Died),
# regardless of the order of the keys in `data`.
scientists = pd.DataFrame(
    columns=['Occupation', 'Born', 'Age', 'Died'],
    index=['Rosaline Franklin', 'William Gosset'],
    data={
        'Occupation': ['Chemist', 'Statistician'],
        'Born': ['1920-07-25', '1876-06-13'],
        'Died': ['1958-04-16', '1937-10-16'],
        'Age': [37, 61],
    },
)

print(scientists)

                     Occupation        Born  Age        Died
Rosaline Franklin       Chemist  1920-07-25   37  1958-04-16
William Gosset     Statistician  1876-06-13   61  1937-10-16


In [7]:
from collections import OrderedDict

# An OrderedDict built from (column name, values) pairs states the column
# order explicitly: the columns appear in the order the pairs are listed.
scientists = pd.DataFrame(
    data=OrderedDict(
        (
            ('Name', ['Rosaline Franklin', 'William Gosset']),
            ('Occupation', ['Chemist', 'Statistician']),
            ('Born', ['1920-07-25', '1876-06-13']),
            ('Died', ['1958-04-16', '1937-10-16']),
            ('Age', [37, 61]),
        )
    ),
)

print(scientists)

                Name    Occupation        Born        Died  Age
0  Rosaline Franklin       Chemist  1920-07-25  1958-04-16   37
1     William Gosset  Statistician  1876-06-13  1937-10-16   61


## 2 시리즈 다루기 - 기초

#### 데이터프레임에서 시리즈 선택하기

In [9]:
# Frame used for the row/column selection examples: rows are labelled by
# scientist name, columns are in the explicit order given by `columns`.
scientists = pd.DataFrame(
    columns=['Occupation', 'Born', 'Age', 'Died'],
    index=['Rosaline Franklin', 'William Gosset'],
    data={
        'Occupation': ['Chemist', 'Statistician'],
        'Born': ['1920-07-25', '1876-06-13'],
        'Died': ['1958-04-16', '1937-10-16'],
        'Age': [37, 61],
    },
)

In [10]:
# .loc selects a row by its index label; a single row comes back as a Series.
# NOTE: despite the name `first_row`, 'William Gosset' is the second label in
# the index this frame was built with.
first_row = scientists.loc['William Gosset']
print(type(first_row))

<class 'pandas.core.series.Series'>


In [11]:
# The row Series is labelled by the frame's column names and carries the row
# label as its name; it mixes strings and an integer, so its dtype is object.
print(first_row)

Occupation    Statistician
Born            1876-06-13
Age                     61
Died            1937-10-16
Name: William Gosset, dtype: object


### 시리즈 속성과 메서드 사용하기 - index, values, keys

#### index, values 속성과 keys 메서드 사용하기

In [12]:
# .index is an attribute holding the labels of the Series.
print(first_row.index)

Index(['Occupation', 'Born', 'Age', 'Died'], dtype='object')


In [13]:
# .values is an attribute holding the data without the labels.
print(first_row.values)

['Statistician' '1876-06-13' 61 '1937-10-16']


In [14]:
# keys() is a method (note the parentheses) returning the same labels as .index.
print(first_row.keys())

Index(['Occupation', 'Born', 'Age', 'Died'], dtype='object')


In [15]:
# The Index supports positional access: position 0 is the first label.
print(first_row.index[0])

Occupation


In [16]:
# The result of keys() can be indexed the same way and yields the same label.
print(first_row.keys()[0])

Occupation


### 시리즈의 기초 통계 메서드 사용하기

#### 시리즈의 mean, min, max, std 메서드 사용하기

In [17]:
# Selecting a single column with [] returns a Series named after the column;
# it keeps the frame's row labels as its index.
ages = scientists['Age']

print(ages)

Rosaline Franklin    37
William Gosset       61
Name: Age, dtype: int64


In [18]:
# Arithmetic mean of the ages; the result is a float even for integer data.
print(ages.mean())

49.0


In [19]:
# Smallest age in the Series.
print(ages.min())

37


In [20]:
# Largest age in the Series.
print(ages.max())

61


In [21]:
# Standard deviation of the ages. For the two values 37 and 61 this prints
# 16.97..., i.e. the sample standard deviation (n - 1 in the denominator),
# not the population value 12.0.
print(ages.std())

16.97056274847714


## 3 시리즈 다루기 - 응용

### 시리즈와 불린 추출

#### 시리즈와 불린 추출 사용하기

In [25]:
# Load the scientists data set from a CSV file; the path is relative to the
# current working directory. This rebinds `scientists`, replacing the small
# hand-built frame used so far.
scientists = pd.read_csv('./data/scientists.csv')

In [26]:
# Re-select the Age column, now from the frame loaded from the CSV file,
# and print its largest value.
ages = scientists['Age']

print(ages.max())

90


In [27]:
# Mean age; used as the threshold in the boolean extraction examples.
print(ages.mean())

59.125


In [28]:
# Boolean extraction: keep only the ages strictly greater than the mean.
# The selected values keep their original index labels.
print(ages[ages > ages.mean()])

1    61
2    90
3    66
7    77
Name: Age, dtype: int64


In [29]:
# The comparison on its own yields a boolean Series: one True/False per
# element, True where the age is greater than the mean.
print(ages > ages.mean())

0    False
1     True
2     True
3     True
4    False
5    False
6    False
7     True
Name: Age, dtype: bool


In [30]:
# A plain list of booleans works as a mask as well: one flag per element,
# and only the elements whose flag is True are kept.
manual_bool_values = [True, True, False, False, True, True, False, True]
print(ages[manual_bool_values])

0    37
1    61
4    56
5    45
7    77
Name: Age, dtype: int64


### 시리즈와 브로드캐스팅

#### 벡터와 스칼라로 브로드캐스팅 수행하기

In [31]:
# Adding a Series to itself combines the values element by element.
print(ages + ages)

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64


In [32]:
# Multiplication is element-wise too: each age is squared.
print(ages * ages)

0    1369
1    3721
2    8100
3    4356
4    3136
5    2025
6    1681
7    5929
Name: Age, dtype: int64


In [33]:
# A scalar operand is broadcast: 100 is added to every element.
print(ages + 100)

0    137
1    161
2    190
3    166
4    156
5    145
6    141
7    177
Name: Age, dtype: int64


In [34]:
# Scalar multiplication is broadcast the same way: every element is doubled.
print(ages * 2)

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64


In [35]:
# A two-element Series with the default index labels 0 and 1.
print(pd.Series([1, 100]))

0      1
1    100
dtype: int64


In [36]:
# Series of different lengths are aligned on index labels: only labels 0 and 1
# exist in both operands, so every other label yields NaN and the result
# becomes float64.
print(ages + pd.Series([1, 100]))

0     38.0
1    161.0
2      NaN
3      NaN
4      NaN
5      NaN
6      NaN
7      NaN
dtype: float64


In [37]:
# Sort by index label in descending order: the rows appear in reverse order,
# but each value keeps its own label.
rev_ages = ages.sort_index(ascending=False)
print(rev_ages)

7    77
6    41
5    45
4    56
3    66
2    90
1    61
0    37
Name: Age, dtype: int64


In [38]:
# Same scalar multiplication as earlier; presumably repeated here as the
# reference result to compare with `ages + rev_ages`.
print(ages * 2)

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64


In [39]:
# Addition aligns on index labels, not on position, so the reversed order of
# rev_ages does not matter: each age is added to itself and the result is
# identical to ages * 2.
print(ages + rev_ages)

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64
