In [2]:
import pandas as pd
# Build a Series from a list that mixes a string and an integer.
# No index is supplied, so pandas labels the entries 0 and 1.
s = pd.Series(data=['홍길동', 80])
s

0    홍길동
1     80
dtype: object

In [3]:
# Inspect the Python type of the object returned by pd.Series.
type(s)

pandas.core.series.Series

In [4]:
# Rebuild the Series with explicit string labels; the dict keys become the
# index (in insertion order) and the dict values become the data.
s = pd.Series({'name': '홍길동', 'score': 80})
s

name     홍길동
score     80
dtype: object

In [5]:
# Label-based lookup next to position-based lookup of the same element.
# s[0] on a Series whose index holds string labels depends on the integer-key
# positional fallback, which pandas has deprecated; .iloc states the
# positional intent explicitly and keeps working on newer pandas releases.
s['name'], s.iloc[0]

('홍길동', '홍길동')

In [8]:
# Build the member table one record (row) at a time; the keys of each record
# become the columns name, email and age, and rows get the default 0..n-1 index.
member = pd.DataFrame([
    {'name': '홍길동', 'email': 'hong@gmail.com', 'age': 30},
    {'name': '김길동', 'email': 'kim@gmail.com', 'age': 20},
])
member

Unnamed: 0,name,email,age
0,홍길동,hong@gmail.com,30
1,김길동,kim@gmail.com,20


In [11]:
# Replace the default integer row labels with string labels, in place.
# NOTE: this mutates `member`, so any earlier display of it is now stale.
member.index = ['hong', 'kim']
member

Unnamed: 0,name,email,age
hong,홍길동,hong@gmail.com,30
kim,김길동,kim@gmail.com,20


In [12]:
# Show the row labels of the frame as an Index object.
member.index

Index(['hong', 'kim'], dtype='object')

In [13]:
# Select one row by its label; the row comes back as a Series whose index
# is the frame's column names and whose name is the row label.
first = member.loc['hong']
first

name                홍길동
email    hong@gmail.com
age                  30
Name: hong, dtype: object

In [17]:
# Index of the Series: .index and .keys() return the same labels.
first.index, first.keys()

(Index(['name', 'email', 'age'], dtype='object'),
 Index(['name', 'email', 'age'], dtype='object'))

In [16]:
# Values of the Series, returned as a NumPy array.
first.values

array(['홍길동', 'hong@gmail.com', 30], dtype=object)

In [18]:
# Aggregate functions: largest and smallest value of the age column.
member['age'].max(), member['age'].min()

(30, 20)

In [19]:
# Mean, standard deviation and variance of the age column.
member['age'].mean(), member['age'].std(), member['age'].var()

(25.0, 7.0710678118654755, 50.0)

In [20]:
# Median of the age column.
member['age'].median()

25.0

In [21]:
# Load the scientists dataset from a CSV file given by a relative path,
# then display the resulting frame.
df = pd.read_csv('../data/scientists.csv')
df

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [24]:
# Extract the people whose Age is greater than the mean Age of the frame.
df.loc[df['Age'].gt(df['Age'].mean())]

Unnamed: 0,Name,Born,Died,Age,Occupation
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [27]:
# Youngest age among the people who are older than the average age.
# A single .loc call selects the rows and the column together instead of
# chaining df[mask]['Age'], which builds a throwaway intermediate frame.
df.loc[df['Age'] > df['Age'].mean(), 'Age'].min()

61

In [28]:
# Pull the Age column out of the frame as a standalone Series.
age = df['Age']
age

0    37
1    61
2    90
3    66
4    56
5    45
6    41
7    77
Name: Age, dtype: int64

In [29]:
# Boolean-mask the Series: keep only the entries greater than its mean.
age[age > age.mean()]

1    61
2    90
3    66
7    77
Name: Age, dtype: int64

In [30]:
# Element-wise addition of two Series that share the same index.
age + age

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64

In [31]:
# Adding a scalar applies it to every element of the Series.
age + 100

0    137
1    161
2    190
3    166
4    156
5    145
6    141
7    177
Name: Age, dtype: int64

In [32]:
# Arithmetic between Series of different lengths.
# Only labels present in both operands are computed; every other label
# becomes NaN (missing value), and the result is float64.
age2 = pd.Series([1, 2])
age + age2

0    38.0
1    63.0
2     NaN
3     NaN
4     NaN
5     NaN
6     NaN
7     NaN
dtype: float64

In [33]:
# Sort by index label, in descending order.
age.sort_index(ascending=False)

7    77
6    41
5    45
4    56
3    66
2    90
1    61
0    37
Name: Age, dtype: int64

In [34]:
# Arithmetic pairs values by index label regardless of how each operand is
# ordered, so the result matches age + age.
age + age.sort_index(ascending=False)

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64

In [39]:
# Print the dtype of each column of interest, one per line: the two date
# columns read from the CSV and the Age column.
for column in ('Born', 'Died', 'Age'):
    print(df[column].dtype)

object
object
int64


In [40]:
# Parse the Born column into datetime values (displayed only, not stored).
pd.to_datetime(df['Born'])

0   1920-07-25
1   1876-06-13
2   1820-05-12
3   1867-11-07
4   1907-05-27
5   1813-03-15
6   1912-06-23
7   1777-04-30
Name: Born, dtype: datetime64[ns]

In [41]:
# Add new columns: parsed datetime versions of Born and Died, kept alongside
# the original columns.
df['born_dt'] = pd.to_datetime(df['Born'])
df['died_dt'] = pd.to_datetime(df['Died'])
df

Unnamed: 0,Name,Born,Died,Age,Occupation,born_dt,died_dt
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist,1920-07-25,1958-04-16
1,William Gosset,1876-06-13,1937-10-16,61,Statistician,1876-06-13,1937-10-16
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse,1820-05-12,1910-08-13
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist,1867-11-07,1934-07-04
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist,1907-05-27,1964-04-14
5,John Snow,1813-03-15,1858-06-16,45,Physician,1813-03-15,1858-06-16
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist,1912-06-23,1954-06-07
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician,1777-04-30,1855-02-23


In [43]:
# Subtracting the two datetime columns gives the elapsed time between birth
# and death for each row, stored in a new days column.
df['days'] = df['died_dt'] - df['born_dt']

In [44]:
# Display the frame, now including the born_dt, died_dt and days columns.
df

Unnamed: 0,Name,Born,Died,Age,Occupation,born_dt,died_dt,days
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist,1920-07-25,1958-04-16,13779 days
1,William Gosset,1876-06-13,1937-10-16,61,Statistician,1876-06-13,1937-10-16,22404 days
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse,1820-05-12,1910-08-13,32964 days
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist,1867-11-07,1934-07-04,24345 days
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist,1907-05-27,1964-04-14,20777 days
5,John Snow,1813-03-15,1858-06-16,45,Physician,1813-03-15,1858-06-16,16529 days
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist,1912-06-23,1954-06-07,15324 days
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician,1777-04-30,1855-02-23,28422 days
