In [1]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

In [2]:
# Example from - https://chrisalbon.com/python/pandas_map_values_to_values.html
# Column-oriented sample data: each key is a column name, each list holds that column's values.
raw_data = {
    "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"],
    "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"],
    "age": [42, 52, 36, 24, 73],
    "city": ["San Francisco", "Baltimore", "Miami", "Douglas", "Boston"],
}
# `columns` states the column order explicitly.
df = pd.DataFrame(data=raw_data, columns=["first_name", "last_name", "age", "city"])
df

Unnamed: 0,first_name,last_name,age,city
0,Jason,Miller,42,San Francisco
1,Molly,Jacobson,52,Baltimore
2,Tina,Ali,36,Miami
3,Jake,Milner,24,Douglas
4,Amy,Cooze,73,Boston


In [3]:
# Listing only some keys in `columns` builds a frame with just those columns.
DataFrame(data=raw_data, columns=["age", "city"])

Unnamed: 0,age,city
0,42,San Francisco
1,52,Baltimore
2,36,Miami
3,24,Douglas
4,73,Boston


In [4]:
# "debt" is not a key of raw_data, so that column is created with missing values.
DataFrame(
    data=raw_data,
    columns=["first_name", "last_name", "age", "city", "debt"],
)

Unnamed: 0,first_name,last_name,age,city,debt
0,Jason,Miller,42,San Francisco,
1,Molly,Jacobson,52,Baltimore,
2,Tina,Ali,36,Miami,
3,Jake,Milner,24,Douglas,
4,Amy,Cooze,73,Boston,


In [5]:
# Rebuild df with an extra, still-empty "debt" column.
df = DataFrame(
    data=raw_data,
    columns=["first_name", "last_name", "age", "city", "debt"],
)
df.first_name  # attribute-style access to a column

0    Jason
1    Molly
2     Tina
3     Jake
4      Amy
Name: first_name, dtype: object

In [6]:
df["first_name"]   # fetch a column by its name

0    Jason
1    Molly
2     Tina
3     Jake
4      Amy
Name: first_name, dtype: object

In [7]:
# Show the whole frame; "debt" was requested as a column but holds no values yet.
df

Unnamed: 0,first_name,last_name,age,city,debt
0,Jason,Miller,42,San Francisco,
1,Molly,Jacobson,52,Baltimore,
2,Tina,Ali,36,Miami,
3,Jake,Milner,24,Douglas,
4,Amy,Cooze,73,Boston,


In [8]:
df.loc[1]   # fetch a row's values by its row label

first_name        Molly
last_name      Jacobson
age                  52
city          Baltimore
debt                NaN
Name: 1, dtype: object

In [9]:
df["age"].iloc[1:]  # take the "age" column from the 2nd row through the end

1    52
2    36
3    24
4    73
Name: age, dtype: int64

In [10]:
# Example from - https://stackoverflow.com/questions/31593201/pandas-iloc-vs-ix-vs-loc-explanation
# All-NaN Series whose integer labels deliberately differ from the positional order.
s = pd.Series(
    data=np.nan,
    index=[49, 48, 47, 46, 45, 1, 2, 3, 4, 5],
)
s

49   NaN
48   NaN
47   NaN
46   NaN
45   NaN
1    NaN
2    NaN
3    NaN
4    NaN
5    NaN
dtype: float64

In [15]:
# iloc vs. loc on a Series whose integer labels are not in positional order
print( s.iloc[:3] )   # positional: takes entries by their order, regardless of the index labels
print( s.loc[:3] )    # label-based: takes entries by index label, up to and including label 3

49   NaN
48   NaN
47   NaN
dtype: float64
49   NaN
48   NaN
47   NaN
46   NaN
45   NaN
1    NaN
2    NaN
3    NaN
dtype: float64


In [16]:
# Element-wise comparison: yields a boolean Series with one entry per row.
df.age > 40

0     True
1     True
2    False
3    False
4     True
Name: age, dtype: bool

In [18]:
# Fill the "debt" column with a boolean flag: True where age is above 40.
# Bracket assignment is used because it always targets a column; attribute
# assignment (df.debt = ...) only updates a column that already exists and
# otherwise just sets a plain Python attribute on the DataFrame object.
df["debt"] = df["age"] > 40
df["debt"]

0     True
1     True
2    False
3    False
4     True
Name: debt, dtype: bool

In [19]:
# A short Series labelled 0, 1 and 3 only (labels 2 and 4 are intentionally absent).
values = Series(["M", "F", "F"], index=[0, 1, 3])
values

0    M
1    F
3    F
dtype: object

In [20]:
df["sex"] = values   # data is filled in only at index 0, 1 and 3; every other row gets NaN
df

Unnamed: 0,first_name,last_name,age,city,debt,sex
0,Jason,Miller,42,San Francisco,True,M
1,Molly,Jacobson,52,Baltimore,True,F
2,Tina,Ali,36,Miami,False,
3,Jake,Milner,24,Douglas,False,F
4,Amy,Cooze,73,Boston,True,


In [21]:
# Transpose: the column names become the row labels and the row labels become the columns.
df.T

Unnamed: 0,0,1,2,3,4
first_name,Jason,Molly,Tina,Jake,Amy
last_name,Miller,Jacobson,Ali,Milner,Cooze
age,42,52,36,24,73
city,San Francisco,Baltimore,Miami,Douglas,Boston
debt,True,True,False,False,True
sex,M,F,,F,


In [22]:
# The frame's data as a 2-D NumPy array; the dtype is object because the columns hold mixed types.
df.values

array([['Jason', 'Miller', 42, 'San Francisco', True, 'M'],
       ['Molly', 'Jacobson', 52, 'Baltimore', True, 'F'],
       ['Tina', 'Ali', 36, 'Miami', False, nan],
       ['Jake', 'Milner', 24, 'Douglas', False, 'F'],
       ['Amy', 'Cooze', 73, 'Boston', True, nan]], dtype=object)

In [23]:
# Called without a path, to_csv() returns the CSV text as a string instead of writing a file.
df.to_csv()

',first_name,last_name,age,city,debt,sex\n0,Jason,Miller,42,San Francisco,True,M\n1,Molly,Jacobson,52,Baltimore,True,F\n2,Tina,Ali,36,Miami,False,\n3,Jake,Milner,24,Douglas,False,F\n4,Amy,Cooze,73,Boston,True,\n'

In [24]:
del df["debt"]   # delete a column

In [26]:
# Display the frame after the `del` above; the "debt" column is gone.
df

Unnamed: 0,first_name,last_name,age,city,sex
0,Jason,Miller,42,San Francisco,M
1,Molly,Jacobson,52,Baltimore,F
2,Tina,Ali,36,Miami,
3,Jake,Milner,24,Douglas,F
4,Amy,Cooze,73,Boston,


In [27]:
# Example from "Python for Data Analysis"

# Nested dict: the outer keys become the columns and the inner keys become the row index;
# a year missing from one inner dict shows up as NaN in that column.
pop = {
    "Nevada": {2001: 2.4, 2002: 2.9},
    "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6},
}

DataFrame(data=pop)

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6
