# DataFrame from Nested dict

In [2]:
import numpy as np
import pandas as pd

# A dict of equal-length lists: every key becomes a column, and the rows
# receive the default integer labels 0..n-1.
dic = {
    'red': [1, 2, 3],
    'blue': [4, 5, 6],
    'black': [7, 8, 9],
}
df = pd.DataFrame(dic)
df

Unnamed: 0,red,blue,black
0,1,4,7
1,2,5,8
2,3,6,9


In [3]:
# A dictionary nested inside a dictionary: the outer keys become columns and
# the inner keys become row labels. A label missing from an inner dict
# (2011 for 'red') is filled with NaN.
nestdict = {
    'red': {2012: 22, 2013: 33},
    'white': {2011: 13, 2012: 22, 2013: 16},
    'blue': {2011: 17, 2012: 27, 2013: 18},
}
frame2 = pd.DataFrame(nestdict)
frame2

Unnamed: 0,red,white,blue
2012,22.0,22,27
2013,33.0,16,18
2011,,13,17


# Transposition of a Dataframe

In [5]:
# .T swaps the axes: the column labels become the row labels and vice versa.
frame2.T

Unnamed: 0,2012,2013,2011
red,22.0,33.0,
white,22.0,16.0,13.0
blue,27.0,18.0,17.0


# The Index Objects

# A Series with explicit string labels; .index exposes those labels as an
# Index object.
ser = pd.Series(
    [5, 0, 3, 8, 4],
    index=['red', 'blue', 'yellow', 'white', 'green'],
)
ser.index

# Methods on Index

In [10]:
# idxmin() returns the index label (not the position) of the smallest value.
ser.idxmin()

'blue'

In [11]:
# idxmax() returns the index label (not the position) of the largest value.
ser.idxmax()

'white'

# Index with Duplicate Labels

In [13]:
# Index labels do not have to be unique: 'white' and 'green' each appear twice.
serd = pd.Series(
    range(6),
    index=['white', 'white', 'blue', 'green', 'green', 'yellow'],
)
serd

white     0
white     1
blue      2
green     3
green     4
yellow    5
dtype: int64

In [14]:
# Selecting a duplicated label returns a Series holding every matching entry
# rather than a single scalar.
serd.loc['white']

white    0
white    1
dtype: int64

In [15]:
# is_unique is False here because serd's index contains repeated labels.
serd.index.is_unique

False

In [17]:
# The same check on a DataFrame's row index; every label in frame2.index is distinct.
frame2.index.is_unique

True

# Reindexing

In [18]:
# Starting Series for the reindexing examples.
ser = pd.Series(
    [2, 5, 7, 4],
    index=['one', 'two', 'three', 'four'],
)
ser

one      2
two      5
three    7
four     4
dtype: int64

In [19]:
# reindex() returns a new Series arranged by the given labels. 'five' does not
# exist in ser, so it is filled with NaN (which turns the result into floats),
# and 'two' is dropped because it is not requested.
ser.reindex(index=['three', 'four', 'five', 'one'])

three    7.0
four     4.0
five     NaN
one      2.0
dtype: float64

In [20]:
# An integer-labelled Series whose index has gaps (1, 2 and 4 are missing).
ser3 = pd.Series(
    [1, 5, 6, 3],
    index=[0, 3, 5, 6],
)
ser3

0    1
3    5
5    6
6    3
dtype: int64

In [27]:
# Reindex onto 0..5: labels absent from ser3 (1, 2, 4) become NaN, and label 6
# is dropped because it is outside the requested range.
ser3.reindex(range(6))

0    1.0
1    NaN
2    NaN
3    5.0
4    NaN
5    6.0
dtype: float64

In [22]:
# method='bfill' fills each newly introduced label with the next available
# value further along the original index (1 and 2 take the value at 3, and
# 4 takes the value at 5), so no NaN appears and the dtype stays integer.
ser3.reindex(index=range(6), method='bfill')

0    1
1    5
2    5
3    5
4    6
5    6
dtype: int64

# Dropping 

In [29]:
# Float Series 0.0..3.0 labelled by colour, used for the drop() examples.
ser = pd.Series(
    np.arange(4.0),
    index=['red', 'blue', 'yellow', 'white'],
)
ser

red       0.0
blue      1.0
yellow    2.0
white     3.0
dtype: float64

In [30]:
# drop() returns a new Series without the given label; ser itself is unchanged.
ser.drop('yellow')

red      0.0
blue     1.0
white    3.0
dtype: float64

In [31]:
# Passing a list removes several labels at once.
ser.drop(['blue','white'])

red       0.0
yellow    2.0
dtype: float64

In [33]:
# 4x4 grid of the integers 0..15 with colour names as row labels and object
# names as column labels.
frame = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)

In [34]:
# Display the full frame before any rows or columns are removed.
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [36]:
# Remove rows by label; the result is a new DataFrame and frame is unchanged.
frame.drop(index=['blue', 'yellow'])

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
white,12,13,14,15


In [37]:
# Remove the 'paper' column by rebinding `frame` to a new DataFrame instead of
# mutating the existing object with `del frame['paper']`.
# errors='ignore' keeps this cell idempotent: running it again once the column
# is already gone is a no-op rather than a KeyError.
frame = frame.drop(columns=['paper'], errors='ignore')
frame

Unnamed: 0,ball,pen,pencil
red,0,1,2
blue,4,5,6
yellow,8,9,10
white,12,13,14


In [39]:
np.sqrt(frame) # sqrt is applied to each element of the data structure

Unnamed: 0,ball,pen,pencil
red,0.0,1.0,1.414214
blue,2.0,2.236068,2.44949
yellow,2.828427,3.0,3.162278
white,3.464102,3.605551,3.741657


In [40]:
# apply() hands each column to the function as a Series (axis=0 is the
# default), so this produces the max-minus-min spread of every column.
frame.apply(lambda col: col.max() - col.min(), axis=0)

ball      12
pen       12
pencil    12
dtype: int64

# Arithmetic and Data Alignment

In [41]:
# Two Series whose labels only partly overlap: 'green' exists only in s1,
# 'black' and 'brown' only in s2.
s1 = pd.Series(
    [3, 2, 5, 1],
    index=['white', 'yellow', 'green', 'blue'],
)
s2 = pd.Series(
    [1, 4, 7, 2, 1],
    index=['white', 'yellow', 'black', 'blue', 'brown'],
)

In [42]:
# Addition aligns on index labels: values are summed where a label exists in
# both Series, and labels present in only one of them produce NaN.
s1 + s2

black     NaN
blue      3.0
brown     NaN
green     NaN
white     4.0
yellow    6.0
dtype: float64

In [43]:
# Two frames that share only some row labels and only the 'ball' and 'pen'
# columns, used to show alignment on both axes.
# Note that frame2 is rebound here and no longer refers to the nested-dict frame.
frame1 = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)
frame2 = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=['blue', 'green', 'white', 'yellow'],
    columns=['mug', 'pen', 'ball'],
)

In [44]:
# Display the first operand.
frame1

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [45]:
# Display the second operand.
frame2

Unnamed: 0,mug,pen,ball
blue,0,1,2
green,3,4,5
white,6,7,8
yellow,9,10,11


In [46]:
# DataFrame addition aligns on both row and column labels: only cells whose
# row and column exist in both frames are summed, and all other cells are NaN.
frame1 + frame2

Unnamed: 0,ball,mug,paper,pen,pencil
blue,6.0,,,6.0,
green,,,,,
red,,,,,
white,20.0,,,20.0,
yellow,19.0,,,19.0,


# Sorting and Ranking

In [48]:
# Series used for the sorting examples.
ser = pd.Series(
    [5, 0, 3, 8, 4],
    index=['red', 'blue', 'yellow', 'white', 'green'],
)
ser

red       5
blue      0
yellow    3
white     8
green     4
dtype: int64

In [49]:
# sort_index() orders the entries by label (alphabetically here), not by value.
ser.sort_index()

blue      0
green     4
red       5
white     8
yellow    3
dtype: int64

In [50]:
# ascending=False reverses the label ordering.
ser.sort_index(ascending=False)

yellow    3
white     8
red       5
green     4
blue      0
dtype: int64