In [1]:
import pandas as pd
from pandas import Series, DataFrame

# 5.1 Introduction to pandas Data Structures

### Series

In [2]:
obj = pd.Series([4, 7, -5, 3])

In [3]:
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [4]:
obj.values

array([ 4,  7, -5,  3])

In [5]:
obj.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
obj2 = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])

In [8]:
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [9]:
obj2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [10]:
obj2['a']

-5

In [11]:
obj2['d'] = 6

In [12]:
obj2

d    6
b    7
a   -5
c    3
dtype: int64

In [13]:
obj2[['c', 'a', 'd']]

c    3
a   -5
d    6
dtype: int64

Math functions, operations, boolean things from Numby

In [14]:
obj2[obj2 > 0]

d    6
b    7
c    3
dtype: int64

In [15]:
obj2 * 2

d    12
b    14
a   -10
c     6
dtype: int64

The following example does not work, because numpy isn't imported, even though pandas uses numpy.  Makes no sense.

In [20]:
np.exp(obj2)

NameError: name 'np' is not defined

In [21]:
'b' in obj2

True

In [22]:
'e' in obj2

False

In [23]:
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}

In [24]:
obj3 = pd.Series(sdata)

In [25]:
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [26]:
states = ['California', 'Ohio', 'Oregon', 'Texas']

In [27]:
obj4 = pd.Series(sdata, index=states)

In [28]:
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [29]:
pd.isnull(obj4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [30]:
pd.notnull(obj4)

California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

Can do the same with a Series, but hey the only example is invoking a function on the Series object

In [31]:
obj4.isnull()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [32]:
obj3 + obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [33]:
obj4.name = 'population'

In [34]:
obj4.index.name = 'state'

In [35]:
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

In [36]:
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [37]:
obj.index = ['Bob', 'Steve', 'Jefff', 'Ryan']

In [38]:
obj

Bob      4
Steve    7
Jefff   -5
Ryan     3
dtype: int64

In [39]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
            'year': [2000, 2001, 2002, 2001, 2002, 2003],
            'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)

In [40]:
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [43]:
pd.DataFrame(data, columns=['year', 'state', 'pop'])

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


More examples of something

In [44]:
frame['state']

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object

In [45]:
frame.year

0    2000
1    2001
2    2002
3    2001
4    2002
5    2003
Name: year, dtype: int64

In [46]:
frame.pop

<bound method DataFrame.pop of     state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
5  Nevada  2003  3.2>

In [47]:
frame.state

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object

In [48]:
frame.pop

<bound method DataFrame.pop of     state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
5  Nevada  2003  3.2>

In [49]:
frame.state

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object

If the index has been made a string, then enclose it in quotes.  Example will get 3rd row (excluding column name/header)

In [52]:
frame.loc[2]

state    Ohio
year     2002
pop       3.6
Name: 2, dtype: object

In [54]:
val = pd.Series([-1.2, -1.5, -1.7], index=[1, 3, 4])
frame['debt'] = val

In [55]:
frame

Unnamed: 0,state,year,pop,debt
0,Ohio,2000,1.5,
1,Ohio,2001,1.7,-1.2
2,Ohio,2002,3.6,
3,Nevada,2001,2.4,-1.5
4,Nevada,2002,2.9,-1.7
5,Nevada,2003,3.2,


In [56]:
frame['eastern'] = frame.state == 'Ohio'

In [57]:
frame

Unnamed: 0,state,year,pop,debt,eastern
0,Ohio,2000,1.5,,True
1,Ohio,2001,1.7,-1.2,True
2,Ohio,2002,3.6,,True
3,Nevada,2001,2.4,-1.5,False
4,Nevada,2002,2.9,-1.7,False
5,Nevada,2003,3.2,,False


In [58]:
del frame['eastern']

In [59]:
frame.columns

Index(['state', 'year', 'pop', 'debt'], dtype='object')

In [60]:
pop = {'Nevada': {2001: 2.4, 2002: 2.9}, 'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

In [61]:
frame2 = pd.DataFrame(pop)

In [62]:
frame2

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [63]:
frame2.T

Unnamed: 0,2001,2002,2000
Nevada,2.4,2.9,
Ohio,1.7,3.6,1.5


In [66]:
frame2.index.name = 'year';
frame2.columns.name = 'state';

In [67]:
frame2

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [68]:
frame2.values

array([[2.4, 1.7],
       [2.9, 3.6],
       [nan, 1.5]])

In [69]:
frame.values

array([['Ohio', 2000, 1.5, nan],
       ['Ohio', 2001, 1.7, -1.2],
       ['Ohio', 2002, 3.6, nan],
       ['Nevada', 2001, 2.4, -1.5],
       ['Nevada', 2002, 2.9, -1.7],
       ['Nevada', 2003, 3.2, nan]], dtype=object)

Table 5-1. Possible data inputs to DataFrame constructor:

|Type|Notes|
|----|-----|
|2D ndarray|A matrix of data, passing optional row and column labels|
| dict of arrays, lists, or tuples|Each sequence becomes a column in the DataFrame; all sequences must be the same length|
|NumPy structured/record array|Treated as the “dict of arrays” case|
|dict of Series| Each value becomes a column; indexes from each Series are unioned together to form the result’s row index if no explicit index is passed|
|dict of dicts| Each inner dict becomes a column; keys are unioned to form the row index as in the “dict of Series” case|
|List of dicts or Series| Each item becomes a row in the DataFrame; union of dict keys or Series indexes become the DataFrame’s column labels|
|List of lists or tuples| Treated as the “2D ndarray” case|
|Another DataFrame|The DataFrame’s indexes are used unless different ones are passed|
|NumPy MaskedArray|Like the “2D ndarray” case except masked values become NA/missing in the DataFrame result|





In [75]:
obj = pd.Series(range(3), index=['a', 'b', 'c'])

In [76]:
index = obj.index

In [77]:
index

Index(['a', 'b', 'c'], dtype='object')

In [78]:
index[1:]

Index(['b', 'c'], dtype='object')

Another one that requires numpy but the chapter didn't say to get numpy so we will let it fail

In [79]:
labels = pd.Index(np.arrange(3))

NameError: name 'np' is not defined

In [81]:
labels

NameError: name 'labels' is not defined

In [82]:
obj2 = pd.Series([1.5, -2.5, 0], index=labels)

NameError: name 'labels' is not defined

In [83]:
obj2

d    6
b    7
a   -5
c    3
dtype: int64

In [84]:
obj2.index is lables

NameError: name 'lables' is not defined

In [85]:
frame3

NameError: name 'frame3' is not defined

In [86]:
frame2

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [87]:
frame2.columns

Index(['Nevada', 'Ohio'], dtype='object', name='state')

In [88]:
'Ohio' in frame2.columns

True

In [89]:
2003 in frame2.index

False

In [92]:
dup_labels = pd.Index(['foo', 'foo', 'bar', 'bar'], dtype ='object')

In [94]:
dup_labels

Index(['foo', 'foo', 'bar', 'bar'], dtype='object')

<h2>5.2 Functionality

Re-Indexing

In [95]:
obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])

In [96]:
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [97]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])

In [98]:
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [100]:
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])

In [101]:
obj3

0      blue
2    purple
4    yellow
dtype: object

In [102]:
obj3.reindex(range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

I'll import numbpy now so I can see some more of these working, but instructions have yet to instruct it

In [106]:
import numpy as np

frame = pd.DataFrame(np.arange(9).reshape((3, 3)), index=['a', 'c', 'd'], columns=['Ohio', 'Texas', 'California'])

In [107]:
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [108]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])

In [109]:
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [110]:
states = ['Texas', 'Utah', 'California']

In [111]:
frame.reindex(columns=states)

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [117]:
frame.loc[['a', 'b', 'c', 'd'], states]

KeyError: "['b'] not in index"

As we can see, this doesn't work and the errors it throws are not useful to people who don't develop the pandas package

Dropping Entries from an Axis

In [118]:
obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])

In [119]:
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [120]:
new_obj = obj.drop('c')

In [121]:
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [122]:
obj.drop(['d', 'c'])

a    0.0
b    1.0
e    4.0
dtype: float64

In [124]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)), index=['ohio', 'colorado', 'utah', 'new york'], columns=[ 'one', 'two', 'three', 'four'])

In [125]:
data

Unnamed: 0,one,two,three,four
ohio,0,1,2,3
colorado,4,5,6,7
utah,8,9,10,11
new york,12,13,14,15


In [126]:
data.drop(['colorado', 'ohio'])

Unnamed: 0,one,two,three,four
utah,8,9,10,11
new york,12,13,14,15


In [127]:
data.drop('two', axis=1)

Unnamed: 0,one,three,four
ohio,0,2,3
colorado,4,6,7
utah,8,10,11
new york,12,14,15


In [128]:
data.drop(['two', 'four'], axis='columns')

Unnamed: 0,one,three
ohio,0,2
colorado,4,6
utah,8,10
new york,12,14


In [129]:
obj.drop('c', inplace=True)

In [130]:
obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

<h2> Series Indexing

In [131]:
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])

In [132]:
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [133]:
obj['b']

1.0

In [134]:
obj[obj < 2]

a    0.0
b    1.0
dtype: float64

Slicing in pandas is inclusive of the end index item, unlike python and java where its exclusive

In [135]:
obj['b': 'c'] = 5

In [136]:
obj

a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64

<h4> Index into a DataFrame to retrive one or more columns either with a single value or sequence:

In [137]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)), index=['ohio', 'colorado', 'utah', 'new york'], columns = ['one', 'two', 'three', 'four'])

In [138]:
data

Unnamed: 0,one,two,three,four
ohio,0,1,2,3
colorado,4,5,6,7
utah,8,9,10,11
new york,12,13,14,15


In [140]:
data['two']

ohio         1
colorado     5
utah         9
new york    13
Name: two, dtype: int64

In [141]:
data[['three', 'one']]

Unnamed: 0,three,one
ohio,2,0
colorado,6,4
utah,10,8
new york,14,12


In [142]:
data[data['three'] >5]

Unnamed: 0,one,two,three,four
colorado,4,5,6,7
utah,8,9,10,11
new york,12,13,14,15


In [143]:
data < 5

Unnamed: 0,one,two,three,four
ohio,True,True,True,True
colorado,True,False,False,False
utah,False,False,False,False
new york,False,False,False,False


In [144]:
data[data < 5] = 0

In [145]:
data

Unnamed: 0,one,two,three,four
ohio,0,0,0,0
colorado,0,5,6,7
utah,8,9,10,11
new york,12,13,14,15


In [146]:
data.loc['colorado', ['two', 'three']]

two      5
three    6
Name: colorado, dtype: int64

In [147]:
data.iloc[2, [3, 0, 1]]

four    11
one      8
two      9
Name: utah, dtype: int64

In [148]:
data.iloc[2]

one       8
two       9
three    10
four     11
Name: utah, dtype: int64

In [150]:
data.loc[:'utah', 'two']

ohio        0
colorado    5
utah        9
Name: two, dtype: int64

In [151]:
data.iloc[:, :3][data.three > 5]

Unnamed: 0,one,two,three
colorado,0,5,6
utah,8,9,10
new york,12,13,14


<h3> Indexing with DataFrames</h3>


* df[val] - Select single column or sequence of columns from the DataFrame; special case conveniences: boolean array (filter rows), slice (slice rows), or boolean DataFrame (set values based on some criterion)
* df.loc[val] - Selects single row or subset of rows from the DataFrame by label
* df.loc[:, val] - Selects single column or subset of columns by label
* df.loc[val1, val2] - Select both rows and columns by label
* df.iloc[where] - Selects single row or subset of rows from the DataFrame by integer position
* df.iloc[:, where] - Selects single column or subset of columns by integer position
* df.iloc[where_i, where_j] - Select both rows and columns by integer position
* df.at[label_i, label_j] - Select a single scalar value by row and column label
* df.iat[i, j] - Select a single scalar value by row and column position (integers)
* reindex method - Select either rows or columns by labels
* get_value, set_value methods - Select single value by row and column label

In [154]:
ser = pd.Series(np.arange(3.))
ser
# ser[-1] throws an error cause the guy who made pandas made it so

0    0.0
1    1.0
2    2.0
dtype: float64

In [156]:
ser2 = pd.Series(np.arange(3.), index=['a', 'b', 'c'])

In [158]:
ser2[-1]
# works but throws deprecation warning to use ser.iloc[pos]

  ser2[-1]


2.0

<h4> Arithmatic and Data Alignment

In [159]:
s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])

In [160]:
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])

In [161]:
s1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

In [162]:
s2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

In [163]:
s1 + s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [164]:
df1 = pd.DataFrame({'A': [1, 2]})

In [165]:
df2 = pd.DataFrame({'B': [3, 4]})

In [166]:
df1

Unnamed: 0,A
0,1
1,2


In [167]:
df2

Unnamed: 0,B
0,3
1,4


In [168]:
df1 - df2

Unnamed: 0,A,B
0,,
1,,


In [169]:
df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)), columns=list('abcd'))

In [170]:
df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)), columns=list('abcde'))

In [171]:
df2.loc[1, 'b'] = np.nan

In [172]:
df1

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


In [173]:
df2

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [174]:
df1 + df2

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [175]:
df1.add(df2, fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,4.0
1,9.0,5.0,13.0,15.0,9.0
2,18.0,20.0,22.0,24.0,14.0
3,15.0,16.0,17.0,18.0,19.0


Be sure to note this doesn't actually fill the final product, it fills the NAN in df2 with a 0...i think

In [176]:
1 / df1

Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,0.2,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


In [177]:
df1.rdiv(1)

Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,0.2,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


In [178]:
df1.reindex(columns=df2.columns, fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,0
1,4.0,5.0,6.0,7.0,0
2,8.0,9.0,10.0,11.0,0


.r anything will reverse arugments 1 / df1 is same as df1.rdiv(1)

* add, radd: + 
* sub, rsub: -
* div, div:  / 
* floordiv, rfloordiv: //
* mul, rmul: * 
* pow, rpow: **


In [179]:
arr = np.arange(12.).reshape((3, 4))

In [180]:
arr

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [181]:
arr[0]

array([0., 1., 2., 3.])

In [182]:
arr - arr[0]

array([[0., 0., 0., 0.],
       [4., 4., 4., 4.],
       [8., 8., 8., 8.]])

In [183]:
frame = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])

In [184]:
series = frame.iloc[0]

In [185]:
frame

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [186]:
series

b    0.0
d    1.0
e    2.0
Name: Utah, dtype: float64

In [187]:
frame - series

Unnamed: 0,b,d,e
Utah,0.0,0.0,0.0
Ohio,3.0,3.0,3.0
Texas,6.0,6.0,6.0
Oregon,9.0,9.0,9.0


broadcasting - arithmatic orperation taking b, d, e from series to each row in the data frame with matching index

In [188]:
series2 = pd.Series(range(3), index=['b', 'e', 'f'])

In [189]:
frame + series2

Unnamed: 0,b,d,e,f
Utah,0.0,,3.0,
Ohio,3.0,,6.0,
Texas,6.0,,9.0,
Oregon,9.0,,12.0,


In [191]:
series3 = frame['d']

In [192]:
frame

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [193]:
series3

Utah       1.0
Ohio       4.0
Texas      7.0
Oregon    10.0
Name: d, dtype: float64

In [194]:
frame.sub(series3, axis='index')

Unnamed: 0,b,d,e
Utah,-1.0,0.0,1.0
Ohio,-1.0,0.0,1.0
Texas,-1.0,0.0,1.0
Oregon,-1.0,0.0,1.0


Column Series math operation against DF:  axis number passed is the axis to match on.  axis='index' is axis=row 0

In [195]:
frame = pd.DataFrame(np.random.randn(4, 3), columns=list('bde'),index=['Utah', 'Ohio', 'Texas', 'Oregon'])

In [196]:
np.abs(frame)

Unnamed: 0,b,d,e
Utah,0.603684,0.277707,1.312012
Ohio,0.138755,0.486776,0.512726
Texas,1.660581,2.685773,0.724365
Oregon,2.192732,0.526829,1.47856


In [201]:
f = lambda x: x.max() - x.min()

In [202]:
frame.apply(f)

b    3.853313
d    3.172549
e    0.965834
dtype: float64

DF.apply - this will apply a function working on 1D arrays to each row or column

In [200]:
frame.apply(f, axis='columns')

Utah      1.589719
Ohio      0.651481
Texas     4.346354
Oregon    3.671292
dtype: float64

Above funciton is being executed once per a row because of axis='columns', default is once per column??

In [204]:
def f(x):
    return pd.Series([x.min(), x.max()], index=['min', 'max'])

In [205]:
frame.apply(f)

Unnamed: 0,b,d,e
min,-1.660581,-0.486776,-1.47856
max,2.192732,2.685773,-0.512726


In [206]:
format = lambda x: '%.2f' % x

In [209]:
frame.applymap(format)

  frame.applymap(format)


Unnamed: 0,b,d,e
Utah,-0.6,0.28,-1.31
Ohio,0.14,-0.49,-0.51
Texas,-1.66,2.69,-0.72
Oregon,2.19,0.53,-1.48


In [210]:
frame['e'].map(format)

Utah      -1.31
Ohio      -0.51
Texas     -0.72
Oregon    -1.48
Name: e, dtype: object

In [211]:
obj = pd.Series(range(4), index=['d', 'a', 'b', 'c'])

In [212]:
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [213]:
frame = pd.DataFrame(np.arange(8).reshape((2, 4)), index=['three', 'one'], columns=['d', 'a', 'b', 'c'])

In [214]:
frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [215]:
frame.sort_index(axis=1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [216]:
frame.sort_index(axis=1, ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [217]:
obj = pd.Series([4, 7, -3, 2])

In [218]:
obj.sort_values()

2   -3
3    2
0    4
1    7
dtype: int64

In [219]:
obj = pd.Series([4, np.nan, 7, np.nan, -3, 2])

In [220]:
obj.sort_values()

4   -3.0
5    2.0
0    4.0
2    7.0
1    NaN
3    NaN
dtype: float64

In [221]:
frame = pd.DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})

In [222]:
frame

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [223]:
frame.sort_values(by='b')

Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [224]:
frame.sort_values(by=['a', 'b'])

Unnamed: 0,b,a
2,-3,0
0,4,0
3,2,1
1,7,1


In [225]:
obj = pd.Series([7, -5, 7, 4, 2, 0, 4])

In [226]:
obj.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [227]:
obj.rank(ascending=False, method='max')

0    2.0
1    7.0
2    2.0
3    4.0
4    5.0
5    6.0
6    4.0
dtype: float64

In [228]:
frame = pd.DataFrame({'b': [4.3, 7, -3, 2], 'a': [0, 1, 0, 1],

SyntaxError: incomplete input (103705808.py, line 1)