# Pandas - CONT'D

## Sorting

In [2]:
import numpy as np
import pandas as pd

In [4]:
# Ten rows of standard-normal noise in two named columns.
# No seed is set, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randn(10, 2),
    columns=["col1", "col2"],
)
print(df)

       col1      col2
0 -0.190102 -0.856092
1 -0.532785  1.244171
2 -0.033835  0.543075
3 -0.498245 -0.014567
4 -0.212716  1.440142
5  0.222953  0.637010
6  0.245273  0.482405
7  0.158374  0.277129
8  0.223669 -0.564784
9 -0.559904 -1.057435


In [7]:
# Order the rows by index label, largest label first.
# sort_index returns a new DataFrame; df keeps its original order.
df_sorted = df.sort_index(ascending=False)
df_sorted

Unnamed: 0,col1,col2
9,-0.559904,-1.057435
8,0.223669,-0.564784
7,0.158374,0.277129
6,0.245273,0.482405
5,0.222953,0.63701
4,-0.212716,1.440142
3,-0.498245,-0.014567
2,-0.033835,0.543075
1,-0.532785,1.244171
0,-0.190102,-0.856092


In [14]:
# Order the rows by the values in 'col2' (ascending is the default).
# Each row keeps its original index label, so the index ends up shuffled.
df_sort_vals = df.sort_values('col2')
df_sort_vals

Unnamed: 0,col1,col2
9,-0.559904,-1.057435
0,-0.190102,-0.856092
8,0.223669,-0.564784
3,-0.498245,-0.014567
7,0.158374,0.277129
6,0.245273,0.482405
2,-0.033835,0.543075
5,0.222953,0.63701
1,-0.532785,1.244171
4,-0.212716,1.440142


## Text

### Quick Python string recap

In [15]:
# Sample sentence used by every string-method example in this section.
a = "The quick brown fox"

In [16]:
# Returns a lower-cased copy; `a` itself is not modified.
a.lower()

'the quick brown fox'

In [17]:
# Returns an upper-cased copy; `a` itself is not modified.
a.upper()

'THE QUICK BROWN FOX'

In [18]:
# Cut the string at every single space and return the pieces as a list.
a.split(' ')

['The', 'quick', 'brown', 'fox']

In [19]:
# Removes leading and trailing whitespace; `a` has none, so the result equals `a`.
a.strip()

'The quick brown fox'

In [22]:
# Returns a copy with every occurrence of 'fox' swapped for 'dog'.
a.replace('fox', 'dog')

'The quick brown dog'

In [24]:
# Case-sensitive prefix test.
a.startswith('The')

True

In [25]:
# Case-sensitive suffix test.
a.endswith('fox')

True

In [26]:
# Number of non-overlapping, case-sensitive occurrences of 'The'.
a.count('The')

1

In [27]:
# 0-based position where the first occurrence of 'brown' starts.
a.find('brown')

10

In [28]:
# find() signals "not present" with -1 instead of raising an exception.
a.find('foobar')

-1

In [29]:
# False here: the capital 'T' means not every cased character is lower case.
a.islower()

False

In [30]:
# False here: most of the characters in `a` are lower case.
a.isupper()

False

### Series of strings

In [31]:
# Mixed-type Series: three strings followed by two integers.
# The integers are what show up as NaN in the .str results that follow.
s = pd.Series(
    data=['Foo', 'Bar', 'Baz', 123, 999],
)

In [32]:
# Element-wise lower-casing through the .str accessor; non-string elements become NaN.
s.str.lower()

0    foo
1    bar
2    baz
3    NaN
4    NaN
dtype: object

In [34]:
# Element-wise "does this element contain 'Bar'?"; non-string elements give NaN,
# which is why the result has dtype object rather than bool.
s.str.contains('Bar')

0    False
1     True
2    False
3      NaN
4      NaN
dtype: object

In [35]:
# Length of each string element; the NaN entries force the result to float64.
s.str.len()

0    3.0
1    3.0
2    3.0
3    NaN
4    NaN
dtype: float64

In [36]:
# Element-wise str.find: start position of 'Baz' in each element, -1 where it is absent.
s.str.find('Baz')

0   -1.0
1   -1.0
2    0.0
3    NaN
4    NaN
dtype: float64

In [37]:
# etc

In [43]:
# Building a DataFrame from a list of Series makes each Series one row.
# s1 has only four elements, so its row has a missing value in column 4.
# Note: this rebinds `df`, replacing the frame used in the Sorting section.
s1 = pd.Series(['Bim', 'Boff', 'Boo!', 'WOOPS'])
df = pd.DataFrame([s, s1])
df

Unnamed: 0,0,1,2,3,4
0,Foo,Bar,Baz,123,999.0
1,Bim,Boff,Boo!,WOOPS,


## Slice and Dice

### By Column

Use `df.loc[rows, cols]` to select by label; a bare `:` means "everything along that axis".

In [60]:
# 8x4 frame of standard-normal draws with labelled rows (a-h) and columns (A-D).
df = pd.DataFrame(
    np.random.randn(8, 4),
    index=list('abcdefgh'),
    columns=list('ABCD'),
)

In [61]:
# Row selector ':' keeps every row; column selector 'A' picks one column,
# so the result comes back as a Series named 'A'.
df.loc[:,'A'] # get A column

a   -0.339834
b    1.134331
c   -0.590290
d   -1.048234
e    0.500389
f    1.827451
g   -1.088621
h   -0.472174
Name: A, dtype: float64

In [62]:
# With only a row selector given, all columns are kept.
df.loc[:] # the whole dataframe

Unnamed: 0,A,B,C,D
a,-0.339834,-1.083612,0.391475,-1.005183
b,1.134331,1.754749,0.900221,-0.168814
c,-0.59029,-1.012038,-0.339655,1.16944
d,-1.048234,-0.506276,-0.562645,0.410143
e,0.500389,0.235186,1.447104,0.193555
f,1.827451,-0.225501,0.614484,0.066983
g,-1.088621,2.305451,-0.330414,-1.696287
h,-0.472174,1.074445,-1.849264,-0.28349


In [64]:
# Label slices with .loc include BOTH endpoints, unlike ordinary Python slices,
# so 'b':'c' returns two rows.
df.loc['b':'c', ['B','C']] # rows b and c, cols B and C

Unnamed: 0,B,C
b,1.754749,0.900221
c,-1.012038,-0.339655


### By row
Use `df.iloc[]` to select by integer position.

In [59]:
# .iloc selects by 0-based integer position, not by index label, so this
# returns the first four rows whatever those rows happen to be called.
# The labels 'a'..'h' were already assigned when df was constructed, so the
# index does not need to be re-assigned (and df is not mutated) here.
df.iloc[:4]

Unnamed: 0,A,B,C,D
a,-0.408181,-1.011517,2.161313,-2.814905
b,-1.454878,-0.366626,0.629204,0.193665
c,0.942004,-1.96019,1.391825,-1.664519
d,-1.057897,0.674681,1.088292,-0.544825


In [65]:
# etc