## More Pandas: Dropping Entries From An Axis

In [1]:
import pandas as pd
from pandas import Series
from pandas import DataFrame
import numpy as np
# Dropping entries from an axis.
# Series.drop returns a NEW object with the indicated label(s) removed;
# the Series it is called on is left unchanged.
obj = Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c')
print(new_obj)
# The result of this call is discarded, so printing obj afterwards still
# shows all five entries.
obj.drop(['d', 'c'])
print(obj)

a    0
b    1
d    3
e    4
dtype: float64
a    0
b    1
c    2
d    3
e    4
dtype: float64


In [10]:
# 4x4 frame of the integers 0..15, labelled by state (rows) and by
# 'one'..'four' (columns).
data = DataFrame(np.arange(16).reshape((4, 4)),
                 index=['Ohio', 'Colorado', 'Utah', 'New York'],
                 columns=['one', 'two', 'three', 'four'])
print(data)
# drop() returns a new frame each time and `data` itself is never modified.
# Only the last expression of the cell is rendered as its output.
# drop rows by index label (axis 0 is the default)
data.drop(['Colorado', 'Ohio'])
# drop a single column
data.drop('two', axis=1)
# drop several columns at once
data.drop(['two', 'four'], axis=1)

          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15


Unnamed: 0,one,three
Ohio,0,2
Colorado,4,6
Utah,8,10
New York,12,14


## Indexing, Selection and Filtering

In [14]:
# A Series can be indexed by its labels as well as by integer position.
obj = Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
# label-based lookups go through plain []
print(obj['b'])
# Positional lookups use .iloc: integer access through [] on a
# string-labelled Series is deprecated in current pandas.
print(obj.iloc[1])
print(obj.iloc[2:4])
# a list of labels selects (and reorders) several entries
print(obj[['b', 'a', 'd']])
print(obj.iloc[[1, 3]])
# a boolean mask keeps only the entries where the condition holds
print(obj[obj < 2])  # all entries < 2

1.0
1.0
c    2
d    3
dtype: float64
b    1
a    0
d    3
dtype: float64
b    1
d    3
dtype: float64
a    0
b    1
dtype: float64


In [17]:
# Unlike ordinary Python slices, slicing with labels INCLUDES the endpoint.
print(obj['b':'c'])
# Assigning through a label slice writes to every selected entry; this
# modifies obj in place.
obj['b':'c'] = 5
print(obj)

b    5
c    5
dtype: float64
a    0
b    5
c    5
d    3
dtype: float64


In [2]:
# (Re)build the 4x4 example frame holding 0..15, with state names as the
# row index and 'one'..'four' as the column labels.
data = DataFrame(np.arange(16).reshape((4, 4)),
                 index=['Ohio', 'Colorado', 'Utah', 'New York'],
                 columns=['one', 'two', 'three', 'four'])
print(data)

          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15


In [20]:
# A plain slice on a DataFrame selects rows: print the first two records.
print(data[:2])
# A boolean Series selects rows too: keep only the records whose value
# in column 'three' is greater than 5.
print(data[data['three'] > 5])

          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
          one  two  three  four
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15


In [22]:
# Comparing a DataFrame with a scalar is vectorized: every entry is tested
# and the result is a boolean frame with the same row and column labels.
below_five = data < 5
# Indexing with that boolean frame and assigning sets all values < 5 to 0.
# This modifies `data` in place, so cells executed afterwards see the
# zeroed values.
data[below_five] = 0
print(data)

          one  two  three  four
Ohio        0    0      0     0
Colorado    0    5      6     7
Utah        8    9     10    11
New York   12   13     14    15


In [25]:
# Label indexing on the rows of a DataFrame: select a subset of rows and
# columns from a DataFrame with numpy-like notation plus axis labels.
# .loc replaces the older .ix indexer, which has been removed from pandas.
# Only the last expression of the cell is rendered as its output.
# row 'Colorado', columns 'two' and 'three'
data.loc['Colorado', ['two', 'three']]
# rows 'Colorado' and 'Utah', columns at positions 3, 0, 1 (the positions
# are translated to labels so that a single .loc call can be used)
data.loc[['Colorado', 'Utah'], data.columns[[3, 0, 1]]]
# rows where column 'three' is greater than 5, restricted to the first
# three columns (positions 0, 1, 2)
data.loc[data.three > 5, data.columns[:3]]

Unnamed: 0,one,two,three
Colorado,0,5,6
Utah,8,9,10
New York,12,13,14


In [4]:
# Summary of DataFrame selection options.
# obj[val] selects a single column or a sequence of columns from a DataFrame;
#   special cases: boolean array (filters rows) and slices (slice rows)
# obj.loc[val] selects a single row or a subset of rows by label
# obj.loc[:, val] selects a single column or a subset of columns by label
#   (.loc replaces the older .ix indexer, which has been removed from pandas)
# NOTE: `data` is modified in place by the earlier `data[data < 5] = 0` cell,
# so the values shown here depend on whether that cell ran first.
print(data.loc[:, 'three'])
# obj.loc[val, val] selects both rows and columns
# reindex method: conform one or more axes to new indexes
# xs method: select a single row or column as a Series by label
data.xs('Colorado')
# get_value / set_value methods select a single value by row and column
#   label; current pandas provides the .at / .iat accessors for this
# icol / irow methods select a single column or row as a Series by integer
#   location; current pandas provides .iloc for this


Ohio         2
Colorado     6
Utah        10
New York    14
Name: three, dtype: int64


one      4
two      5
three    6
four     7
Name: Colorado, dtype: int64