# Selecting Entries

In [None]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [None]:
# Selecting entries in a Series: build a three-element Series labelled A, B, C.
ser1 = Series(np.arange(3), index=list('ABC'))
# Double the values (0, 2, 4) -- presumably so they are not mistaken for the positions 0, 1, 2.
ser1 = ser1 * 2
ser1

In [None]:
# You can grab an entry by index name: ser1['B'] returns 2
# or by position: ser1[1] returns 2 (recent pandas versions deprecate integer positions on a labelled index; ser1.iloc[1] is the explicit form)
# or by a range of positions: ser1[0:2] returns rows A:0 and B:2
# or by a list of index names: ser1[['A','B']] returns rows A:0 and B:2
# You can grab entries with a Boolean condition: ser1[ser1>3] returns row C:4
# You can change values with a Boolean condition: ser1[ser1>3] = 10 sets C to 10

In [None]:
# Selecting entries in a DataFrame: a 5x5 grid holding 0..24,
# with one row per city and columns A through E.
dframe = DataFrame(
    np.arange(25).reshape(5, 5),
    columns=list('ABCDE'),
    index=['NYC', 'LA', 'SF', 'DC', 'Chi'],
)


In [None]:
# Display the whole DataFrame for reference before selecting from it.
dframe

In [None]:
# You can grab entries by column name:
dframe['B'] # returns column B as a Series, one value per row


In [None]:
# You can grab multiple columns by passing a list of names
# (hence the double brackets); the result is a DataFrame:
dframe[['B','E']]


In [None]:
# Display the DataFrame again; the selections above did not modify it.
dframe

In [None]:
# You can grab specific rows using a Boolean condition:
# keep only the rows whose value in column 'E' is greater than 8.
dframe[dframe['E']>8]


In [None]:
# You can grab a specific cell by row label and column label in one lookup.
# (The chained form dframe['B']['LA'] reads the same value, but it performs two
# separate selections and is unreliable when used for assignment.)
dframe.loc['LA', 'B']


In [None]:
# To show a Boolean DataFrame: each cell is True where the value is greater than 10.

dframe>10

# Data Alignment

In [None]:
# Data alignment: two Series whose indexes only partly overlap
# (ser2 carries an extra label, 'D', that ser1 lacks).
ser1 = Series([0, 1, 2], index=list('ABC'))  # list('ABC') is a shortcut for ['A', 'B', 'C']
ser2 = Series([3, 4, 5, 6], index=['A', 'B', 'C', 'D'])
ser1

In [None]:
# Addition aligns the two Series on their index labels; a label found in only
# one of them produces a null (NaN) in the result.
ser1 + ser2

In [None]:
# Because ser1 has no value for 'D', ser1 + ser2 gives a null (NaN) for 'D'.
# The same behavior occurs with DataFrames (null values are assigned for any unmatched field).
# Use .add with fill_value to substitute a value for the missing side before adding:
# here the missing ser1 entry counts as 0, so 'D' becomes 0 + 6 = 6.
# NOTE: ser2.add(ser1,fill_value=0) returns the same thing!
ser1.add(ser2,fill_value=0)
# When using .add/fill_value with DataFrames, a cell is still null when neither
# DataFrame has a value for it (at the intersection where rows found only in one
# DataFrame meet columns found only in the other).


In [None]:
# Operations Between a Series and a DataFrame
dframe1 = DataFrame(np.arange(9).reshape(3,3),columns=list('ADC'), index=['NYC','SF','LA'])
# .ix was deprecated in pandas 0.20 and removed in 1.0; .iloc[0] is the explicit
# position-based equivalent, so ser1 takes the first ('NYC') row and its values.
ser1 = dframe1.iloc[0]


In [None]:
# Display dframe1: a 3x3 grid of 0..8 with rows NYC, SF, LA and columns A, D, C.
dframe1

In [None]:
# Display ser1, the first row of dframe1 taken out as a Series.
ser1

In [None]:
# Subtracting a Series from a DataFrame matches the Series labels to the
# DataFrame's columns and subtracts it from every row. Since ser1 is the 'NYC'
# row, the 'NYC' row of the result is all zeros.
dframe1 - ser1

In [None]:
from numpy.random import randn
# 5x5 DataFrame of standard-normal random samples with labelled rows and columns.
dframe = DataFrame(
    randn(5, 5),
    columns=['col1', 'col2', 'col3', 'col4', 'col5'],
    index=list('ABDEF'),
)

In [None]:
# To count how many times each distinct value occurs in a DataFrame column:
dframe['col1'].value_counts() # counts come back sorted from highest to lowest


# Summary Statistics in DataFrame

In [None]:
# A two-row array containing one NaN (null) in each row.
arr = np.array([
    [1, 2, np.nan],
    [np.nan, 3, 4],
])
dframe1 = DataFrame(arr, columns=['One', 'Two', 'Three'], index=list('AB'))
dframe1

In [None]:
# Sum of each column; null (NaN) entries are skipped by default.
dframe1.sum() 


In [None]:
# Sum across each row instead (axis=1), giving one total per index label.
dframe1.sum(axis=1)

In [None]:
# Minimum value of each column; null (NaN) entries are skipped by default.
dframe1.min() 

In [None]:
# Find the index label at which each column's minimum value occurs.
dframe1.idxmin()

In [None]:
# Display dframe1 again to compare against the cumulative sums in the next cell.
dframe1

In [None]:
# Running (cumulative) sum down each column; a null cell stays null in the result.
dframe1.cumsum()

In [None]:
# Summary statistics for each column: count, mean, std, min, quartiles and max.
dframe1.describe()