# DataFrames and Series Basics
### Selecting Rows and Columns

In [1]:
# Sample records in column-oriented form: every key is a column name and every
# list holds that column's values, one entry per person.
people = {
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer", "Doe", "Doe"],
    "email": [
        "CoreyMSchafer@gmail.com",
        "JaneDoe@email.com",
        "JohnDoe@email.com",
    ],
}

In [2]:
# Plain dict lookup: returns the Python list stored under the 'email' key.
people['email']

['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com']

In [3]:
import pandas as pd

In [4]:
# Build a DataFrame from the dict: keys become column names, the lists become
# the column values, and rows get the default integer index 0, 1, 2.
df = pd.DataFrame(people)

In [5]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [11]:
df['email'] # a single column label returns a Series (one-dimensional)

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [12]:
type(df['email'])

pandas.core.series.Series

In [7]:
# Attribute access is a shortcut for df['email']. Careful with names: it only
# works when the column name is a valid Python identifier and does not clash
# with an existing DataFrame attribute or method (e.g. a column named 'count').
# Bracket access always works.
df.email

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [14]:
df[['email', 'last']] # a list of column labels returns a DataFrame, in the order listed

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,JaneDoe@email.com,Doe
2,JohnDoe@email.com,Doe


In [15]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [16]:
df.iloc[0] # integer location - first row

first                      Corey
last                     Schafer
email    CoreyMSchafer@gmail.com
Name: 0, dtype: object

In [17]:
df.iloc[[0, 1]] # a list of integer positions returns those rows as a DataFrame

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com


In [18]:
# iloc takes positions for both axes: rows 0 and 1, column at position 2
# (here 'email'). Column labels are not accepted by iloc.
df.iloc[[0, 1], 2]

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
Name: email, dtype: object

In [19]:
# loc looks rows up by index *label*. With the default integer index the
# label 0 happens to coincide with position 0.
df.loc[0]

first                      Corey
last                     Schafer
email    CoreyMSchafer@gmail.com
Name: 0, dtype: object

In [20]:
df.loc[[0, 1], 'email'] # row labels first, then a column label -> Series

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
Name: email, dtype: object

In [21]:
df.loc[[0, 1], ['email', 'last']] # list of row labels, list of column labels -> DataFrame

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,JaneDoe@email.com,Doe


# Indexes
### How to Set, Reset, and Use Indexes

In [22]:
df.set_index('email') #returns new DataFrame

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [23]:
df # No change

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [24]:
# inplace=True modifies df itself instead of returning a new DataFrame, so the
# cell produces no output. Note that this makes the cell non-repeatable:
# after it has run, 'email' is no longer a column.
df.set_index('email', inplace = True)

In [25]:
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [26]:
df.index

Index(['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'], dtype='object', name='email')

In [30]:
# The index labels are now the email strings, so rows are looked up by email.
df.loc['CoreyMSchafer@gmail.com']

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [29]:
df.loc['CoreyMSchafer@gmail.com', 'last'] # one row label + one column label -> a single value

'Schafer'

In [31]:
# The index now holds email strings, so the integer 0 is no longer a valid
# label and .loc raises (TypeError on older pandas, KeyError on newer releases).
# The error is caught and printed so that this demonstration does not abort
# a "Restart & Run All".
try:
    df.loc[0]
except (KeyError, TypeError) as err:
    print(f"{type(err).__name__}: {err}")

TypeError: cannot do label indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [0] of <class 'int'>

In [33]:
df.iloc[0] # iloc is position-based, so it still works whatever the index labels are

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [36]:
# Move the 'email' index back into a regular column and restore the default
# integer index.
# NOTE(review): the recorded output contains an extra 'index' column, which
# suggests this cell was executed more than once (the execution count jumps
# from 33 to 36). reset_index(inplace=True) is not idempotent: run again on a
# default index, it pushes that index into a new 'index' column.
# TODO: re-run from a fresh kernel to confirm the expected columns.
df.reset_index(inplace=True)
df

Unnamed: 0,index,email,first,last
0,0,CoreyMSchafer@gmail.com,Corey,Schafer
1,1,JaneDoe@email.com,Jane,Doe
2,2,JohnDoe@email.com,John,Doe


# Filtering
### Using Conditionals to Filter Rows and Columns

In [38]:
# Boolean mask: a Series holding True for every row whose last name is 'Doe'.
filt = (df['last'] == 'Doe')

In [39]:
filt

0    False
1     True
2     True
Name: last, dtype: bool

In [40]:
df[filt] # keeps only the rows where the mask is True; same as df[df['last'] == 'Doe']

Unnamed: 0,index,email,first,last
1,1,JaneDoe@email.com,Jane,Doe
2,2,JohnDoe@email.com,John,Doe


In [41]:
df.loc[filt] # .loc accepts the same boolean mask and returns the matching rows

Unnamed: 0,index,email,first,last
1,1,JaneDoe@email.com,Jane,Doe
2,2,JohnDoe@email.com,John,Doe


In [42]:
df.loc[filt, 'email'] # the mask picks the rows, 'email' picks the column -> Series

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

In [43]:
# Combine masks element-wise with & (and) and | (or); Python's `and` / `or`
# do not work on Series. Each comparison needs its own parentheses because
# & and | bind more tightly than ==.
filt2 = (df['last'] == 'Doe') & (df['first'] == 'John')  # both conditions must hold
filt3 = (df['last'] == 'Schafer') | (df['first'] == 'John')  # either condition may hold

In [44]:
df.loc[filt2]

Unnamed: 0,index,email,first,last
2,2,JohnDoe@email.com,John,Doe


In [45]:
df.loc[filt3]

Unnamed: 0,index,email,first,last
0,0,CoreyMSchafer@gmail.com,Corey,Schafer
2,2,JohnDoe@email.com,John,Doe


In [46]:
df.loc[~filt3] # "~" (tilde) inverts the boolean mask element-wise, i.e. a logical NOT

Unnamed: 0,index,email,first,last
1,1,JaneDoe@email.com,Jane,Doe
