# DataFrames basics

In [60]:
# Column-oriented sample data: each key is a column name and each list holds
# that column's values, one entry per person (same order in every list).
people = dict(
    first=['Corey', 'jane', 'John'],
    last=['Schafer', 'Doe', 'Doe'],
    email=['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'],
)

In [61]:
# Plain dict lookup: returns the list stored under the 'email' key.
people['email']

['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com']

In [62]:
import pandas as pd

In [63]:
# Build the DataFrame from the dict: keys become column labels, lists become column values.
df = pd.DataFrame(data=people)

In [64]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [65]:
# Bracket access with a single column label returns that column as a Series.
df['email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [66]:
type(df['email'])

pandas.core.series.Series

In [67]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

### Accessing rows

In [68]:
# iloc selects by integer position: rows 0 and 1, column at position 2 ('email').
df.iloc[[0, 1], 2]

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
Name: email, dtype: object

In [69]:
# df.loc[[0, 1], ['email', 'last']]

### Part 3. Indexes

In [70]:
# df.set_index('email')
# returns a new DataFrame; df itself is not modified

In [71]:
# df.set_index('email', inplace=True)
# modifies df in place

In [72]:
# First row by integer position, returned as a Series indexed by column name.
df.iloc[0]

first                      Corey
last                     Schafer
email    CoreyMSchafer@gmail.com
Name: 0, dtype: object

In [73]:
# df.reset_index(inplace=True)

In [74]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


### Part 4. Filtering

In [75]:
# Boolean mask: True for each row whose 'last' value equals 'Doe'.
filt = df['last'] == 'Doe'

In [76]:
# Boolean-mask indexing keeps only the rows where filt is True.
df[filt]

Unnamed: 0,first,last,email
1,jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [77]:
# AND operator: a row is selected only when both conditions hold.
# Each comparison stays in parentheses because & binds tighter than ==.
filt = (
    (df['last'] == 'Doe')
    & (df['first'] == 'John')
)

In [78]:
# .loc takes the boolean mask for the rows and a label for the column.
df.loc[filt, 'email']

2    JohnDoe@email.com
Name: email, dtype: object

In [79]:
# OR operator: a row is selected when at least one condition holds.
# Each comparison stays in parentheses because | binds tighter than ==.
filt = (
    (df['last'] == 'Schafer')
    | (df['first'] == 'John')
)

In [80]:
# df.loc[~filt, 'email']
# ~ inverts the boolean filter (selects the rows where filt is False)

### Part 5. Updating

In [81]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [82]:
# Assigning a list replaces every column label positionally, so the list must
# have one entry per existing column, in the existing column order.
df.columns = ['first_name', 'last_name', 'email']

In [83]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [84]:
# Normalize every column label to lower case using the vectorized string accessor.
df.columns = df.columns.str.lower()
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [85]:
# Rename only the listed columns; labels not in the mapping (e.g. 'email') are kept.
# rename() returns a new DataFrame, so rebind df instead of mutating it with inplace=True.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [87]:
# Overwrite the whole row labelled 2; the values are assigned to the columns in order.
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']

In [88]:
# Update only the 'last' and 'email' columns of row 2; 'first' is left untouched.
df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@email.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [89]:
df.at[2, 'last'] = 'Doe'
# Changing only the last name: .at sets a single value by row label and column label

In [90]:
# Boolean mask: True for rows whose email is exactly 'JohnDoe@email.com'.
filt = df['email'] == 'JohnDoe@email.com'

In [91]:
# Series.apply calls the function once per value; here it returns the length of each email.
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [92]:
def update_email(email):
    """Return ``email`` converted to upper case.

    Takes a single value (not a whole Series), so it can be passed to
    ``Series.apply`` and be called once per element.
    """
    uppercased = email.upper()
    return uppercased

In [93]:
df['email'].apply(update_email)
# Not modifying values: apply returns a new Series, and the result is not assigned back to df

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [94]:
# Modify values: assign the result of apply back to the column so the
# upper-cased emails are stored in df. The outputs of the following cells
# (upper-case emails) depend on this assignment having run.
df['email'] = df['email'].apply(update_email)

In [95]:
# DataFrame.apply passes each column (a Series) to the function,
# so this yields the minimum value of every column.
df.apply(pd.Series.min)

first                      Corey
last                         Doe
email    COREYMSCHAFER@GMAIL.COM
dtype: object

In [96]:
# applymap calls the function on every individual cell of the DataFrame.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favor of
# DataFrame.map — confirm the target pandas version before switching.
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,3,17


In [97]:
# Lower-case every cell; str.lower works here because every column holds strings.
# The result is a new DataFrame — df itself is left unchanged.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favor of
# DataFrame.map — confirm the target pandas version before switching.
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,doe,johndoe@email.com


In [98]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,jane,Doe,JANEDOE@EMAIL.COM
2,John,Doe,JOHNDOE@EMAIL.COM
