In [1]:
import pandas as pd

In [98]:
# Hand-written sample data: three parallel lists with one entry per person.
# The keyword names are the dictionary keys ('first', 'last', 'email').
people = dict(
    first=['Corey', 'Jane', 'John'],
    last=['Schafer', 'Doe', 'Doe'],
    email=['CoreymSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'],
)

In [3]:
# Display limits: let pandas print up to 85 rows / 85 columns before truncating.
pd.set_option('display.max_rows', 85)
pd.set_option('display.max_columns', 85)

# Survey results CSV, indexed by its 'Respondent' column.
df = pd.read_csv('data/survey_results_public.csv', index_col='Respondent')
# Accompanying schema CSV, indexed by its 'Column' column.
schema_df = pd.read_csv('data/survey_results_schema.csv', index_col='Column')

In [99]:
# Build the demo frame from the dict of equal-length lists: each key becomes a
# column, and the rows get the default 0..n-1 integer index.
df2 = pd.DataFrame(data=people)
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreymSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [7]:
df2.columns

Index(['first', 'last', 'email'], dtype='object')

In [24]:
# Replace every column label at once. The list is positional, so it must
# provide exactly one new label per existing column, in column order.
df2.columns = ['first_name', 'last_name', 'email']
df2

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [23]:
# Upper-case every column label with the vectorised Index.str accessor, the
# same mechanism the notebook's .str.replace cells use on df2.columns, instead
# of a hand-written Python loop over the labels.
df2.columns = df2.columns.str.upper()
df2

Unnamed: 0,FIRST-NAME,LAST-NAME,EMAIL
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [22]:
# Swap underscores for dashes in every column label.
# regex=False pins a literal (non-regex) replacement, so the cell behaves the
# same regardless of the pandas version's default for `regex`.
df2.columns = df2.columns.str.replace('_', '-', regex=False)
df2

Unnamed: 0,first-name,last-name,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [20]:
# Swap dashes back to underscores in every column label.
# regex=False pins a literal (non-regex) replacement, so the cell behaves the
# same regardless of the pandas version's default for `regex`.
df2.columns = df2.columns.str.replace('-', '_', regex=False)
df2

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [21]:
# Lower-case every column label with the vectorised Index.str accessor, the
# same mechanism the notebook's .str.replace cells use on df2.columns, instead
# of a hand-written Python loop over the labels.
df2.columns = df2.columns.str.lower()
df2

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [30]:
# DataFrame.rename returns a renamed copy and leaves df2 itself untouched, so
# the call is kept as the cell's last expression to display that copy.
# (Previously the returned frame was discarded and the unchanged df2 was the
# displayed expression.)
df2.rename(columns={'first_name': 'first', 'last_name': 'last'})

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [31]:
# Persist the rename by rebinding df2 to the returned frame rather than using
# inplace=True: df2 ends up with the same labels, and the cell stays chainable.
# Labels that are not present are ignored by rename, so re-running the cell is
# harmless.
df2 = df2.rename(columns={'first_name': 'first', 'last_name': 'last'})
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [32]:
df2.loc[2]

first                 John
last                   Doe
email    johndoe@email.com
Name: 2, dtype: object

In [35]:
# Overwrite the whole row labelled 2; the list is matched to the columns
# positionally (first, last, email).
df2.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']

In [36]:
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,JohnSmith@email.com


In [37]:
df2.loc[2, ['last', 'email']]

last                   Smith
email    JohnSmith@email.com
Name: 2, dtype: object

In [38]:
# Update only the 'last' and 'email' cells of row 2; the values are matched to
# the listed columns in order.
df2.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@email.com']

In [39]:
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,JohnDoe@email.com


In [42]:
# Set a single cell (row label 2, column 'last') through .loc.
df2.loc[2, 'last'] = 'Smith'

In [43]:
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,JohnDoe@email.com


In [44]:
# Same single-cell update through .at, the scalar-only accessor
# (one row label, one column label).
df2.at[2, 'last'] = 'Doe'

In [45]:
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,JohnDoe@email.com


In [47]:
# Boolean mask: True for rows whose email is exactly 'JohnDoe@email.com'
# (the comparison is case-sensitive).
filt = df2['email'] == 'JohnDoe@email.com'
# Show the rows selected by the mask.
df2.loc[filt]

In [49]:
# Chained indexing used for reading: df2[filt] selects the masked rows, then
# ['last'] picks that column from the intermediate result.
df2[filt]['last']

2    Doe
Name: last, dtype: object

In [50]:
type(df2[filt]['last'])

pandas.core.series.Series

In [51]:
type(df2[filt])

pandas.core.frame.DataFrame

In [72]:
filt = (df2['email'] == 'johndoe@email.com')
# Demonstration of chained assignment: df2[filt] produces an intermediate
# object and the assignment targets that object, so pandas emits the
# "A value is trying to be set on a copy of a slice from a DataFrame" warning.
# The supported spelling is df2.loc[filt, 'last'] = 'Smith'.
# NOTE(review): this mask uses the all-lowercase address while the sample data
# is created with 'JohnDoe@email.com' -- confirm the intended casing.
df2[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [101]:
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreymSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [103]:
# Case-sensitive boolean mask for the row(s) whose email is 'JohnDoe@email.com'.
filt = df2['email'] == 'JohnDoe@email.com'
# Single .loc call (row mask + column label) so the write lands on df2 itself.
df2.loc[filt, 'last'] = 'Smith'

In [104]:
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreymSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [105]:
df2['email'].str.lower()

0    coreymschafer@gmail.com
1          janedoe@email.com
2          johndoe@email.com
Name: email, dtype: object

In [106]:
# .str.upper() returns a new Series; assigning it back to the column is what
# makes the change stick in df2.
df2['email'] = df2['email'].str.upper()
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [107]:
# Same pattern in the other direction: store the lower-cased Series back into
# the 'email' column.
df2['email'] = df2['email'].str.lower()
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [108]:
df2['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [109]:
def update_email(email: str) -> str:
    """Return ``email`` converted to upper case."""
    upper_cased = email.upper()
    return upper_cased

In [110]:
df2['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [111]:
# Series.apply calls update_email once per value; the resulting Series is
# written back to the 'email' column.
df2['email'] = df2['email'].apply(update_email)
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [112]:
# Lower-case each address by handing str.lower straight to Series.apply
# (called once per value), then store the result back in the column.
df2['email'] = df2['email'].apply(str.lower)
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [113]:
df2['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [115]:
df2.apply(len)

first    3
last     3
email    3
dtype: int64

In [117]:
len(df2['email'])

3

In [119]:
df2.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [120]:
df2.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [122]:
df2.apply(lambda x: x.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [123]:
# Apply len to every individual cell of the frame (element-wise), giving a
# frame of string lengths with the same shape.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map -- switch once the notebook's pandas version allows it.
df2.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [126]:
# Lower-case every cell; this works because every column here holds strings.
# The result is only displayed -- df2 itself is not modified.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map -- switch once the notebook's pandas version allows it.
df2.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


In [127]:
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [129]:
df2['first'].map({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [130]:
df2['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [133]:
# replace() substitutes only the values listed in the mapping and keeps every
# other value as-is (map() with the same dict would turn them into NaN), so it
# is the one assigned back to the column.
df2['first'] = df2['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

In [134]:
df2

Unnamed: 0,first,last,email
0,Chris,Schafer,coreymschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com
