In [1]:
# Sample data: one key per column, each list holding that column's values row by row.
people = {
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer", "Doe", "Doe"],
    "email": [
        "CoreyMSchafer@gmail.com",
        "JaneDoe@email.com",
        "JohnDoe@email.com",
    ],
}

from pathlib import Path

import pandas as pd

# Build the DataFrame from the dict: each key becomes a column label and
# each list supplies that column's values.
df = pd.DataFrame(people)

In [2]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [3]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [4]:
# Rename all columns at once by assigning a complete list of new labels.
df.columns = ["first_name", "last_name", "email"]

In [5]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [7]:
# Convert every column name to UPPER CASE using the vectorized .str accessor
# (same style as the .str.replace call used for column names in this notebook).
df.columns = df.columns.str.upper()
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [8]:
# Swap every underscore in the column names for a space.
df.columns = df.columns.str.replace(pat="_", repl=" ")
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [10]:
# Convert every column name to lower case using the vectorized .str accessor
# (same style as the .str.replace call used for column names in this notebook).
df.columns = df.columns.str.lower()
df


Unnamed: 0,first name,last name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [11]:
# Swap every space in the column names for an underscore.
df.columns = df.columns.str.replace(pat=" ", repl="_")
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [12]:
# Rename only selected columns via an {old_name: new_name} mapping.
# The result is reassigned instead of using inplace=True, which keeps the
# step explicit and chainable while leaving `df` with the same renamed columns.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [13]:
df.loc[2]

first                 John
last                   Doe
email    JohnDoe@email.com
Name: 2, dtype: object

In [14]:
# Change a single cell: row label 2, column 'first'.
df.loc[2, "first"] = "Smith"

In [15]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,Smith,Doe,JohnDoe@email.com


In [16]:
# Overwrite the entire row with label 2; values are given in column order.
df.loc[2] = ["Smith", "Doe", "SmithDoe@email.com"]

In [17]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,Smith,Doe,SmithDoe@email.com


In [18]:
# Update only the 'last' and 'email' columns of row 2.
df.loc[2, ["last", "email"]] = ["Doe", "JohnDoe@email.com"]

In [19]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,Smith,Doe,JohnDoe@email.com


In [20]:
# .at sets one value addressed by a row label and a column label.
df.at[2, "last"] = "Smith"

In [21]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,Smith,Smith,JohnDoe@email.com


In [22]:
# Put row 2 back to the values it had when the frame was created.
df.loc[2] = ["John", "Doe", "JohnDoe@email.com"]
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [28]:
# Boolean mask: True for rows whose e-mail equals the given address.
filt = df["email"] == "JohnDoe@email.com"
filt

0    False
1    False
2     True
Name: email, dtype: bool

In [29]:
df[filt]

Unnamed: 0,first,last,email
2,John,Doe,JohnDoe@email.com


In [30]:
# Deliberate anti-pattern (chained indexing): df[filt] produces a temporary copy,
# so this assignment raises SettingWithCopyWarning and leaves df unchanged.
# Use a single df.loc[filt, 'last'] = ... assignment to modify df itself.
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [31]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [32]:
# Assign through the filter with one .loc call: row mask first, column label second.
df.loc[filt, "last"] = "Smith"
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [33]:
df['email'].str.lower()

0    coreymschafer@gmail.com
1          janedoe@email.com
2          johndoe@email.com
Name: email, dtype: object

In [34]:
# Store the lower-cased e-mail addresses back into the 'email' column.
df["email"] = df["email"].str.lower()

In [35]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [None]:
# apply, map, applymap and replace

In [None]:
# apply works on both a Series and a DataFrame: on a Series it calls the function on each value; on a DataFrame it calls it on each column (or on each row with axis='columns').

In [36]:
df['email'].apply(len) # applying a function on each value.

0    23
1    17
2    17
Name: email, dtype: int64

In [37]:
# Custom function that upper-cases an e-mail address.
def update_email(email):
    """Return ``email`` converted to upper case."""
    upper_cased = email.upper()
    return upper_cased

In [38]:
df['email'].apply(update_email) # pass the function itself (no parentheses); apply calls it for each value

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [39]:
# Store the output of the custom function back into the 'email' column.
df["email"] = df["email"].apply(update_email)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [40]:
# A short lambda can be passed to apply in place of a named function.
df["email"] = df["email"].apply(lambda address: address.lower())
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [41]:
# On a DataFrame, apply passes each column (a Series) to the function,
# so len reports the number of rows in every column.
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [42]:
# With axis='columns' the function receives each row instead,
# so len reports the number of columns in every row.
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [43]:
df.apply(pd.Series.min) # finding the min using min function of the series method.

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [44]:
df.apply(lambda x: x.min()) # contrived method. 

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [45]:
# applymap calls the function on every individual value of the DataFrame (element-wise).
# It is used here as a DataFrame method, whereas apply is used on both a Series and a DataFrame.
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [46]:
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


In [47]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [48]:
# Series.map substitutes each value using the given mapping.
# Values that are missing from the mapping become NaN (see 'Jane' and 'John' in the output).
df['first'].map({'Corey': "Chris" })

0    Chris
1      NaN
2      NaN
Name: first, dtype: object

In [49]:
# replace also substitutes values, but values that are not in the mapping are kept unchanged.
df['first'].replace({'Corey':'Chris'})

0    Chris
1     Jane
2     John
Name: first, dtype: object

In [50]:
# Store the replaced names back into the 'first' column.
df["first"] = df["first"].replace({"Corey": "Chris"})

In [51]:
df

Unnamed: 0,first,last,email
0,Chris,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [52]:

# Directory holding the survey CSV files. It defaults to the location this notebook
# was originally run from; change it here (only) if the files live somewhere else.
DATA_DIR = Path(r"F:\Machine_learning\Pandas\data")

# Survey answers, indexed by the 'Respondent' column.
df = pd.read_csv(DATA_DIR / "survey_results_public.csv", index_col="Respondent")
# Survey schema, indexed by the 'Column' column.
schema_df = pd.read_csv(DATA_DIR / "survey_results_schema.csv", index_col="Column")

# Raise the display limits to 85 columns and 85 rows.
pd.set_option('display.max_columns', 85)
pd.set_option('display.max_rows', 85)

In [54]:
# Rename the 'ConvertedComp' column to 'Salary USD'.
# The result is reassigned instead of using inplace=True, which keeps the
# step explicit and chainable while leaving `df` with the same renamed column.
df = df.rename(columns={'ConvertedComp': "Salary USD"})

In [55]:
df['Salary USD']

Respondent
1            NaN
2            NaN
3         8820.0
4        61000.0
5            NaN
          ...   
88377        NaN
88601        NaN
88802        NaN
88816        NaN
88863        NaN
Name: Salary USD, Length: 88883, dtype: float64

In [56]:
df['Hobbyist']

Respondent
1        Yes
2         No
3        Yes
4         No
5        Yes
        ... 
88377    Yes
88601     No
88802     No
88816     No
88863    Yes
Name: Hobbyist, Length: 88883, dtype: object

In [57]:
# Translate the 'Yes'/'No' answers into True/False. The result is only displayed here;
# it is not assigned back, so df['Hobbyist'] itself is left unchanged.
df['Hobbyist'].map({'Yes': True,'No':False})

Respondent
1         True
2        False
3         True
4        False
5         True
         ...  
88377     True
88601    False
88802    False
88816    False
88863     True
Name: Hobbyist, Length: 88883, dtype: bool