#### Part 5 - Corey Schafer Pandas Series

In [18]:
import pandas as pd

# Toy records used throughout this section: one list per column,
# with position i in every list describing the same person.
people = {
    'fname': ['Suminder', 'Deepinder', 'Nimrat'],
    'lname': ['Singh', 'Kaur', 'Kaur'],
    'email': ['suminder.singh@ex.com', 'dkaur@ex.com', 'nkaur@ex.com'],
}

# Each dict key becomes a column of the small practice frame.
df_ppl = pd.DataFrame(data=people)

# Survey responses and their schema, read from CSV files addressed relative to
# the working directory. NOTE(review): neither frame is referenced in the cells
# visible in this part of the notebook - confirm they are used further down.
df = pd.read_csv('survey_results_public.csv')
schema_df = pd.read_csv('survey_results_schema.csv')

In [19]:
df_ppl.columns

Index(['fname', 'lname', 'email'], dtype='object')

In [20]:
# Assigning a list to .columns replaces all three labels positionally.
df_ppl.columns = ['first_name', 'last_name', 'email_id']

In [21]:
df_ppl

Unnamed: 0,first_name,last_name,email_id
0,Suminder,Singh,suminder.singh@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,Nimrat,Kaur,nkaur@ex.com


In [22]:
# Upper-case every column label with the vectorised string accessor on the
# Index, the same approach the later .str.replace / .str.lower cells use.
df_ppl.columns = df_ppl.columns.str.upper()

In [23]:
df_ppl

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL_ID
0,Suminder,Singh,suminder.singh@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,Nimrat,Kaur,nkaur@ex.com


In [24]:
# .str exposes string methods on a whole column; upper-case every email and store it back.
df_ppl['EMAIL_ID'] = df_ppl['EMAIL_ID'].str.upper()

In [25]:
df_ppl

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL_ID
0,Suminder,Singh,SUMINDER.SINGH@EX.COM
1,Deepinder,Kaur,DKAUR@EX.COM
2,Nimrat,Kaur,NKAUR@EX.COM


In [26]:
# Reverse of the upper-casing cell: put the email column back in lower case.
df_ppl['EMAIL_ID'] = df_ppl['EMAIL_ID'].str.lower()

In [27]:
df_ppl

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL_ID
0,Suminder,Singh,suminder.singh@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,Nimrat,Kaur,nkaur@ex.com


In [28]:
# Swap underscores for spaces in every column label. regex=False pins a literal
# match so the result does not depend on the pandas-version default for `regex`.
df_ppl.columns = df_ppl.columns.str.replace("_", " ", regex=False)

In [29]:
df_ppl

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL ID
0,Suminder,Singh,suminder.singh@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,Nimrat,Kaur,nkaur@ex.com


In [40]:
# Put the underscores back in place of spaces. regex=False pins a literal
# match so the result does not depend on the pandas-version default for `regex`.
df_ppl.columns = df_ppl.columns.str.replace(" ", "_", regex=False)

In [43]:
# Lower-case all column labels via the .str accessor on the column Index.
df_ppl.columns = df_ppl.columns.str.lower()

In [44]:
df_ppl

Unnamed: 0,first_name,last_name,email_id
0,Suminder,Singh,suminder.singh@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,Nimrat,Kaur,nkaur@ex.com


In [45]:
# Rename only the listed columns through an old -> new mapping. The result is
# reassigned instead of using inplace=True, so the cell avoids in-place mutation
# and the call stays chainable.
df_ppl = df_ppl.rename(columns={'first_name': 'fname', 'last_name': 'lname', 'email_id': 'email'})

In [46]:
df_ppl

Unnamed: 0,fname,lname,email
0,Suminder,Singh,suminder.singh@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,Nimrat,Kaur,nkaur@ex.com


In [47]:
# Pitfall demo: the mask selects rows but no column is named, so 'Ting' is
# written into every column of each matching row (see the output below).
df_ppl.loc[df_ppl['lname'] == 'Singh'] = 'Ting'

In [48]:
df_ppl

Unnamed: 0,fname,lname,email
0,Ting,Ting,Ting
1,Deepinder,Kaur,dkaur@ex.com
2,Nimrat,Kaur,nkaur@ex.com


In [49]:
# Rebuild the frame from the `people` dict to discard the whole-row overwrite above.
df_ppl = pd.DataFrame(people)

In [50]:
df_ppl

Unnamed: 0,fname,lname,email
0,Suminder,Singh,suminder.singh@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,Nimrat,Kaur,nkaur@ex.com


In [53]:
# Assigning a list to .loc[<row label>] replaces the entire row labelled 2,
# one value per column in column order.
df_ppl.loc[2] = ['John', 'Smith', 'johnsmith@ex.com']

In [54]:
df_ppl

Unnamed: 0,fname,lname,email
0,Suminder,Singh,suminder.singh@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,John,Smith,johnsmith@ex.com


In [55]:
# Row mask plus a column label: only `lname` is changed in the matching rows.
df_ppl.loc[df_ppl['lname'] == 'Singh', 'lname'] = 'Ting'

In [56]:
df_ppl

Unnamed: 0,fname,lname,email
0,Suminder,Ting,suminder.singh@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,John,Smith,johnsmith@ex.com


In [58]:
# Several columns can be updated at once: a list of column labels on the left
# is paired with a list of new values on the right.
df_ppl.loc[df_ppl['fname'] == 'Suminder', ['lname', 'email']] = ['Singh', 'xxx@ex.com']

In [59]:
df_ppl

Unnamed: 0,fname,lname,email
0,Suminder,Singh,xxx@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,John,Smith,johnsmith@ex.com


In [60]:
# .at addresses a single cell by row label and column label.
df_ppl.at[2, 'lname'] = 'Dodo'  # same effect here as df_ppl.loc[2, 'lname'] = 'Dodo'

In [61]:
df_ppl

Unnamed: 0,fname,lname,email
0,Suminder,Singh,xxx@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,John,Dodo,johnsmith@ex.com


In [62]:
# Upper-case the whole email column through the .str accessor and store it back.
df_ppl['email'] = df_ppl['email'].str.upper()

In [63]:
df_ppl

Unnamed: 0,fname,lname,email
0,Suminder,Singh,XXX@EX.COM
1,Deepinder,Kaur,DKAUR@EX.COM
2,John,Dodo,JOHNSMITH@EX.COM


In [66]:
# Lower-case every email; the assignment goes through .loc with an all-rows slice.
df_ppl.loc[:, 'email'] = df_ppl['email'].str.lower()

In [67]:
df_ppl

Unnamed: 0,fname,lname,email
0,Suminder,Singh,xxx@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,John,Dodo,johnsmith@ex.com


* apply
* map
* applymap
* replace

In [69]:
df_ppl['email'].apply(len)
# On a Series, apply calls the supplied function once per value,
# so this yields the character count of each email.

0    10
1    12
2    16
Name: email, dtype: int64

In [71]:
def update_email(x):
    """Return ``x`` upper-cased via its ``upper()`` method (used with ``Series.apply``)."""
    shouted = x.upper()
    return shouted

# A named function can be handed to apply; the result is only displayed here,
# not assigned back to the frame.
df_ppl['email'].apply(update_email)

0          XXX@EX.COM
1        DKAUR@EX.COM
2    JOHNSMITH@EX.COM
Name: email, dtype: object

In [72]:
# Same idea with an inline lambda instead of a named function; result is displayed, not stored.
df_ppl['email'].apply(lambda x: x.lower())

0          xxx@ex.com
1        dkaur@ex.com
2    johnsmith@ex.com
Name: email, dtype: object

In [74]:
df_ppl.apply(len) 
# On a DataFrame, apply passes each column (as a Series) to the function,
# so len reports the number of rows held in each column.
# The result is a single Series with one entry per column (3 entries here).

fname    3
lname    3
email    3
dtype: int64

In [76]:
df_ppl.apply(len, axis='columns') 
# axis='columns' (same as axis=1) passes each row to the function; the default
# axis=0 / 'index', used in the previous cell, passes each column instead.
# len therefore returns the number of columns for every row label.

0    3
1    3
2    3
dtype: int64

In [78]:
len(df_ppl['email'])  # len of a single column (a Series) is its number of rows - 3 here

3

In [79]:
# pd.Series.min is run on each column; for these string columns the result is
# the smallest string under Python's string ordering.
df_ppl.apply(pd.Series.min)

fname       Deepinder
lname            Dodo
email    dkaur@ex.com
dtype: object

In [80]:
# Equivalent to the previous cell: x is each column (a Series), so x.min() is that column's minimum.
df_ppl.apply(lambda x: x.min())

fname       Deepinder
lname            Dodo
email    dkaur@ex.com
dtype: object

In [81]:
# apply on a Series calls the given function once for every value in that Series,
# whereas apply on a DataFrame calls it once per column, passing each column in as a Series

In [82]:
# applymap exists only on DataFrames: the given function is run on every individual value in the df
# (note: applymap is deprecated since pandas 2.1 in favour of DataFrame.map)

In [83]:
# Character count of every cell. len is passed directly, as in the earlier
# apply(len) cells; wrapping it in a lambda adds nothing.
df_ppl.applymap(len)

Unnamed: 0,fname,lname,email
0,8,5,10
1,9,4,12
2,4,4,16


In [94]:
# str.capitalize upper-cases the first character of each cell and lower-cases the rest.
df_ppl.applymap(str.capitalize)

Unnamed: 0,fname,lname,email
0,Suminder,Singh,Xxx@ex.com
1,Deepinder,Kaur,Dkaur@ex.com
2,John,Dodo,Johnsmith@ex.com


In [95]:
# str.title capitalises the first letter of every run of letters, so the parts
# after '@' and '.' are capitalised too (e.g. 'Xxx@Ex.Com' in the output below).
df_ppl.applymap(str.title)

Unnamed: 0,fname,lname,email
0,Suminder,Singh,Xxx@Ex.Com
1,Deepinder,Kaur,Dkaur@Ex.Com
2,John,Dodo,Johnsmith@Ex.Com


In [96]:
# Lower-case the email column with the vectorised .str accessor rather than a
# Python-level list comprehension. Unlike calling x.lower() in a loop,
# .str.lower() passes missing values through instead of raising AttributeError.
df_ppl.loc[:, 'email'] = df_ppl['email'].str.lower()

In [97]:
df_ppl

Unnamed: 0,fname,lname,email
0,Suminder,Singh,xxx@ex.com
1,Deepinder,Kaur,dkaur@ex.com
2,John,Dodo,johnsmith@ex.com


In [98]:
# Same pattern with .loc on both sides: upper-case the whole email column.
df_ppl.loc[:, 'email'] = df_ppl.loc[:, 'email'].str.upper()

In [99]:
df_ppl

Unnamed: 0,fname,lname,email
0,Suminder,Singh,XXX@EX.COM
1,Deepinder,Kaur,DKAUR@EX.COM
2,John,Dodo,JOHNSMITH@EX.COM


In [100]:
# map method - only works on a series