In [2]:
import pandas as pd

In [3]:
people = {'first': ['Ali','Veli','Cem'], 
         'last': ['İlk','Yedi','Sekiz'],
         'email': ['ilkali@pg.com','yediveli@pg.com','sekizcem@pg.com']}

In [4]:
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Ali,İlk,ilkali@pg.com
1,Veli,Yedi,yediveli@pg.com
2,Cem,Sekiz,sekizcem@pg.com


In [5]:
# selecting a column

df['email']

0      ilkali@pg.com
1    yediveli@pg.com
2    sekizcem@pg.com
Name: email, dtype: object

In [6]:
type(df['email'])

pandas.core.series.Series

In [7]:
# selecting a column

df.email

0      ilkali@pg.com
1    yediveli@pg.com
2    sekizcem@pg.com
Name: email, dtype: object

In [8]:
# selecting multiple columns

df[['last', 'email']]

Unnamed: 0,last,email
0,İlk,ilkali@pg.com
1,Yedi,yediveli@pg.com
2,Sekiz,sekizcem@pg.com


In [9]:
type(df[['last', 'email']])

pandas.core.frame.DataFrame

In [10]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [11]:
# iloc : integer location, gives us the first row

df.iloc[0]

first              Ali
last               İlk
email    ilkali@pg.com
Name: 0, dtype: object

In [12]:
type(df.iloc[0])

pandas.core.series.Series

In [13]:
# selecting multiple rows

df.iloc[[0, 2]]

Unnamed: 0,first,last,email
0,Ali,İlk,ilkali@pg.com
2,Cem,Sekiz,sekizcem@pg.com


In [14]:
type(df.iloc[[0, 2]])

pandas.core.frame.DataFrame

In [15]:
# selecting multiple rows and columns

df.iloc[[0,1], 2] # can't use column name because iloc is for integer location. we use 2(index number) for email 

0      ilkali@pg.com
1    yediveli@pg.com
Name: email, dtype: object

In [16]:
# loc: search for label location

df.loc[[0,1], ['email', 'last']] # different than iloc we can use labels for the columns

Unnamed: 0,email,last
0,ilkali@pg.com,İlk
1,yediveli@pg.com,Yedi


In [17]:
df.loc[0:2, 'first':'email'] # while slicing we don't use square brackets, the second values are inclusive

Unnamed: 0,first,last,email
0,Ali,İlk,ilkali@pg.com
1,Veli,Yedi,yediveli@pg.com
2,Cem,Sekiz,sekizcem@pg.com


In [20]:
df.loc[1:, 'first'] 

1    Veli
2     Cem
Name: first, dtype: object

In [17]:
# set one the columns as index, by default inplace =False

df.set_index('email', inplace = True)
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
ilkali@pg.com,Ali,İlk
yediveli@pg.com,Veli,Yedi
sekizcem@pg.com,Cem,Sekiz


In [18]:
df.index

Index(['ilkali@pg.com', 'yediveli@pg.com', 'sekizcem@pg.com'], dtype='object', name='email')

In [19]:
df.loc['ilkali@pg.com']

first    Ali
last     İlk
Name: ilkali@pg.com, dtype: object

In [20]:
# reset the index, by default inplace =False

df.reset_index(inplace=True)
df

Unnamed: 0,email,first,last
0,ilkali@pg.com,Ali,İlk
1,yediveli@pg.com,Veli,Yedi
2,sekizcem@pg.com,Cem,Sekiz


### filtering

In [21]:
filt = df['last'] == 'İlk'

In [22]:
df[filt]

Unnamed: 0,email,first,last
0,ilkali@pg.com,Ali,İlk


In [23]:
# Alternatively 

df[df['last'] == 'İlk']

Unnamed: 0,email,first,last
0,ilkali@pg.com,Ali,İlk


In [24]:
# Alternatively .loc function can be used. It allows to pass in column names.

df.loc[filt, ['first', 'email']]



Unnamed: 0,first,email
0,Ali,ilkali@pg.com


### and / or operator

In [25]:
# And operator

filt = (df['last'] == 'İlk') & (df['first'] =='Ali')
df.loc[filt, 'email']

0    ilkali@pg.com
Name: email, dtype: object

In [26]:
# Or operator

filt = (df['last'] == 'İlk') | (df['first'] =='Cem')
df.loc[filt, 'email']

0      ilkali@pg.com
2    sekizcem@pg.com
Name: email, dtype: object

In [27]:
# negate operator

filt = (df['last'] == 'İlk') & (df['first'] =='Ali')
df.loc[~filt, 'email']

1    yediveli@pg.com
2    sekizcem@pg.com
Name: email, dtype: object

### updating columns

In [28]:
# updating all the columns, some may remain the same

df.columns = ['email', 'first_name','last_name']

In [29]:
df.columns

Index(['email', 'first_name', 'last_name'], dtype='object')

In [30]:
# using list comprehension

df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,EMAIL,FIRST_NAME,LAST_NAME
0,ilkali@pg.com,Ali,İlk
1,yediveli@pg.com,Veli,Yedi
2,sekizcem@pg.com,Cem,Sekiz


In [31]:
# uisng str method

df.columns = df.columns.str.replace('_', ' ')
df

Unnamed: 0,EMAIL,FIRST NAME,LAST NAME
0,ilkali@pg.com,Ali,İlk
1,yediveli@pg.com,Veli,Yedi
2,sekizcem@pg.com,Cem,Sekiz


In [32]:
# undo the changes

df.columns = df.columns.str.replace(' ','_')
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,email,first_name,last_name
0,ilkali@pg.com,Ali,İlk
1,yediveli@pg.com,Veli,Yedi
2,sekizcem@pg.com,Cem,Sekiz


In [33]:
# rename particular columns, by default inplace =False

df.rename(columns={'first_name':'first', 'last_name':'last'}, inplace = True)
df

Unnamed: 0,email,first,last
0,ilkali@pg.com,Ali,İlk
1,yediveli@pg.com,Veli,Yedi
2,sekizcem@pg.com,Cem,Sekiz


### updating rows

In [34]:
# updating the whole row

df.loc[2] = ['dokuzirem@pg.com', 'Irem', 'Dokuz']
df

Unnamed: 0,email,first,last
0,ilkali@pg.com,Ali,İlk
1,yediveli@pg.com,Veli,Yedi
2,dokuzirem@pg.com,Irem,Dokuz


In [35]:
# updating spesific values

df.loc[2, ['email', 'first']] = ['dokuzcem@pg.com','Cem']
df

Unnamed: 0,email,first,last
0,ilkali@pg.com,Ali,İlk
1,yediveli@pg.com,Veli,Yedi
2,dokuzcem@pg.com,Cem,Dokuz


In [36]:
# updating a single value, just an alternative, it can also be done with df.loc

df.at[2, 'last'] = 'Sekiz'
df

Unnamed: 0,email,first,last
0,ilkali@pg.com,Ali,İlk
1,yediveli@pg.com,Veli,Yedi
2,dokuzcem@pg.com,Cem,Sekiz


In [37]:
# using filters

# below is the common mistake

filt = df['email'] == 'dokuzcem@pg.com'
df[filt]['email'] = 'sekizcem@pg.com'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [38]:
# here is the right version

df.loc[filt, 'email'] = 'sekizcem@pg.com'
df

Unnamed: 0,email,first,last
0,ilkali@pg.com,Ali,İlk
1,yediveli@pg.com,Veli,Yedi
2,sekizcem@pg.com,Cem,Sekiz


In [39]:
# updating multiple rows

df['email'] = df['email'].str.upper()
df

Unnamed: 0,email,first,last
0,ILKALI@PG.COM,Ali,İlk
1,YEDIVELI@PG.COM,Veli,Yedi
2,SEKIZCEM@PG.COM,Cem,Sekiz


### apply, map, applymap and replace methods

In [40]:
# apply function on series
# we can use built in functions

df['email'].apply(len)

0    13
1    15
2    15
Name: email, dtype: int64

In [41]:
# we can use our functions as welll

def update_email(email):
    return email.lower()

df['email'] = df['email'].apply(update_email) # we should assign it to a column for changes to take place
df

Unnamed: 0,email,first,last
0,ilkali@pg.com,Ali,İlk
1,yediveli@pg.com,Veli,Yedi
2,sekizcem@pg.com,Cem,Sekiz


In [42]:
# we can use lambda functions as well

df['email'] = df['email'].apply(lambda x: x.upper()) 
df

Unnamed: 0,email,first,last
0,ILKALI@PG.COM,Ali,İlk
1,YEDIVELI@PG.COM,Veli,Yedi
2,SEKIZCEM@PG.COM,Cem,Sekiz


In [43]:
# apply function on dataframe


# it's not applying the function to every value in the dataframe
# it's applying the function to each series in the dataframe
# by default axis = 'rows'
df.apply(len)

email    3
first    3
last     3
dtype: int64

In [44]:
# we can get the same result for a spesific column

len(df['email'])

3

In [45]:
# we can aplly it to the rows

df.apply(len, axis ='columns')

0    3
1    3
2    3
dtype: int64

In [46]:
df.apply(min)

email    ILKALI@PG.COM
first              Ali
last             Sekiz
dtype: object

In [47]:
# lambda version

df.apply(lambda x: x.min()) 

email    ILKALI@PG.COM
first              Ali
last             Sekiz
dtype: object

In [48]:
# applymap method applies a function to every value in the dataframe,
# it doesn't work on series

df.applymap(len)

Unnamed: 0,email,first,last
0,13,3,3
1,15,4,4
2,15,3,5


In [49]:
df = df.applymap(str.upper)
df

Unnamed: 0,email,first,last
0,ILKALI@PG.COM,ALI,İLK
1,YEDIVELI@PG.COM,VELI,YEDI
2,SEKIZCEM@PG.COM,CEM,SEKIZ


In [50]:
# map method only works on series
# used for subsituting each value in a series wtih another value

df['first'].map({'ALI': 'Jim', 'VELI': 'Jane'})

0     Jim
1    Jane
2     NaN
Name: first, dtype: object

In [51]:
# when we use map method, values that didn't subsitutted returns NaN 
# if we don't want this, we can use replace method

df['first'].replace({'ALI': 'Jim', 'VELI': 'Jane'})

0     Jim
1    Jane
2     CEM
Name: first, dtype: object

### Add /Remove rows and columns

In [52]:
# Columns

df['full_name'] = df['first'] + ' ' + df['last']
df

Unnamed: 0,email,first,last,full_name
0,ILKALI@PG.COM,ALI,İLK,ALI İLK
1,YEDIVELI@PG.COM,VELI,YEDI,VELI YEDI
2,SEKIZCEM@PG.COM,CEM,SEKIZ,CEM SEKIZ


In [53]:
df.drop(columns=['first', 'last'], inplace=True)
df

Unnamed: 0,email,full_name
0,ILKALI@PG.COM,ALI İLK
1,YEDIVELI@PG.COM,VELI YEDI
2,SEKIZCEM@PG.COM,CEM SEKIZ


In [54]:
# recreate the dropped columns

df['full_name'].str.split(' ')

# we get the 'first' and 'last in a list'.
# to assign these two to two different columns we expand the split method

0      [ALI, İLK]
1    [VELI, YEDI]
2    [CEM, SEKIZ]
Name: full_name, dtype: object

In [55]:
df['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,ALI,İLK
1,VELI,YEDI
2,CEM,SEKIZ


In [56]:
df[['first', 'last']] = df['full_name'].str.split(' ', expand=True)
df

Unnamed: 0,email,full_name,first,last
0,ILKALI@PG.COM,ALI İLK,ALI,İLK
1,YEDIVELI@PG.COM,VELI YEDI,VELI,YEDI
2,SEKIZCEM@PG.COM,CEM SEKIZ,CEM,SEKIZ


In [57]:
# Rows

df.append({'first':'Berk'}, ignore_index=True)


Unnamed: 0,email,full_name,first,last
0,ILKALI@PG.COM,ALI İLK,ALI,İLK
1,YEDIVELI@PG.COM,VELI YEDI,VELI,YEDI
2,SEKIZCEM@PG.COM,CEM SEKIZ,CEM,SEKIZ
3,,,Berk,


In [58]:
people = {'first': ['Can','David'], 
         'last': ['Doe','Joe'],
         'email': ['doecan@pg.com','joedavid@pg.com']}

df2 = pd.DataFrame(people)
df2

Unnamed: 0,first,last,email
0,Can,Doe,doecan@pg.com
1,David,Joe,joedavid@pg.com


In [59]:
df.append(df2)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Unnamed: 0,email,first,full_name,last
0,ILKALI@PG.COM,ALI,ALI İLK,İLK
1,YEDIVELI@PG.COM,VELI,VELI YEDI,YEDI
2,SEKIZCEM@PG.COM,CEM,CEM SEKIZ,SEKIZ
0,doecan@pg.com,Can,,Doe
1,joedavid@pg.com,David,,Joe


In [60]:
# ignore_index=True to fix the index
# sort=False, not to sort the columns and get rid of the future warning
# assign it to df for changes to take place in.

df = df.append(df2, ignore_index=True, sort =False)

In [61]:
df

Unnamed: 0,email,full_name,first,last
0,ILKALI@PG.COM,ALI İLK,ALI,İLK
1,YEDIVELI@PG.COM,VELI YEDI,VELI,YEDI
2,SEKIZCEM@PG.COM,CEM SEKIZ,CEM,SEKIZ
3,doecan@pg.com,,Can,Doe
4,joedavid@pg.com,,David,Joe


In [62]:
# to remove a row

df.drop(index=4)

Unnamed: 0,email,full_name,first,last
0,ILKALI@PG.COM,ALI İLK,ALI,İLK
1,YEDIVELI@PG.COM,VELI YEDI,VELI,YEDI
2,SEKIZCEM@PG.COM,CEM SEKIZ,CEM,SEKIZ
3,doecan@pg.com,,Can,Doe


In [63]:
# use filtering for droping a row

filt = df['last'] =='YEDI'
df.drop(index=df[filt].index)

Unnamed: 0,email,full_name,first,last
0,ILKALI@PG.COM,ALI İLK,ALI,İLK
2,SEKIZCEM@PG.COM,CEM SEKIZ,CEM,SEKIZ
3,doecan@pg.com,,Can,Doe
4,joedavid@pg.com,,David,Joe


Source:
* [Corey Schafer - Python Pandas Tutorial](https://www.youtube.com/watch?v=ZyhVh-qRZPA&list=PL-osiE80TeTsWmV9i9c58mdDCSskIFdDS&index=1)