In [1]:
import numpy as np;
import pandas as pd;

### DML

In [2]:
# Raw column data for three cities; row labels are attached when the frame is built.
cities = dict(
    population=[1.9, 0.31, 1.84],
    area=[1484, 331, 603],
    state=['Delhi', 'Maharashtra', 'Maharashtra'],
)


In [3]:
# Build the frame from the dict, labelling rows by city name.
# Columns requested here but absent from `cities` (type, tier, old_name) start out empty (NaN).
cities_frame = pd.DataFrame(
    data=cities,
    index=['Delhi', 'Pune', 'Mumbai'],
    columns=['area', 'population', 'state', 'type', 'tier', 'old_name'],
)
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484,1.9,Delhi,,,
Pune,331,0.31,Maharashtra,,,
Mumbai,603,1.84,Maharashtra,,,


#### Inserts

In [4]:
# insert into cities values (726,0.7,'Tamilnadu',None,None,'Madras')
# Assigning to a label that is not yet in the index appends a new row in place.

cities_frame.loc['Chennai'] = (726,0.7,'Tamilnadu',None,None,'Madras')
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484,1.9,Delhi,,,
Pune,331,0.31,Maharashtra,,,
Mumbai,603,1.84,Maharashtra,,,
Chennai,726,0.7,Tamilnadu,,,Madras


In [5]:
# insert into cities (area,population,state,type,tier,old_name) values (206.1,1.49,'West Bengal',None,None,'Calcutta')
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
# supported way to add rows. The new row is wrapped in a one-row frame that shares
# the existing column labels and is indexed by the city name.
# This creates a new data frame; the original remains intact.
pd.concat(
    [
        cities_frame,
        pd.DataFrame(
            [[206.1, 1.49, 'West Bengal', None, None, 'Calcutta']],
            index=['Kolkata'],
            columns=cities_frame.columns,
        ),
    ]
)

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484.0,1.9,Delhi,,,
Pune,331.0,0.31,Maharashtra,,,
Mumbai,603.0,1.84,Maharashtra,,,
Chennai,726.0,0.7,Tamilnadu,,,Madras
Kolkata,206.1,1.49,West Bengal,,,Calcutta


#### Updates

In [6]:
# SQL analogue: update cities set type = 'Unassigned'
# A scalar assigned to a column is broadcast to every row.
cities_frame['type'] = 'Unassigned'
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484,1.9,Delhi,Unassigned,,
Pune,331,0.31,Maharashtra,Unassigned,,
Mumbai,603,1.84,Maharashtra,Unassigned,,
Chennai,726,0.7,Tamilnadu,Unassigned,,Madras


In [7]:
# A list assigns one value per row, in row order.
cities_frame['type'] = ['UT', 'Normal', 'State Capital', 'State Capital']
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484,1.9,Delhi,UT,,
Pune,331,0.31,Maharashtra,Normal,,
Mumbai,603,1.84,Maharashtra,State Capital,,
Chennai,726,0.7,Tamilnadu,State Capital,,Madras


In [8]:
# Columns can be filled from the index labels or from a derived boolean series.
cities_frame['old_name'] = cities_frame.index
cities_frame['type'] = cities_frame['area'] > 500
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484,1.9,Delhi,True,,Delhi
Pune,331,0.31,Maharashtra,False,,Pune
Mumbai,603,1.84,Maharashtra,True,,Mumbai
Chennai,726,0.7,Tamilnadu,True,,Chennai


In [9]:
# Derive the column from another one by applying a function to each value.
cities_frame['type'] = cities_frame['area'].apply(
    lambda area: 'Big' if area > 500 else 'Small'
)
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484,1.9,Delhi,Big,,Delhi
Pune,331,0.31,Maharashtra,Small,,Pune
Mumbai,603,1.84,Maharashtra,Big,,Mumbai
Chennai,726,0.7,Tamilnadu,Big,,Chennai


In [10]:
# Assigning a series aligns on the index: rows whose label is missing from the
# series (Pune, Chennai) end up as NaN.
cities_frame['type'] = pd.Series({'Delhi': 'UT', 'Mumbai': 'Capital'})
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484,1.9,Delhi,UT,,Delhi
Pune,331,0.31,Maharashtra,,,Pune
Mumbai,603,1.84,Maharashtra,Capital,,Mumbai
Chennai,726,0.7,Tamilnadu,,,Chennai


In [11]:
# SQL analogue: update cities set type = 'Big' where area > 600
# .loc[row_mask, column] restricts the assignment to the matching rows.
cities_frame.loc[cities_frame['area'] > 600, 'type'] = 'Big'
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484,1.9,Delhi,Big,,Delhi
Pune,331,0.31,Maharashtra,,,Pune
Mumbai,603,1.84,Maharashtra,Big,,Mumbai
Chennai,726,0.7,Tamilnadu,Big,,Chennai


In [12]:
# update cities set type = 'Big',tier = 1 where area > 600
# The tuple supplies one value per listed column and is applied to every row matching the mask.
cities_frame.loc[cities_frame.area>600,['type','tier']]=('Big',1)
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484,1.9,Delhi,Big,1.0,Delhi
Pune,331,0.31,Maharashtra,,,Pune
Mumbai,603,1.84,Maharashtra,Big,1.0,Mumbai
Chennai,726,0.7,Tamilnadu,Big,1.0,Chennai


In [13]:
# update cities set area = area+1 where area>700
# Increment only the rows matching the mask. The augmented assignment reads and
# writes the same selection, so it does not depend on aligning a full-length
# right-hand side (cities_frame.area + 1) back onto the selected rows.
# NOTE: this cell is not idempotent - every execution adds 1 again, so run it
# exactly once per fresh kernel.

cities_frame.loc[cities_frame['area'] > 700, 'area'] += 1

cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1485,1.9,Delhi,Big,1.0,Delhi
Pune,331,0.31,Maharashtra,,,Pune
Mumbai,603,1.84,Maharashtra,Big,1.0,Mumbai
Chennai,727,0.7,Tamilnadu,Big,1.0,Chennai


In [14]:
# update cities set type = 'Big',tier = 1 where index in ('Delhi','Mumbai')
# Rows are selected by index label here instead of by a boolean mask.
cities_frame.loc[['Delhi','Mumbai'],['type','tier']]=('Big',1)
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1485,1.9,Delhi,Big,1.0,Delhi
Pune,331,0.31,Maharashtra,,,Pune
Mumbai,603,1.84,Maharashtra,Big,1.0,Mumbai
Chennai,727,0.7,Tamilnadu,Big,1.0,Chennai


In [15]:
# Set every column to None for all rows where area > 1000.
# No column selector is given, so the whole matching row is overwritten.

cities_frame.loc[cities_frame.area>1000] = None
cities_frame


Unnamed: 0,area,population,state,type,tier,old_name
Delhi,,,,,,
Pune,331.0,0.31,Maharashtra,,,Pune
Mumbai,603.0,1.84,Maharashtra,Big,1.0,Mumbai
Chennai,727.0,0.7,Tamilnadu,Big,1.0,Chennai


In [16]:
# Restore the Delhi row that the previous cell blanked out.
# NOTE(review): area is restored as 1484, not the incremented 1485 shown in earlier outputs - confirm this is intended.
cities_frame.loc['Delhi'] = (1484,1.90,'Delhi','Big',1,'Delhi')
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484.0,1.9,Delhi,Big,1.0,Delhi
Pune,331.0,0.31,Maharashtra,,,Pune
Mumbai,603.0,1.84,Maharashtra,Big,1.0,Mumbai
Chennai,727.0,0.7,Tamilnadu,Big,1.0,Chennai


#### Update using Replace

In [17]:
# Small integer series used to demonstrate Series.replace.
series = pd.Series(data=[2, 3, 5, 1, 4, 5])

In [18]:
# Replace every 5 with 0; the result is a new series and `series` is left unchanged.
series.replace(to_replace=5, value=0)

0    2
1    3
2    0
3    1
4    4
5    0
dtype: int64

In [19]:
# Same replacement, but inplace=True mutates `series` itself instead of returning a new object.
series.replace(to_replace=5, value=0, inplace=True)
series

0    2
1    3
2    0
3    1
4    4
5    0
dtype: int64

In [20]:
# SQL analogue: update ... set n = -1 where n in (1,2,3)
# A list of targets with a single replacement maps every listed value to that replacement.
series.replace(to_replace=[1, 2, 3], value=-1)

0   -1
1   -1
2    0
3   -1
4    4
5    0
dtype: int64

In [21]:
# Two equal-length lists replace pairwise: 1 -> -1, 2 -> -2, 3 -> -3.
series.replace(to_replace=[1, 2, 3], value=[-1, -2, -3])

0   -2
1   -3
2    0
3   -1
4    4
5    0
dtype: int64

In [22]:
# Replace a value wherever it occurs in any column (index labels are not touched);
# the result is a new frame.
cities_frame.replace(to_replace='Delhi', value='Dilli')

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484.0,1.9,Dilli,Big,1.0,Dilli
Pune,331.0,0.31,Maharashtra,,,Pune
Mumbai,603.0,1.84,Maharashtra,Big,1.0,Mumbai
Chennai,727.0,0.7,Tamilnadu,Big,1.0,Chennai


In [23]:
# Pairwise replacement across all columns: 'Delhi' -> 'Dilli', 'Mumbai' -> 'Bombay'.
cities_frame.replace(
    to_replace=['Delhi', 'Mumbai'],
    value=['Dilli', 'Bombay'],
)

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484.0,1.9,Dilli,Big,1.0,Dilli
Pune,331.0,0.31,Maharashtra,,,Pune
Mumbai,603.0,1.84,Maharashtra,Big,1.0,Bombay
Chennai,727.0,0.7,Tamilnadu,Big,1.0,Chennai


In [24]:
# Regex replacement: string values ending in 'i' have that final 'i' swapped for 'y'.
cities_frame.replace(to_replace=r'(.+)i$', value=r'\1y', regex=True)

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484.0,1.9,Delhy,Big,1.0,Delhy
Pune,331.0,0.31,Maharashtra,,,Pune
Mumbai,603.0,1.84,Maharashtra,Big,1.0,Mumbay
Chennai,727.0,0.7,Tamilnadu,Big,1.0,Chennay


In [25]:
# A flat dict maps old value -> new value and is applied to every column.
cities_frame.replace({'Delhi': 'Dilli', 'Mumbai': 'Bombay'})

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484.0,1.9,Dilli,Big,1.0,Dilli
Pune,331.0,0.31,Maharashtra,,,Pune
Mumbai,603.0,1.84,Maharashtra,Big,1.0,Bombay
Chennai,727.0,0.7,Tamilnadu,Big,1.0,Chennai


In [26]:
# A nested dict scopes each mapping to a single column: {column: {old: new}}.
cities_frame.replace(
    to_replace={
        'state': {'Delhi': 'Dilli'},
        'old_name': {'Mumbai': 'Bombay'},
    }
)

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484.0,1.9,Dilli,Big,1.0,Delhi
Pune,331.0,0.31,Maharashtra,,,Pune
Mumbai,603.0,1.84,Maharashtra,Big,1.0,Bombay
Chennai,727.0,0.7,Tamilnadu,Big,1.0,Chennai


In [27]:
# inplace=True mutates the frame it is called on, so the replacement is applied
# to a deep copy and cities_frame itself stays unchanged.
mutable_cities_frame = cities_frame.copy(deep=True)
mutable_cities_frame.replace(
    to_replace={
        'state': {'Delhi': 'Dilli'},
        'old_name': {'Mumbai': 'Bombay'},
    },
    inplace=True,
)
mutable_cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484.0,1.9,Dilli,Big,1.0,Delhi
Pune,331.0,0.31,Maharashtra,,,Pune
Mumbai,603.0,1.84,Maharashtra,Big,1.0,Bombay
Chennai,727.0,0.7,Tamilnadu,Big,1.0,Chennai


#### Deletes

In [28]:
# Add a mostly-empty row (only state is set) so there is something to delete in the next cell.
cities_frame.loc['Panjim']=(None,None,'Goa',None,None,None)
cities_frame

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484.0,1.9,Delhi,Big,1.0,Delhi
Pune,331.0,0.31,Maharashtra,,,Pune
Mumbai,603.0,1.84,Maharashtra,Big,1.0,Mumbai
Chennai,727.0,0.7,Tamilnadu,Big,1.0,Chennai
Panjim,,,Goa,,,


In [29]:
# SQL analogue: delete from cities where index = 'Panjim'
# drop returns a new data frame; it is not assigned back, so cities_frame keeps the row.
cities_frame.drop(index='Panjim')

Unnamed: 0,area,population,state,type,tier,old_name
Delhi,1484.0,1.9,Delhi,Big,1.0,Delhi
Pune,331.0,0.31,Maharashtra,,,Pune
Mumbai,603.0,1.84,Maharashtra,Big,1.0,Mumbai
Chennai,727.0,0.7,Tamilnadu,Big,1.0,Chennai
