In [1]:
import numpy as np;
import pandas as pd;

### Index and Column arrangement

#### Reorder the columns/index while frame creation

In [2]:
temperatures = {
    "Bengaluru" : {'rain':20,'summer':35,'winter':10}, # Column1
    "Delhi" : {'rain':32,'summer':40,'winter':10}, # Column2
    "Mumbai" : {'rain':32,'summer':40,'winter':30}, # Column3
}

# Columns are already named in the data, we just re-order them, if given differnt name it becomes a NaN column
temperatures_frame = pd.DataFrame(temperatures
             ,index=['winter','rain','summer']
             ,columns=['Mumbai','Delhi','Bengaluru']
)

temperatures_frame

Unnamed: 0,Mumbai,Delhi,Bengaluru
winter,30,10,10
rain,32,32,20
summer,40,40,35


 #### Name the columns/index while frame creation

In [3]:
temperatures = [
    [27,33,35,42,41,43,34,35,37,41,26,23],
    [9,10,35,42,41,43,34,35,37,38,12,8],
    [9,10,30,34,36,35,30,28,22,27,12,8]
]


# Note : While creating this data-frame we are just naming the index and columns (in case of list-of-dict,
# dict-of-list,dict-of-dict 
# columns/index could be already named, in those cases we just re-order while creation)
temperatures_frame = pd.DataFrame(temperatures
             ,columns=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
             ,index=['Mumbai','Delhi','Bengaluru']
            
)

temperatures_frame




Unnamed: 0,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
Mumbai,27,33,35,42,41,43,34,35,37,41,26,23
Delhi,9,10,35,42,41,43,34,35,37,38,12,8
Bengaluru,9,10,30,34,36,35,30,28,22,27,12,8


#### Reindexing : only re-order/re-select the column/index

In [4]:
# Re-indexing : Change/Drop the index/columns and/or change their order

# This gives out a new dataFrame
temperatures_frame.reindex(columns=['Dec','May','Jul','Oct','NewMonth'],index=['Bengaluru','Mumbai','NewCity'])

# Note as the original data didn't have the data for some columns/index, all data in those columns/index is NaN


Unnamed: 0,Dec,May,Jul,Oct,NewMonth
Bengaluru,8.0,36.0,30.0,27.0,
Mumbai,23.0,41.0,34.0,41.0,
NewCity,,,,,


In [5]:
# This is same as above, as reindex anyways give out a new dataFrame

pd.DataFrame(temperatures_frame,columns=['Dec','May','Jul','Oct'],index=['Bengaluru','Mumbai'])

Unnamed: 0,Dec,May,Jul,Oct
Bengaluru,8,36,30,27
Mumbai,23,41,34,41


In [6]:
# Re-index and columns as another dataFrame

random_temperatures = pd.DataFrame(
    5+45*np.random.random(size=(1,12)),
    columns=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
    index=['Delhi']
);


In [7]:
# As random_temperatures has lesser number of columns and index, after re-indexing only few column/index have values
random_temperatures.reindex_like(temperatures_frame)


Unnamed: 0,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
Mumbai,,,,,,,,,,,,
Delhi,14.031646,41.317133,19.317562,17.507894,7.71734,44.969181,21.272061,44.047875,28.474567,30.476996,8.881613,17.14685
Bengaluru,,,,,,,,,,,,


In [8]:
# ffill : fill like next row
random_temperatures.reindex_like(temperatures_frame,method='ffill')

Unnamed: 0,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
Mumbai,14.031646,41.317133,19.317562,17.507894,7.71734,44.969181,21.272061,44.047875,28.474567,30.476996,8.881613,17.14685
Delhi,14.031646,41.317133,19.317562,17.507894,7.71734,44.969181,21.272061,44.047875,28.474567,30.476996,8.881613,17.14685
Bengaluru,,,,,,,,,,,,


In [9]:
# bfill : fill like previous row
random_temperatures.reindex_like(temperatures_frame,method='bfill')

Unnamed: 0,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
Mumbai,,,,,,,,,,,,
Delhi,14.031646,41.317133,19.317562,17.507894,7.71734,44.969181,21.272061,44.047875,28.474567,30.476996,8.881613,17.14685
Bengaluru,14.031646,41.317133,19.317562,17.507894,7.71734,44.969181,21.272061,44.047875,28.474567,30.476996,8.881613,17.14685


#### Renaming column/index : provide maps (old=>new) for renaming

In [10]:
temperatures_frame.rename(index={'Mumbai':'Mum'},columns={'Jan':'January','Oct':'October'},inplace=True)
temperatures_frame

Unnamed: 0,January,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,October,Nov,Dec
Mum,27,33,35,42,41,43,34,35,37,41,26,23
Delhi,9,10,35,42,41,43,34,35,37,38,12,8
Bengaluru,9,10,30,34,36,35,30,28,22,27,12,8


#### Existing column as index 

In [11]:

cities = {
    'name':['Delhi','Pune','Mumbai'],
    'population':[1.9,0.31,1.84],
    'area':[1484,331,603],
    'state':['Central','Maharashtra','Maharashtra']
}


In [12]:
pd.DataFrame(cities)

Unnamed: 0,area,name,population,state
0,1484,Delhi,1.9,Central
1,331,Pune,0.31,Maharashtra
2,603,Mumbai,1.84,Maharashtra


In [13]:
# A nice idea to use an existing column as index

pd.DataFrame(cities,
             index=cities['name'], # This gives all the city names
             columns=cities.keys()-['name'] # Now exclude the 'name' column as it is already the index column
            )

Unnamed: 0,state,population,area
Delhi,Central,1.9,1484
Pune,Maharashtra,0.31,331
Mumbai,Maharashtra,1.84,603


In [14]:
# Another inplace and simpler way to set existing column as index

cities_frame = pd.DataFrame(cities)
cities_frame.set_index('name',inplace=True)
cities_frame

Unnamed: 0_level_0,area,population,state
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Delhi,1484,1.9,Central
Pune,331,0.31,Maharashtra
Mumbai,603,1.84,Maharashtra


#### Insert new columns 

In [15]:
# alter table add column country  (at the end and with same value)
cities_with_country = cities_frame.copy(deep=True)
cities_with_country['country'] = 'India'
cities_with_country

Unnamed: 0_level_0,area,population,state,country
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Delhi,1484,1.9,Central,India
Pune,331,0.31,Maharashtra,India
Mumbai,603,1.84,Maharashtra,India


In [16]:
# alter table add column weather  (at the end)
cities_with_weather = cities_frame.copy(deep=True)
cities_with_weather['weather'] = ['Hot','Pleasant','Humid']
cities_with_weather

Unnamed: 0_level_0,area,population,state,weather
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Delhi,1484,1.9,Central,Hot
Pune,331,0.31,Maharashtra,Pleasant
Mumbai,603,1.84,Maharashtra,Humid


In [17]:
# alter table add column weather  (at the end)
cities_with_weather = cities_frame.copy(deep=True)
cities_with_weather['weather'] = pd.Series(['Hot','Humid','Pleasant'],index=['Delhi','Mumbai','Pune'])
cities_with_weather

Unnamed: 0_level_0,area,population,state,weather
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Delhi,1484,1.9,Central,Hot
Pune,331,0.31,Maharashtra,Pleasant
Mumbai,603,1.84,Maharashtra,Humid


In [18]:
# alter table add column region  (at specific position)
cities_with_regions = cities_frame.copy(deep=True)
cities_with_regions.insert(1,'region',None)
cities_with_regions

Unnamed: 0_level_0,area,region,population,state
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Delhi,1484,,1.9,Central
Pune,331,,0.31,Maharashtra
Mumbai,603,,1.84,Maharashtra


In [19]:
# alter table add column it default True
cities_with_it = cities_frame.copy(deep=True)
cities_with_it.insert(2,'it',True)
cities_with_it

Unnamed: 0_level_0,area,population,it,state
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Delhi,1484,1.9,True,Central
Pune,331,0.31,True,Maharashtra
Mumbai,603,1.84,True,Maharashtra


In [20]:
# new column value derived from an existing column
# alter table add column region default ....
cities_with_readable_population = cities_frame.copy(deep=True)
cities_with_readable_population.insert(
    loc=3,
    column='readable population',
    value = cities_with_readable_population.population.apply(lambda p : str(p)+' lacs')
)
cities_with_readable_population

Unnamed: 0_level_0,area,population,state,readable population
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Delhi,1484,1.9,Central,1.9 lacs
Pune,331,0.31,Maharashtra,0.31 lacs
Mumbai,603,1.84,Maharashtra,1.84 lacs


#### Drop column

In [21]:
# alter table cities drop column population (with dictionary syntax 'del')

cities_without_population = cities_frame.copy(deep=True)
del cities_without_population['population']
cities_without_population

Unnamed: 0_level_0,area,state
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Delhi,1484,Central
Pune,331,Maharashtra
Mumbai,603,Maharashtra


In [22]:
# alter table cities drop column population (using 'pop')

cities_without_population = cities_frame.copy(deep=True)
cities_without_population.pop('population')
cities_without_population

Unnamed: 0_level_0,area,state
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Delhi,1484,Central
Pune,331,Maharashtra
Mumbai,603,Maharashtra
