In [1]:
import numpy as np
import pandas as pd

In [2]:
# Column-oriented sample data: every key becomes a DataFrame column, and the
# i-th entry of each list describes the same city.
# NOTE(review): units are not stated anywhere in this notebook; area looks
# like km^2 and population like crores, but confirm before reusing the numbers.
cities = dict(
    population=[1.9, 0.31, 1.84],
    area=[1484, 331, 603],
    state=['Delhi', 'Maharashtra', 'Maharashtra'],
)


In [3]:
# Build the frame from the column dict; the explicit index labels each row
# with its city name instead of the default 0..n-1 integers.
cities_frame = pd.DataFrame(
    data=cities,
    index=['Delhi', 'Pune', 'Mumbai'],
)
cities_frame

Unnamed: 0,area,population,state
Delhi,1484,1.9,Delhi
Pune,331,0.31,Maharashtra
Mumbai,603,1.84,Maharashtra


### Operations on DataFrames

In [4]:
# Transpose: swap the index and the columns (`.T` is shorthand for this call)
cities_frame.transpose()

Unnamed: 0,Delhi,Pune,Mumbai
area,1484,331,603
population,1.9,0.31,1.84
state,Delhi,Maharashtra,Maharashtra


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max); with no
# arguments only the numeric columns are described
cities_frame.describe()

Unnamed: 0,area,population
count,3.0,3.0
mean,806.0,1.35
std,602.709715,0.901166
min,331.0,0.31
25%,467.0,1.075
50%,603.0,1.84
75%,1043.5,1.87
max,1484.0,1.9


In [6]:
# Summary of the string (object-dtype) columns only: count, number of
# unique values, the most frequent value and how often it occurs
cities_frame.describe(include=['object'])

Unnamed: 0,state
count,3
unique,2
top,Maharashtra
freq,2


In [7]:
# Summary statistics for every column at once; a statistic that does not
# apply to a column's dtype is left empty (NaN) for that column
cities_frame.describe(include='all')

Unnamed: 0,area,population,state
count,3.0,3.0,3
unique,,,2
top,,,Maharashtra
freq,,,2
mean,806.0,1.35,
std,602.709715,0.901166,
min,331.0,0.31,
25%,467.0,1.075,
50%,603.0,1.84,
75%,1043.5,1.87,
max,1484.0,1.9,


In [8]:
# `pipe` hands the whole frame to a function and returns that function's
# result, so plain functions can take part in a method chain: describe the
# frame first, then count the non-null entries of that summary.
(
    cities_frame
    .pipe(pd.DataFrame.describe)
    .pipe(pd.DataFrame.count)
)

area          8
population    8
dtype: int64

### Operations on rows (axis=0: applied down the rows, one result per column)

In [9]:
# Show the frame again as a reference for the examples in this section
cities_frame

Unnamed: 0,area,population,state
Delhi,1484,1.9,Delhi
Pune,331,0.31,Maharashtra
Mumbai,603,1.84,Maharashtra


In [10]:
# `apply` calls the function once per column (axis=0, the default), passing
# that column as a Series; `print` returns None, hence the all-None result.
cities_frame.apply(print, axis=0)

Delhi     1484
Pune       331
Mumbai     603
Name: area, dtype: object
Delhi      1.9
Pune      0.31
Mumbai    1.84
Name: population, dtype: object
Delhi           Delhi
Pune      Maharashtra
Mumbai    Maharashtra
Name: state, dtype: object


area          None
population    None
state         None
dtype: object

In [11]:
# The function receives each column as a Series and returns a Series; the
# returned Series are reassembled as the columns of a new DataFrame.
cities_frame.apply(lambda column: column.loc[['Delhi', 'Pune']])

Unnamed: 0,area,population,state
Delhi,1484,1.9,Delhi
Pune,331,0.31,Maharashtra


In [12]:
# Three equivalent ways to total every column (axis=0 is the default):
# the built-in reduction, adding the rows by hand, and `apply` per column.
# The string column is "summed" by concatenation.
print(cities_frame.sum(axis=0))
print('\nSame as above\n')
print(
    cities_frame.loc['Delhi']
    + cities_frame.loc['Pune']
    + cities_frame.loc['Mumbai']
)
print('\nSame as above\n')
print(cities_frame.apply(lambda column: column.sum()))

area                                 2418
population                           4.05
state         DelhiMaharashtraMaharashtra
dtype: object

Same as above

area                                 2418
population                           4.05
state         DelhiMaharashtraMaharashtra
dtype: object

Same as above

area                                 2418
population                           4.05
state         DelhiMaharashtraMaharashtra
dtype: object


In [13]:
# Number of non-null entries in each column (counted down the rows, axis=0)
cities_frame.count(axis=0)

area          3
population    3
state         3
dtype: int64

In [14]:
# Mean of each numeric column. `numeric_only=True` skips the string `state`
# column explicitly: older pandas dropped such columns silently, but
# pandas 2.0+ raises a TypeError when asked to average strings.
cities_frame.mean(numeric_only=True)

area          806.00
population      1.35
dtype: float64

### Operations on columns (axis=1: applied across the columns, one result per row)

In [15]:
# Show the frame again as a reference for the examples in this section
cities_frame

Unnamed: 0,area,population,state
Delhi,1484,1.9,Delhi
Pune,331,0.31,Maharashtra
Mumbai,603,1.84,Maharashtra


In [16]:
# With axis='columns' (the same as axis=1) the function is called once per
# row; each row arrives as a Series indexed by the column names.
cities_frame.apply(print, axis='columns')

area           1484
population      1.9
state         Delhi
Name: Delhi, dtype: object
area                  331
population           0.31
state         Maharashtra
Name: Pune, dtype: object
area                  603
population           1.84
state         Maharashtra
Name: Mumbai, dtype: object


Delhi     None
Pune      None
Mumbai    None
dtype: object

In [17]:
# axis=1: add up the values across the columns of each row.
# `numeric_only=True` leaves the string `state` column out explicitly: older
# pandas dropped it silently, but pandas 2.0+ raises a TypeError when it
# tries to add a number to a string.
print(cities_frame.sum(axis=1, numeric_only=True))
print('\nSame as above\n')
print(cities_frame['area'] + cities_frame['population'])


Delhi     1485.90
Pune       331.31
Mumbai     604.84
dtype: float64

Same as above

Delhi     1485.90
Pune       331.31
Mumbai     604.84
dtype: float64


### Operations on all elements

In [18]:
# Element-wise transformation: stringify every cell, then wrap it in "< >".
# NOTE: pandas 2.1 renamed `DataFrame.applymap` to `DataFrame.map`;
# `applymap` still works there but emits a FutureWarning.
(
    cities_frame
    .applymap(str)
    .applymap(lambda cell: f'< {cell} >')
)

Unnamed: 0,area,population,state
Delhi,< 1484 >,< 1.9 >,< Delhi >
Pune,< 331 >,< 0.31 >,< Maharashtra >
Mumbai,< 603 >,< 1.84 >,< Maharashtra >
