In [1]:
#basics apply-map-vectorised function
import pandas as pd
import numpy as np

data = np.round(np.random.normal(size=(4, 3)), 2)
df = pd.DataFrame(data, columns=["A", "B", "C"])
df.head()

Unnamed: 0,A,B,C
0,0.63,-0.37,-0.43
1,-0.37,-0.15,0.37
2,-0.53,-0.68,-0.53
3,-1.03,-1.66,0.5


In [2]:
#apply : entire dataframe, vectorised fashion
df.apply(lambda x: 1 + np.abs(x))

Unnamed: 0,A,B,C
0,1.63,1.37,1.43
1,1.37,1.15,1.37
2,1.53,1.68,1.53
3,2.03,2.66,1.5


In [3]:
df.A.apply(np.abs)

0    0.63
1    0.37
2    0.53
3    1.03
Name: A, dtype: float64

In [4]:
df

Unnamed: 0,A,B,C
0,0.63,-0.37,-0.43
1,-0.37,-0.15,0.37
2,-0.53,-0.68,-0.53
3,-1.03,-1.66,0.5


In [5]:
def double_if_positive(x):
    x[x > 0] *= 2
    return x

df.apply(double_if_positive)

Unnamed: 0,A,B,C
0,1.26,-0.37,-0.43
1,-0.37,-0.15,0.74
2,-0.53,-0.68,-0.53
3,-1.03,-1.66,1.0


In [6]:
#map operates on series and uses dictionaries based inputs
series = pd.Series(["Neha", "Rekha", "Jaya", "Susma"])

In [7]:
series.map({"Neha" : "Neha Kakkar"})

0    Neha Kakkar
1            NaN
2            NaN
3            NaN
dtype: object

In [8]:
series.map(lambda d: f"I am {d}")

0     I am Neha
1    I am Rekha
2     I am Jaya
3    I am Susma
dtype: object

In [9]:
#vectorised function
display(df, df.abs())

Unnamed: 0,A,B,C
0,1.26,-0.37,-0.43
1,-0.37,-0.15,0.74
2,-0.53,-0.68,-0.53
3,-1.03,-1.66,1.0


Unnamed: 0,A,B,C
0,1.26,0.37,0.43
1,0.37,0.15,0.74
2,0.53,0.68,0.53
3,1.03,1.66,1.0


In [10]:
series = pd.Series(["Neha Oberoi", "Rekha Rajput", "Jaya Sharma", "Susma Khan", "Nirma Yadav"])
series

0     Neha Oberoi
1    Rekha Rajput
2     Jaya Sharma
3      Susma Khan
4     Nirma Yadav
dtype: object

In [11]:
series[0].split()

['Neha', 'Oberoi']

In [13]:
series.str.split(expand=True)

Unnamed: 0,0,1
0,Neha,Oberoi
1,Rekha,Rajput
2,Jaya,Sharma
3,Susma,Khan
4,Nirma,Yadav


In [14]:
series.str.contains("Neha Oberoi")

0     True
1    False
2    False
3    False
4    False
dtype: bool

In [16]:
series.str.upper().str.split()

0     [NEHA, OBEROI]
1    [REKHA, RAJPUT]
2     [JAYA, SHARMA]
3      [SUSMA, KHAN]
4     [NIRMA, YADAV]
dtype: object

In [17]:
#user-defined functions
#Vectorising everything you can is the key to speeding up your code.
#Once you've done that, you should use other tools to investigate. 
#PyCharm Professional has a great optimisation tool built in. 
#Jupyter has %lprun (line profiler) command you can find here: https://github.com/rkern/line_profiler

In [18]:
data2 = np.random.normal(10, 2, size=[100000, 2])
df2 = pd.DataFrame(data2, columns=["x", "y"])
df2.head(1)

Unnamed: 0,x,y
0,8.841744,9.050302


In [19]:
hypot = (df2.x**2 + df2.y**2)**0.5
print(hypot[0])

12.652446183266013


In [21]:
for i in range(0, 100000):
    df["hypot"] = hypot[i]

In [25]:
#df.head(2)
#df.tail(2)
#df.sample(2)

Unnamed: 0,A,B,C,hypot
1,-0.37,-0.15,0.74,15.937994
0,1.26,-0.37,-0.43,15.937994
