In [1]:
import pandas as pd
import sys

In [2]:
print('Python version ' + sys.version)
print('Pandas version ' + pd.__version__)

Python version 3.5.1 |Anaconda custom (64-bit)| (default, Feb 16 2016, 09:49:46) [MSC v.1900 64 bit (AMD64)]
Pandas version 0.20.1


# Lambda functions are just another way to define a function

In [3]:
# function
def square(x):
    return x*x

# lambda
result = lambda x: x*x

In [4]:
square(2), result(2)

(4, 4)

In [5]:
square(5), result(5)

(25, 25)

# Lets try examples with dataframes

In [6]:
df = pd.DataFrame({'Close':[10,11,12,13,14,15],
                   'Open':[321,12,654,8,987,13]})
df

Unnamed: 0,Close,Open
0,10,321
1,11,12
2,12,654
3,13,8
4,14,987
5,15,13


# Which rows are greater than 10?  

Since we are doing a comparison, we get back boolean values  

> TIP: for simple comparisons I would go with method 1 all of the time.

In [7]:
# method 1
df.Close > 10

0    False
1     True
2     True
3     True
4     True
5     True
Name: Close, dtype: bool

In [8]:
# method 2
df.Close.apply(lambda x: x > 10)

0    False
1     True
2     True
3     True
4     True
5     True
Name: Close, dtype: bool

In [9]:
# method 3
def tenplus(d):
    return d > 10

df.Close.apply(tenplus)

0    False
1     True
2     True
3     True
4     True
5     True
Name: Close, dtype: bool

# Which rows are greater than 10?  

### Return values instead of booleans

In [10]:
# method 1

mask = df.Close > 10
df[mask]

Unnamed: 0,Close,Open
1,11,12
2,12,654
3,13,8
4,14,987
5,15,13


In [11]:
# method 2

mask = df.Close.apply(lambda x: x > 10)
df[mask]

Unnamed: 0,Close,Open
1,11,12
2,12,654
3,13,8
4,14,987
5,15,13


In [12]:
# method 3
def tenplus(d):
    return d > 10

mask = df.Close.apply(tenplus)
df[mask]

Unnamed: 0,Close,Open
1,11,12
2,12,654
3,13,8
4,14,987
5,15,13


# Create a new column where Closing prices are greater than 10  

> TIP: ***Method 1*** should be faster than any operation that uses ***apply***

In [13]:
# method 1

df['10+'] = df.Close.where(df.Close > 10)
df

Unnamed: 0,Close,Open,10+
0,10,321,
1,11,12,11.0
2,12,654,12.0
3,13,8,13.0
4,14,987,14.0
5,15,13,15.0


In [14]:
# method 2

# If x > 10 return x
# else return NaN
df['10+'] = df.Close.apply(lambda x: x if x > 10 else None)
df

Unnamed: 0,Close,Open,10+
0,10,321,
1,11,12,11.0
2,12,654,12.0
3,13,8,13.0
4,14,987,14.0
5,15,13,15.0


In [15]:
# method 3
def tenplus(d):
    return d if d > 10 else None

df['10+'] = df.Close.apply(tenplus)
df

Unnamed: 0,Close,Open,10+
0,10,321,
1,11,12,11.0
2,12,654,12.0
3,13,8,13.0
4,14,987,14.0
5,15,13,15.0


# Change the sign of column ***"10+"*** if column ***"Open"*** is less then 300  

### col1, col2, col3, ...., coln = x[0], x[1], x[2], ..., x[n]

In [16]:
# method 1
df['10+'] = df['10+'].where(df['Open'] >= 300, df['10+']*-1)
df

Unnamed: 0,Close,Open,10+
0,10,321,
1,11,12,-11.0
2,12,654,12.0
3,13,8,-13.0
4,14,987,14.0
5,15,13,-15.0


In [17]:
# method 2
df['10+'] = df.apply(lambda x: x['10+']*-1 if x['Open'] < 300 else x['10+'], axis=1)
df

## This will also work
# df[['Open','10+']].apply(lambda x: x[1]*-1 if x[0] < 300 else x[1], axis=1)

Unnamed: 0,Close,Open,10+
0,10,321,
1,11,12,11.0
2,12,654,12.0
3,13,8,13.0
4,14,987,14.0
5,15,13,15.0


In [18]:
# method 3
def tenplus(d):
    return d['10+']*-1 if d['Open'] < 300 else d['10+']

df['10+'] = df.apply(tenplus, axis=1)
df

Unnamed: 0,Close,Open,10+
0,10,321,
1,11,12,-11.0
2,12,654,12.0
3,13,8,-13.0
4,14,987,14.0
5,15,13,-15.0


# Find all values where column ***"10+"*** is greater than zero and ***"Close"*** is equal to 14

## Method 1

In [19]:
# method 1

msk1 = df['10+'] > 0
msk2 = df.Close == 14

In [20]:
# if we apply msk1
df[msk1]

Unnamed: 0,Close,Open,10+
2,12,654,12.0
4,14,987,14.0


In [21]:
# if we apply msk2
df[msk2]

Unnamed: 0,Close,Open,10+
4,14,987,14.0


In [22]:
# if we apply both
# remember we need to place paranthesis when you have multiple filters
df[(msk1) & (msk2)]

Unnamed: 0,Close,Open,10+
4,14,987,14.0


## Method 2

In [23]:
# method 2

msk1 = df['Close'].apply(lambda x: x == 14)
msk2 = df['10+'].apply(lambda x: x > 0)
msk3 = df[['Close','10+']].apply(lambda x: x[0] == 14 and x[1] > 0, axis=1)

df[msk3]

Unnamed: 0,Close,Open,10+
4,14,987,14.0


In [24]:
msk3

0    False
1    False
2    False
3    False
4     True
5    False
dtype: bool

**Author:** [HEDARO](http://www.hedaro.com)  