# **Python - Boolean Function**

In [1]:
import pandas as pd

In [2]:
data = {
    "var1" : [True, True, False],
    "var2" : [1, 2, 3],
    "var3" : [True, True, True]
}

In [3]:
df = pd.DataFrame(data)

In [4]:
# check if all value in each column data is TRUE
print(df.all())

var1    False
var2     True
var3     True
dtype: bool


In [5]:
# check if all value in each row data is TRUE
print(df.all(axis = 1))

0     True
1     True
2    False
dtype: bool


In [6]:
# check if all values in Data Frame is TRUE
# print(df.all().all())
print(df.all(axis = None))

False


### **Function Application**
- The appropriate method to use depends on whether your function expects to operate on an entire DataFrame, row- or column-wise, or elementwise.
  -  Table wise Function Application: pipe()
  - Row or Column Wise Function Application: apply()
  - Element wise Function Application: applymap()


In [7]:
import pandas as pd
import numpy as np
import random

In [8]:
df = pd.DataFrame(np.random.randint(1, 16, size=(5, 3)), columns = ["A", "B", "C"])
df

Unnamed: 0,A,B,C
0,2,6,4
1,15,3,12
2,7,9,11
3,14,2,1
4,4,2,10


In [9]:
def adder(num1, num2):
  return num1 + num2

In [10]:
df.pipe(adder,2)

Unnamed: 0,A,B,C
0,4,8,6
1,17,5,14
2,9,11,13
3,16,4,3
4,6,4,12


In [11]:
np.random.seed(0)
df = pd.DataFrame(np.random.randint(1, 16, size = (5, 3)), columns = ["A", "B", "C"])
df

Unnamed: 0,A,B,C
0,13,6,1
1,4,12,4
2,8,10,4
3,6,3,5
4,8,7,9


In [12]:
df.apply(np.mean)

Unnamed: 0,0
A,7.8
B,7.6
C,4.6


In [13]:
df.apply(np.mean, axis = 1)

Unnamed: 0,0
0,6.666667
1,6.666667
2,7.333333
3,4.666667
4,8.0


In [14]:
np.random.seed(0)
df = pd.DataFrame(np.random.randint(1, 16, size = (5, 3)), columns = ["A", "B", "C"])
df

Unnamed: 0,A,B,C
0,13,6,1
1,4,12,4
2,8,10,4
3,6,3,5
4,8,7,9


In [15]:
df.map(lambda x: x*100)

Unnamed: 0,A,B,C
0,1300,600,100
1,400,1200,400
2,800,1000,400
3,600,300,500
4,800,700,900


### **Pandas - Iteration**

In [24]:
# Iterating over a DataFrame

import pandas as pd
import numpy as np

n = 20
np.random.seed(0)
df = pd.DataFrame({
    "A" : pd.date_range(start = "2016-01-01", periods = n, freq = "D"),
    "x" : np.linspace(0, stop = n - 1, num = n),
    "y" : np.random.rand(n),
    "C" : np.random.choice(["Low", "Medium", "High"], n).tolist(),
    "D" : np.random.normal(100, 10, size = (n)).tolist()
})
df

Unnamed: 0,A,x,y,C,D
0,2016-01-01,0.0,0.548814,High,95.842553
1,2016-01-02,1.0,0.715189,Low,94.754878
2,2016-01-03,2.0,0.602763,Medium,108.131013
3,2016-01-04,3.0,0.544883,Medium,97.707494
4,2016-01-05,4.0,0.423655,High,121.617174
5,2016-01-06,5.0,0.645894,Low,90.430686
6,2016-01-07,6.0,0.437587,Medium,100.673108
7,2016-01-08,7.0,0.891773,Medium,102.064988
8,2016-01-09,8.0,0.963663,Medium,95.431187
9,2016-01-10,9.0,0.383442,Low,89.400242


In [25]:
for col in df:
  print(col)

A
x
y
C
D


In [29]:
for key, value in df.C.items():
  print(key, value)

0 High
1 Low
2 Medium
3 Medium
4 High
5 Low
6 Medium
7 Medium
8 Medium
9 Low
10 High
11 Low
12 High
13 High
14 Low
15 High
16 Low
17 Low
18 Low
19 Medium


In [34]:
for key, value in df.loc[0:5].iterrows():
  print(key, value)

0 A    2016-01-01 00:00:00
x                    0.0
y               0.548814
C                   High
D              95.842553
Name: 0, dtype: object
1 A    2016-01-02 00:00:00
x                    1.0
y               0.715189
C                    Low
D              94.754878
Name: 1, dtype: object
2 A    2016-01-03 00:00:00
x                    2.0
y               0.602763
C                 Medium
D             108.131013
Name: 2, dtype: object
3 A    2016-01-04 00:00:00
x                    3.0
y               0.544883
C                 Medium
D              97.707494
Name: 3, dtype: object
4 A    2016-01-05 00:00:00
x                    4.0
y               0.423655
C                   High
D             121.617174
Name: 4, dtype: object
5 A    2016-01-06 00:00:00
x                    5.0
y               0.645894
C                    Low
D              90.430686
Name: 5, dtype: object


### **Pandas - Sorting**

In [37]:
np.random.seed(0)

unsorted_df = pd.DataFrame(np.random.randint(1, 16, size = (10,3)), columns = ["a", "b", "c"])
unsorted_df

Unnamed: 0,a,b,c
0,13,6,1
1,4,12,4
2,8,10,4
3,6,3,5
4,8,7,9
5,9,13,11
6,2,7,8
7,8,15,9
8,2,6,10
9,14,9,10


In [39]:
sorted_df = unsorted_df.sort_index(ascending = False)
sorted_df

Unnamed: 0,a,b,c
9,14,9,10
8,2,6,10
7,8,15,9
6,2,7,8
5,9,13,11
4,8,7,9
3,6,3,5
2,8,10,4
1,4,12,4
0,13,6,1


In [40]:
sorted_df = unsorted_df.sort_index(axis = 1)
sorted_df

Unnamed: 0,a,b,c
0,13,6,1
1,4,12,4
2,8,10,4
3,6,3,5
4,8,7,9
5,9,13,11
6,2,7,8
7,8,15,9
8,2,6,10
9,14,9,10


In [42]:
sorted_df = unsorted_df.sort_values(by = ["a", "b", "c"])
sorted_df

Unnamed: 0,a,b,c
8,2,6,10
6,2,7,8
1,4,12,4
3,6,3,5
4,8,7,9
2,8,10,4
7,8,15,9
5,9,13,11
0,13,6,1
9,14,9,10
