# Learning material

This notebook is for handy code snippets.

In [1]:
import pandas as pd
import numpy as np

### Error handling

#### Try, except

In [2]:
total_marks = int(input('Enter total marks'))
num_sections = int(input('Enter number of sections'))
try:
    marks_per_section = total_marks / num_sections
except ZeroDivisionError:
    print('The number of sections cannot equal 0')
else:
    print(marks_per_section)

The number of sections cannot equal 0


#### Assert

In [3]:
total_marks = int(input('Enter total marks'))
num_sections = int(input('Enter number of sections'))
assert(num_sections != 0), 'The number of sections cannot equal 0' 
marks_per_section = total_marks / num_sections

AssertionError: The number of sections cannot equal 0

### Pandas

#### String formatting

In [4]:
a = 'Rachel'
b = 'Georgia'
d = 'Charlotte'
"The story of {0}, {1}, and {c}".format(a, b, c=d)

'The story of Rachel, Georgia, and Charlotte'

#### Shift

In [5]:
df = pd.DataFrame({'balance': [15, 21, 23, 24], 
                  'value': [5, 6, 2, 1],})

cols = ['value', 'balance']
df = df[cols]

df['manual balance'] = df['balance'].shift() + df['value']
df['check balance'] = df['balance'] == df['manual balance']
df

Unnamed: 0,value,balance,manual balance,check balance
0,5,15,,False
1,6,21,21.0,True
2,2,23,23.0,True
3,1,24,24.0,True


#### Cumulative sum and groupby

In [6]:
test = pd.DataFrame({'period': [1,1,1,2,2,3,3],
              'cost': [10,20,15,10,5,20,5]})

test = test[['period', 'cost']]
test['cumulative_sum'] = test.groupby('period')['cost'].cumsum()
test

Unnamed: 0,period,cost,cumulative_sum
0,1,10,10
1,1,20,30
2,1,15,45
3,2,10,10
4,2,5,15
5,3,20,20
6,3,5,25


#### Index of minimum/maximum

In [7]:
df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))
df

Unnamed: 0,a,b,c,d,e,f,g,h,i,j
0,0.417302,0.564039,0.47216,0.021414,0.473879,0.111375,0.220834,0.021943,0.302064,0.499635
1,0.272572,0.826592,0.928433,0.252842,0.84861,0.195139,0.833422,0.540047,0.037363,0.293067
2,0.571057,0.606077,0.05447,0.312728,0.795333,0.220606,0.09093,0.459067,0.792471,0.227873
3,0.485263,0.031578,0.767331,0.704187,0.657237,0.071818,0.351575,0.144385,0.229331,0.000757
4,0.444842,0.764407,0.395921,0.892525,0.939369,0.806987,0.492483,0.633786,0.309149,0.614207


In [8]:
df.sum(axis=0).idxmin()

'f'

#### Creating random numbers

In [9]:
df = pd.DataFrame(data = {'A': list(range(1, 101)), 
                     'B': np.random.randint(low = 0, high = 100, size = 100)})
df.head(10)

Unnamed: 0,A,B
0,1,68
1,2,27
2,3,26
3,4,38
4,5,70
5,6,62
6,7,10
7,8,62
8,9,38
9,10,91


In [10]:
pd.DataFrame(data = np.random.random(size = (10, 5)), 
            columns = ['oh','laddergoat', 'you', 'so', 'random'])

Unnamed: 0,oh,laddergoat,you,so,random
0,0.875451,0.646413,0.260759,0.027375,0.583851
1,0.378901,0.218207,0.264821,0.813663,0.113641
2,0.376279,0.366123,0.367463,0.949454,0.493792
3,0.173308,0.140531,0.347677,0.952784,0.799978
4,0.438527,0.890829,0.337454,0.477186,0.64853
5,0.16935,0.303634,0.63441,0.122752,0.212578
6,0.71208,0.445925,0.10638,0.693789,0.011987
7,0.950631,0.71041,0.666196,0.596781,0.026948
8,0.102425,0.528391,0.241486,0.95969,0.001752
9,0.952372,0.281158,0.853551,0.489005,0.837507


In [42]:
# Keep same random numbers
np.random.seed(99)
df = pd.DataFrame(data = np.random.random(size = (10, 5)),
             columns = list('abcde'))
df

Unnamed: 0,a,b,c,d,e
0,0.672279,0.488078,0.825495,0.031446,0.80805
1,0.565617,0.297622,0.046696,0.990627,0.006826
2,0.769793,0.746767,0.377439,0.494147,0.928948
3,0.395454,0.973956,0.524415,0.093613,0.813308
4,0.211687,0.554346,0.292269,0.816142,0.828043
5,0.221577,0.644835,0.095182,0.411663,0.096865
6,0.144011,0.212196,0.476656,0.077614,0.235044
7,0.006553,0.898644,0.552234,0.167547,0.928878
8,0.542088,0.041759,0.524877,0.640143,0.800658
9,0.834919,0.252632,0.968274,0.466238,0.264091


#### Assigning values in a dataframe to bins

In [11]:
df = pd.DataFrame(data = {'A': np.random.randint(low = 0, high = 100, size = 15), 
                     'B': np.random.randint(low = 0, high = 100, size = 15)})
df

Unnamed: 0,A,B
0,9,9
1,37,41
2,40,95
3,62,96
4,9,2
5,18,71
6,86,98
7,59,51
8,34,95
9,89,5


In [12]:
my_range = np.arange(0,101,10)
my_range

array([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

In [13]:
pd.cut(df['A'],my_range)

0       (0, 10]
1      (30, 40]
2      (30, 40]
3      (60, 70]
4       (0, 10]
5      (10, 20]
6      (80, 90]
7      (50, 60]
8      (30, 40]
9      (80, 90]
10     (60, 70]
11     (80, 90]
12     (10, 20]
13    (90, 100]
14     (80, 90]
Name: A, dtype: category
Categories (10, interval[int64]): [(0, 10] < (10, 20] < (20, 30] < (30, 40] ... (60, 70] < (70, 80] < (80, 90] < (90, 100]]

#### Conditional formatting

##### Highlight null values

In [33]:
df = pd.DataFrame(data = np.random.randint(low = 0, high = 9, size = (10, 5)), 
                  columns = list('abcde'))
df = df.replace(0, np.nan)
df.style.highlight_null(null_color='#ccccff')

Unnamed: 0,a,b,c,d,e
0,3.0,3,3.0,7.0,4.0
1,8.0,4,,8.0,5.0
2,6.0,2,1.0,8.0,8.0
3,2.0,2,6.0,7.0,
4,2.0,4,4.0,2.0,2.0
5,6.0,3,6.0,2.0,6.0
6,3.0,4,2.0,6.0,
7,6.0,1,4.0,8.0,
8,,5,2.0,6.0,1.0
9,5.0,4,7.0,,6.0


##### Create heatmap

In [34]:
import seaborn as sns
df = pd.DataFrame(data = np.random.random(size = (10, 5)), 
                  columns = list('abcde'))
cm = sns.light_palette("green", as_cmap=True)
s = df.style.background_gradient(cmap=cm)
s

Unnamed: 0,a,b,c,d,e
0,0.0175254,0.859514,0.488256,0.997785,0.168681
1,0.713983,0.738983,0.660937,0.874423,0.756113
2,0.718816,0.368995,0.565245,0.615641,0.573208
3,0.597271,0.509879,0.236198,0.497867,0.133892
4,0.296724,0.92701,0.411124,0.197754,0.487174
5,0.320295,0.646083,0.834642,0.959893,0.134493
6,0.113011,0.321828,0.6097,0.142814,0.876541
7,0.268036,0.0918052,0.658602,0.393717,0.214975
8,0.744343,0.359201,0.545172,0.652246,0.818735
9,0.107049,0.679104,0.729655,0.218031,0.99992


##### Highlight min and max

In [41]:
df.style.highlight_max(axis=0, color = 'pink')
# df.style.highlight_min(axis=0, color = 'blue')

Unnamed: 0,a,b,c,d,e
0,0.0175254,0.859514,0.488256,0.997785,0.168681
1,0.713983,0.738983,0.660937,0.874423,0.756113
2,0.718816,0.368995,0.565245,0.615641,0.573208
3,0.597271,0.509879,0.236198,0.497867,0.133892
4,0.296724,0.92701,0.411124,0.197754,0.487174
5,0.320295,0.646083,0.834642,0.959893,0.134493
6,0.113011,0.321828,0.6097,0.142814,0.876541
7,0.268036,0.0918052,0.658602,0.393717,0.214975
8,0.744343,0.359201,0.545172,0.652246,0.818735
9,0.107049,0.679104,0.729655,0.218031,0.99992


#### Stack and unstack

In [69]:
df = pd.DataFrame({'a': (1.0, 2.0),
                  'b': (3.0, 4.0)})
df

Unnamed: 0,a,b
0,1.0,3.0
1,2.0,4.0


In [74]:
s = df.stack()
s

0  a    1.0
   b    3.0
1  a    2.0
   b    4.0
dtype: float64

In [75]:
s.unstack()

Unnamed: 0,a,b
0,1.0,3.0
1,2.0,4.0


##### Creating multi-index dataframes

In [76]:
index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
                                    ('two', 'a'), ('two', 'b')])
s = pd.Series(np.arange(1.0, 5.0), index=index)
s

one  a    1.0
     b    2.0
two  a    3.0
     b    4.0
dtype: float64