# PANDAS -- CONT'D

In [24]:
import pandas as pd
import numpy as np

Create a custom function to use when processing data frames:

## PIPING & MAPPING

In [25]:
def addr(el1, el2):
    """Return the sum ``el1 + el2``.

    Any pair of operands that supports ``+`` is accepted, so the first
    argument may be a scalar or a pandas object (as when this function is
    handed to ``DataFrame.pipe``).

    Parameters
    ----------
    el1 : left-hand operand.
    el2 : right-hand operand.

    Returns
    -------
    The result of ``el1 + el2``.
    """
    # The original referenced an undefined name (`el2a`), which raised
    # NameError on every call.
    return el1 + el2

In [26]:
# Quick sanity check on plain integers.
addr(2, 4)

6

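`pipe` passes the whole data frame to _fn_ as its first argument (any extra arguments follow it); here the `+` inside `addr` is then applied to every cell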

In [27]:
# Small two-column frame: even numbers in col1, odd numbers in col2.
df = pd.DataFrame({
    'col1': list(range(2, 11, 2)),
    'col2': list(range(1, 10, 2)),
})
# Equivalent to addr(df, 100): the frame is passed as the first argument.
df.pipe(addr, 100)

Unnamed: 0,col1,col2
0,102,101
1,104,103
2,106,105
3,108,107
4,110,109


You can also use lambdas, even though Python sucks at functional programming. Note that `apply` hands each column (a Series) to the lambda, not each individual cell:

# With the default axis, `apply` calls the lambda once per column, so `x` is a
# whole Series and its string form is what gets interpolated into the f-string.
df.apply(lambda x: f'woot {x}!')

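`map` and `applymap` also work more or less as expected: `map` applies a function to every element of a Series, `applymap` to every cell of the whole data frame. (In pandas 2.1+ `DataFrame.applymap` is deprecated in favour of the equivalent `DataFrame.map`.)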

In [30]:
# The first five even numbers.
s = pd.Series([2 * k for k in range(1, 6)])
# Element-wise square; the Series itself is left unchanged.
s.map(lambda value: value ** 2)

0      4
1     16
2     36
3     64
4    100
dtype: int64

Works to create pipelines:

In [49]:
# 6x3 frame of uniform random values in [0, 1).
df = pd.DataFrame(np.random.rand(6, 3))

# Two element-wise stages chained together: scale by 1000, then square.
remapped = (
    df
    .applymap(lambda cell: cell * 1000)
    .applymap(lambda cell: cell ** 2)
)

print(df)
print(remapped)

          0         1         2
0  0.633890  0.239110  0.920154
1  0.539429  0.429812  0.162327
2  0.838172  0.150985  0.951643
3  0.976483  0.762269  0.765549
4  0.248653  0.385857  0.501840
5  0.734799  0.546707  0.463533
               0              1              2
0  401816.826658   57173.698626  846682.924634
1  290983.384934  184738.096054   26349.936037
2  702533.118020   22796.330437  905624.153949
3  953518.528603  581054.278964  586064.840641
4   61828.425781  148885.752334  251842.972723
5  539929.478891  298889.065966  214863.225113


## REINDEXING

Reindexing conforms a data frame to a given set of row and/or column labels.

You can use the structure of another data frame to define the column names of your data frame.

Missing columns will be added to the reindexed data frame, with NaN values.

In [None]:
# Keep only the rows labelled 0, 1 and 2 (reindex selects by label).
reindexed = remapped.reindex(index=list(range(3)))
print(reindexed)

In [63]:
# Source frame with columns A, B, C (standard-normal values).
df1 = pd.DataFrame({
    'A': np.random.randn(10),
    'B': np.random.randn(10),
    'C': np.random.randn(10),
})

# Template frame: built with X, Y, Z, C and then X is relabelled to A,
# so it shares columns A and C with df1.
df2 = pd.DataFrame({
    'X': np.random.randn(10),
    'Y': np.random.randn(10),
    'Z': np.random.randn(10),
    'C': np.random.rand(10),
}).rename(columns={'X': 'A'})

# df1 reshaped to df2's labels: shared columns keep df1's data,
# columns that df1 lacks (Y, Z) come back as NaN, and B is dropped.
df3 = df1.reindex_like(df2)

for frame in (df1, df2, df3):
    print(frame)

          A         B         C
0  0.300634 -0.169127  0.704655
1 -0.931712 -0.037198  1.410104
2  1.107523  0.482154 -0.707348
3  0.315614 -0.346014 -0.658430
4 -0.450878  1.561257  0.642320
5 -0.806351  1.046092 -0.029105
6  1.832157  0.773136  0.517094
7 -2.130670  0.729200 -0.975521
8  0.890611  0.149242  1.454887
9 -0.295555 -1.639139 -0.344170
          A         Y         Z         C
0  0.263268 -0.861715  0.781624  0.053748
1  0.488605  0.401249  1.326551  0.893643
2  0.268801 -0.221843 -0.000745  0.077228
3 -0.731858 -0.185144  1.331920  0.626657
4  1.132939  1.606025  0.949436  0.295891
5 -0.591856 -0.695724  1.204060  0.665730
6 -0.153472 -0.808662 -1.483313  0.383576
7 -3.246947  0.044714  0.675657  0.057484
8  0.881708  0.200123  0.038495  0.575606
9  0.186121  0.025765 -1.268912  0.254138
          A   Y   Z         C
0  0.300634 NaN NaN  0.704655
1 -0.931712 NaN NaN  1.410104
2  1.107523 NaN NaN -0.707348
3  0.315614 NaN NaN -0.658430
4 -0.450878 NaN NaN  0.642320
5 -0.8

## Iteration

### Column-wise over data frame

In [69]:
# Iterating a data frame walks its column labels.
for col in df1.columns:
    # Label, short rule, the column's values, long rule -- one item per line.
    print(col, '-' * 3, df1[col], '-' * 10, sep='\n')

A
---
0    0.300634
1   -0.931712
2    1.107523
3    0.315614
4   -0.450878
5   -0.806351
6    1.832157
7   -2.130670
8    0.890611
9   -0.295555
Name: A, dtype: float64
----------
B
---
0   -0.169127
1   -0.037198
2    0.482154
3   -0.346014
4    1.561257
5    1.046092
6    0.773136
7    0.729200
8    0.149242
9   -1.639139
Name: B, dtype: float64
----------
C
---
0    0.704655
1    1.410104
2   -0.707348
3   -0.658430
4    0.642320
5   -0.029105
6    0.517094
7   -0.975521
8    1.454887
9   -0.344170
Name: C, dtype: float64
----------


### Column-wise as (name, Series) pairs using `iteritems()` (removed in pandas 2.0 — use `items()`)

In the iteration below, `k` is the column name and `v` is the Series holding that column's values.

In [77]:
# `DataFrame.iteritems()` was removed in pandas 2.0; `items()` is the same
# iterator and is also available in older versions.
# Each step yields (column label, column Series).
for k, v in df1.items():
    print(f'(((({k}))))')
    print(f'v -->{v}')

((((A))))
v -->0    0.300634
1   -0.931712
2    1.107523
3    0.315614
4   -0.450878
5   -0.806351
6    1.832157
7   -2.130670
8    0.890611
9   -0.295555
Name: A, dtype: float64
((((B))))
v -->0   -0.169127
1   -0.037198
2    0.482154
3   -0.346014
4    1.561257
5    1.046092
6    0.773136
7    0.729200
8    0.149242
9   -1.639139
Name: B, dtype: float64
((((C))))
v -->0    0.704655
1    1.410104
2   -0.707348
3   -0.658430
4    0.642320
5   -0.029105
6    0.517094
7   -0.975521
8    1.454887
9   -0.344170
Name: C, dtype: float64


### iterate over rows with `iterrows()`

In [80]:
# iterrows() yields (index label, row as a Series) for every row.
for k, v in df1.iterrows():
    print(f'row {k}', f'{v}', '-' * 10, sep='\n')

row 0
A    0.300634
B   -0.169127
C    0.704655
Name: 0, dtype: float64
----------
row 1
A   -0.931712
B   -0.037198
C    1.410104
Name: 1, dtype: float64
----------
row 2
A    1.107523
B    0.482154
C   -0.707348
Name: 2, dtype: float64
----------
row 3
A    0.315614
B   -0.346014
C   -0.658430
Name: 3, dtype: float64
----------
row 4
A   -0.450878
B    1.561257
C    0.642320
Name: 4, dtype: float64
----------
row 5
A   -0.806351
B    1.046092
C   -0.029105
Name: 5, dtype: float64
----------
row 6
A    1.832157
B    0.773136
C    0.517094
Name: 6, dtype: float64
----------
row 7
A   -2.130670
B    0.729200
C   -0.975521
Name: 7, dtype: float64
----------
row 8
A    0.890611
B    0.149242
C    1.454887
Name: 8, dtype: float64
----------
row 9
A   -0.295555
B   -1.639139
C   -0.344170
Name: 9, dtype: float64
----------


### A Note
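Do _not_ modify the data frame you are iterating over. The iterators may hand back copies rather than views, so in-place changes to the yielded values are not guaranteed to reach the original data frame.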

## Sorting