Table-wise functions as taken from https://www.tutorialspoint.com/python_pandas/python_pandas_function_application.htm

In [2]:
import numpy as np
import pandas as pd

In [4]:
# Table-wise function application: define a two-argument helper that
# DataFrame.pipe can call with the whole frame as its first argument.
def adder(el_1, el_2):
    """Return ``el_1 + el_2``.

    The operands are combined with the ``+`` operator, so whatever ``+``
    means for their types (numeric addition, concatenation, broadcasting)
    is what the caller gets back.
    """
    total = el_1 + el_2
    return total
# Sample data: a 5x3 frame of standard-normal draws.
df = pd.DataFrame(
    data=np.random.randn(5, 3),
    columns=['col1', 'col2', 'col3'],
)
print(df)
# pipe passes the whole DataFrame to adder as the first argument and 2 as the
# second, i.e. adder(df, 2); the `+` inside adder then adds 2 to every cell.
df_plus_two = df.pipe(adder, 2)
print(df_plus_two)

       col1      col2      col3
0  0.313281  0.409340  0.457341
1  0.951622  1.415148  0.462158
2 -1.308850 -0.031640  1.387701
3  2.093822  1.006021  1.739814
4 -0.845375  0.580417 -0.643794
       col1      col2      col3
0  2.313281  2.409340  2.457341
1  2.951622  3.415148  2.462158
2  0.691150  1.968360  3.387701
3  4.093822  3.006021  3.739814
4  1.154625  2.580417  1.356206


In [7]:
# DataFrame.apply hands each column to the function in turn (axis=0, the
# default), so the printed Series holds one mean per column.
column_means = df.apply(np.mean, axis=0)
print(column_means)

col1    0.240900
col2    0.675857
col3    0.680644
dtype: float64


Unnamed: 0,col1,col2,col3
0,0.313281,0.40934,0.457341
1,0.951622,1.415148,0.462158
2,-1.30885,-0.03164,1.387701
3,2.093822,1.006021,1.739814
4,-0.845375,0.580417,-0.643794


In [8]:
# Element-wise application: call the lambda on every cell of df, here
# multiplying each value by 100.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old name
# is deprecated, so use `map` when this pandas version provides it and fall
# back to `applymap` on older releases. The check is made on the class so a
# column that happens to be called "map" cannot be mistaken for the method.
elementwise = df.map if hasattr(pd.DataFrame, "map") else df.applymap
print(elementwise(lambda x: x * 100))

         col1        col2        col3
0   31.328069   40.933963   45.734066
1   95.162197  141.514811   46.215798
2 -130.885011   -3.163957  138.770070
3  209.382178  100.602121  173.981354
4  -84.537484   58.041722  -64.379365


In [11]:
# reindex conforms a DataFrame to a requested set of row and column labels.
# It does not rename anything: labels that already exist keep their data, and
# labels that do not exist are added and filled with NaN.
N = 20

# Random columns are drawn in the order y, C, D.
df = pd.DataFrame(dict(
    A=pd.date_range('2016-01-01', periods=N, freq='D'),
    x=np.linspace(0, N - 1, N),
    y=np.random.rand(N),
    C=np.random.choice(['Low', 'Medium', 'High'], N).tolist(),
    D=np.random.normal(100, 10, size=N).tolist(),
))

print(df)

# Keep rows 0, 2 and 5 and columns A, C and B. Column B is absent from df, so
# it appears in the result as a column of NaN.
wanted_rows = [0, 2, 5]
wanted_columns = ['A', 'C', 'B']
df_reindexed = df.reindex(index=wanted_rows, columns=wanted_columns)

print(df_reindexed)

            A       C           D     x         y
0  2016-01-01  Medium   77.077069   0.0  0.374716
1  2016-01-02     Low  106.840689   1.0  0.253132
2  2016-01-03     Low   89.831835   2.0  0.334890
3  2016-01-04    High   99.031716   3.0  0.306382
4  2016-01-05     Low   94.041233   4.0  0.680890
5  2016-01-06  Medium   91.061000   5.0  0.200855
6  2016-01-07  Medium  104.377500   6.0  0.711942
7  2016-01-08    High   97.136818   7.0  0.987762
8  2016-01-09     Low  106.319802   8.0  0.328427
9  2016-01-10    High  102.934321   9.0  0.821126
10 2016-01-11  Medium   95.131264  10.0  0.976571
11 2016-01-12    High  112.102360  11.0  0.563189
12 2016-01-13  Medium   89.040268  12.0  0.389725
13 2016-01-14     Low   95.281643  13.0  0.339035
14 2016-01-15     Low   82.971247  14.0  0.179539
15 2016-01-16     Low  109.983983  15.0  0.959838
16 2016-01-17     Low   97.782516  16.0  0.334872
17 2016-01-18  Medium  105.613435  17.0  0.319831
18 2016-01-19     Low  111.455473  18.0  0.126516


In [13]:
# Row-wise concatenation: pd.concat stacks the rows of `two` underneath the
# rows of `one`. Both frames are labelled 1-5, and concat keeps those labels,
# so every index value shows up twice in the combined frame.
row_labels = [1, 2, 3, 4, 5]

one = pd.DataFrame(
    data={
        'Name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
        'subject_id': ['sub1', 'sub2', 'sub4', 'sub6', 'sub5'],
        'Marks_scored': [98, 90, 87, 69, 78],
    },
    index=row_labels,
)

two = pd.DataFrame(
    data={
        'Name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
        'subject_id': ['sub2', 'sub4', 'sub3', 'sub6', 'sub5'],
        'Marks_scored': [89, 80, 79, 97, 88],
    },
    index=row_labels,
)

stacked = pd.concat([one, two])
print(stacked)

   Marks_scored    Name subject_id
1            98    Alex       sub1
2            90     Amy       sub2
3            87   Allen       sub4
4            69   Alice       sub6
5            78  Ayoung       sub5
1            89   Billy       sub2
2            80   Brian       sub4
3            79    Bran       sub3
4            97   Bryce       sub6
5            88   Betty       sub5


In [14]:
# pd.Categorical builds a categorical array; its signature is
# pandas.Categorical(values, categories, ordered).
# Only 'c', 'b' and 'a' are declared as categories here, so the value 'd',
# which is not among them, is stored as NaN.
cat = pd.Categorical(
    values=['a', 'b', 'c', 'a', 'b', 'c', 'd'],
    categories=['c', 'b', 'a'],
)
print(cat)

[a, b, c, a, b, c, NaN]
Categories (3, object): [c, b, a]
