In [1]:
import pandas as pd
import numpy as np

In [2]:
# Small demo frame with one integer, one float and one string column.
# float_col and str_col each contain one missing value.
df = pd.DataFrame({
    'int_col': [1, 2, 6, 8, -1],
    'float_col': [0.1, 0.2, 0.2, 10.1, None],
    'str_col': ['a', 'b', None, 'c', 'a'],
})

In [3]:
# Frequency of each distinct value in int_col.
df['int_col'].value_counts()

 6    1
-1    1
 2    1
 1    1
 8    1
Name: int_col, dtype: int64

In [4]:
# Distinct values of str_col (missing value included); set ordering is arbitrary.
list(set(df['str_col']))

['a', 'c', None, 'b']

In [5]:
# Row holding the largest int_col: sort descending, keep the first row.
df.sort_values('int_col', ascending=False).iloc[:1]

Unnamed: 0,int_col,float_col,str_col
3,8,10.1,c


In [6]:
# Add one boolean indicator column per distinct str_col value (mutates df in place).
# NOTE: unique() also yields the missing value, so a column keyed by None is
# created as well; its comparison is False on every row.
for label in df['str_col'].unique():
    df[label] = df['str_col'] == label

In [7]:
# Display the frame, now including the indicator columns added by the loop above.
df

Unnamed: 0,int_col,float_col,str_col,a,b,None,c
0,1,0.1,a,True,False,False,False
1,2,0.2,b,False,True,False,False
2,6,0.2,,False,False,False,False
3,8,10.1,c,False,False,False,True
4,-1,,a,True,False,False,False


In [8]:
# Keep only the rows whose int_col is strictly greater than 1.
df.loc[df['int_col'].gt(1)]

Unnamed: 0,int_col,float_col,str_col,a,b,None,c
1,2,0.2,b,False,True,False,False
2,6,0.2,,False,False,False,False
3,8,10.1,c,False,False,False,True


In [9]:
# Element-wise sum of the two numeric columns; a missing operand yields NaN.
df['int_col'].add(df['float_col'])

0     1.1
1     2.2
2     6.2
3    18.1
4     NaN
dtype: float64

# .str usage

https://pandas.pydata.org/pandas-docs/stable/user_guide/text.html

In [10]:
# Underscore-delimited strings with one missing entry, used by the split examples.
s2 = pd.Series([
    'a_b_c',
    'c_d_e',
    np.nan,
    'f_g_h',
])

In [11]:
# Split on '_' and spread the pieces across columns (one column per piece).
s2.str.split(pat='_', expand=True)

Unnamed: 0,0,1,2
0,a,b,c
1,c,d,e
2,,,
3,f,g,h


In [12]:
# Concatenate the first piece of every split string; missing entries are skipped.
s2.str.split(pat='_', expand=True).loc[:, 0].str.cat()

'acf'

In [13]:
# Join the third piece of every split string with ',', showing missing entries as '-'.
s2.str.split(pat='_', expand=True).loc[:, 2].str.cat(sep=',', na_rep='-')

'c,e,-,h'

In [14]:
# Limit to a single split (n=1): first piece in column 0, the remainder in column 1.
s2.str.split(pat='_', n=1, expand=True)

Unnamed: 0,0,1
0,a,b_c
1,c,d_e
2,,
3,f,g_h


In [15]:
# NOTE(review): same call as the previous cell (single split, expanded) -
# candidate for removal or for showing a different option.
s2.str.split('_', n=1, expand=True)

Unnamed: 0,0,1
0,a,b_c
1,c,d_e
2,,
3,f,g_h


In [16]:
# s: four single-letter strings; t: the same letters with the third one missing.
s = pd.Series(list('abcd'))
t = pd.Series([
    'a',
    'b',
    np.nan,
    'd',
])

In [17]:
# Display s (no missing values).
s

0    a
1    b
2    c
3    d
dtype: object

In [18]:
# Display t (the entry at position 2 is NaN).
t

0      a
1      b
2    NaN
3      d
dtype: object

In [19]:
# Join all elements into one comma-separated string; with no na_rep the NaN entry is skipped.
t.str.cat(sep=',')

'a,b,d'

In [20]:
# Same join, but na_rep substitutes '-' for the missing entry instead of skipping it.
t.str.cat(sep=',', na_rep='-')

'a,b,-,d'

In [21]:
# Element-wise concatenation of s with t; a missing value in t is rendered as '-'.
s.str.cat(others=t, na_rep='-')

0    aa
1    bb
2    c-
3    dd
dtype: object

In [22]:
# One-column frame of strings padded with leading/trailing spaces.
test = pd.DataFrame({
    'a': [' fjdkajhfk; ', ' dfasdf fdsaf '],
})

In [23]:
# Upper-case every string in column a (surrounding whitespace is untouched).
test['a'].str.upper()

0       FJDKAJHFK; 
1     DFASDF FDSAF 
Name: a, dtype: object

In [24]:
# Strip surrounding whitespace and materialise the result as a plain list.
list(test['a'].str.strip())

['fjdkajhfk;', 'dfasdf fdsaf']

In [25]:
# Boolean mask: does each string in column a contain the letter 'a'?
test['a'].str.contains('a')

0    True
1    True
Name: a, dtype: bool

# Merge, join, and concatenate

https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html

In [26]:
# Three frames with identical columns (A-D) and consecutive, non-overlapping
# row labels (0-3, 4-7, 8-11); they feed the concat examples that follow.
# The IPython prompts ("In [n]:" / "...:") pasted from the pandas docs were
# removed so the cell is plain, valid Python.
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                    'B': ['B0', 'B1', 'B2', 'B3'],
                    'C': ['C0', 'C1', 'C2', 'C3'],
                    'D': ['D0', 'D1', 'D2', 'D3']},
                   index=[0, 1, 2, 3])

df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
                    'B': ['B4', 'B5', 'B6', 'B7'],
                    'C': ['C4', 'C5', 'C6', 'C7'],
                    'D': ['D4', 'D5', 'D6', 'D7']},
                   index=[4, 5, 6, 7])

df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],
                    'B': ['B8', 'B9', 'B10', 'B11'],
                    'C': ['C8', 'C9', 'C10', 'C11'],
                    'D': ['D8', 'D9', 'D10', 'D11']},
                   index=[8, 9, 10, 11])

In [27]:
# Stack the three frames vertically (concat's default axis); original row labels are kept.
pd.concat([df1,df2,df3])

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9


In [28]:
# Stack the frames vertically and tag each source frame with an outer index key.
df_xyz1 = pd.concat(
    [df1, df2, df3],
    keys=['x', 'y', 'z'],
)

In [29]:
# Cross-section for outer key 'z': the rows that came from df3.
df_xyz1.xs('z')

Unnamed: 0,A,B,C,D
8,A8,B8,C8,D8
9,A9,B9,C9,D9
10,A10,B10,C10,D10
11,A11,B11,C11,D11


In [30]:
# Column B across all three keyed groups (the two-level index is preserved).
df_xyz1.loc[:, 'B']

x  0      B0
   1      B1
   2      B2
   3      B3
y  4      B4
   5      B5
   6      B6
   7      B7
z  8      B8
   9      B9
   10    B10
   11    B11
Name: B, dtype: object

In [31]:
# Row-wise concatenation with the axis spelled out by name.
pd.concat([df1, df2], axis='index')

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [32]:
# Passing a mapping: its keys become the outer level of the resulting index.
pd.concat(dict(x=df1, y=df2, z=df3))

Unnamed: 0,Unnamed: 1,A,B,C,D
x,0,A0,B0,C0,D0
x,1,A1,B1,C1,D1
x,2,A2,B2,C2,D2
x,3,A3,B3,C3,D3
y,4,A4,B4,C4,D4
y,5,A5,B5,C5,D5
y,6,A6,B6,C6,D6
y,7,A7,B7,C7,D7
z,8,A8,B8,C8,D8
z,9,A9,B9,C9,D9


In [33]:
# Collect the three frames so they can be processed together.
dfs = [
    df1,
    df2,
    df3,
]

In [34]:
# Give every frame a fresh 0..n-1 index (old labels discarded) so rows line up
# when the frames are placed side by side.
df_reset = [frame.reset_index(drop=True) for frame in dfs]

In [35]:
# Place the re-indexed frames side by side; the keys become the outer column level.
dfss = pd.concat(
    df_reset,
    axis='columns',
    keys=['x', 'y', 'z'],
)

In [36]:
# Display the side-by-side frame with its two-level column index (x/y/z over A-D).
dfss

Unnamed: 0_level_0,x,x,x,x,y,y,y,y,z,z,z,z
Unnamed: 0_level_1,A,B,C,D,A,B,C,D,A,B,C,D
0,A0,B0,C0,D0,A4,B4,C4,D4,A8,B8,C8,D8
1,A1,B1,C1,D1,A5,B5,C5,D5,A9,B9,C9,D9
2,A2,B2,C2,D2,A6,B6,C6,D6,A10,B10,C10,D10
3,A3,B3,C3,D3,A7,B7,C7,D7,A11,B11,C11,D11


In [37]:
# Select group 'y' from the outer column level, then its column A.
dfss['y'].loc[:, 'A']

0    A4
1    A5
2    A6
3    A7
Name: A, dtype: object

In [38]:
# Non-null counts per str_col group; the row with a missing str_col forms no group.
df.groupby(by='str_col').count()

Unnamed: 0_level_0,int_col,float_col,a,b,None,c
str_col,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
a,2,1,2,2,2,2
b,1,1,1,1,1,1
c,1,1,1,1,1,1
