In [2]:
import pandas as pd

# Small demo frame: a repeating key (k1), a numeric column (col1) and a
# two-letter code column (col2). Rows 2 and 3 are intentionally identical.
df_one = pd.DataFrame(
    {
        "k1": list("AABBCC"),
        "col1": [100, 200, 300, 300, 400, 500],
        "col2": ["NY", "CA", "WA", "WA", "AK", "NV"],
    }
)

In [3]:
df_one

Unnamed: 0,k1,col1,col2
0,A,100,NY
1,A,200,CA
2,B,300,WA
3,B,300,WA
4,C,400,AK
5,C,500,NV


In [4]:
df_one['col2'].unique()

array(['NY', 'CA', 'WA', 'AK', 'NV'], dtype=object)

In [5]:
df_one['k1'].unique() # returns unique values

array(['A', 'B', 'C'], dtype=object)

In [6]:
df_one['k1'].nunique()  # returns the number of distinct values (A, B, C -> 3), not a sum

3

In [7]:
df_one['col2'].value_counts()

WA    2
NY    1
CA    1
AK    1
NV    1
Name: col2, dtype: int64

In [8]:
# Removes rows that repeat an earlier row in every column (row 3 repeats row 2).
# The result is a new frame; df_one itself is not modified by this call.
df_one.drop_duplicates()

Unnamed: 0,k1,col1,col2
0,A,100,NY
1,A,200,CA
2,B,300,WA
4,C,400,AK
5,C,500,NV


In [9]:
# Add a derived column: each col1 value multiplied by 10 (element-wise).
df_one['NEW'] = df_one['col1'] * 10

In [10]:
df_one

Unnamed: 0,k1,col1,col2,NEW
0,A,100,NY,1000
1,A,200,CA,2000
2,B,300,WA,3000
3,B,300,WA,3000
4,C,400,AK,4000
5,C,500,NV,5000


In [11]:
def grab_first_letter(state):
    """Return the first character of ``state``.

    Args:
        state: A string such as a two-letter state code (e.g. ``'NY'``).

    Returns:
        The first character of ``state``, or ``''`` when ``state`` is empty.
    """
    # Guard the empty case: state[0] on '' raises IndexError, which would
    # abort an entire Series.apply() because of a single blank value.
    return state[0] if state else ''

In [12]:
grab_first_letter('NY')

'N'

In [13]:
df_one['col2'].apply(grab_first_letter)

0    N
1    C
2    W
3    W
4    A
5    N
Name: col2, dtype: object

In [14]:
# Keep the per-row result of grab_first_letter(col2) as a new column on df_one.
df_one['first letter'] = df_one['col2'].apply(grab_first_letter)

In [15]:
df_one

Unnamed: 0,k1,col1,col2,NEW,first letter
0,A,100,NY,1000,N
1,A,200,CA,2000,C
2,B,300,WA,3000,W
3,B,300,WA,3000,W
4,C,400,AK,4000,A
5,C,500,NV,5000,N


In [16]:
def complex_letter(state):
    """Label a state code based on its first letter.

    Args:
        state: A string such as a two-letter state code (e.g. ``'WA'``).

    Returns:
        ``"Washington"`` when the first character of ``state`` is ``"W"``,
        otherwise ``"Error"``. Empty input also yields ``"Error"``.
    """
    # Check for emptiness first: state[0] on '' raises IndexError.
    # Only the first letter is inspected, so every code starting with "W"
    # is labelled "Washington".
    if state and state[0] == "W":
        return "Washington"
    return "Error"

In [17]:
df_one['col2'].apply(complex_letter)

0         Error
1         Error
2    Washington
3    Washington
4         Error
5         Error
Name: col2, dtype: object

In [18]:
df_one['k1']

0    A
1    A
2    B
3    B
4    C
5    C
Name: k1, dtype: object

In [20]:
# Lookup table translating each k1 letter to a number.
my_map = dict(A=1, B=2, C=3)

In [21]:
df_one['k1'].map(my_map)

0    1
1    1
2    2
3    2
4    3
5    3
Name: k1, dtype: int64

In [22]:
df_one

Unnamed: 0,k1,col1,col2,NEW,first letter
0,A,100,NY,1000,N
1,A,200,CA,2000,C
2,B,300,WA,3000,W
3,B,300,WA,3000,W
4,C,400,AK,4000,A
5,C,500,NV,5000,N


In [23]:
# Store the k1 -> number translation from my_map as a new column on df_one.
df_one['num'] = df_one['k1'].map(my_map)

In [24]:
df_one

Unnamed: 0,k1,col1,col2,NEW,first letter,num
0,A,100,NY,1000,N,1
1,A,200,CA,2000,C,1
2,B,300,WA,3000,W,2
3,B,300,WA,3000,W,2
4,C,400,AK,4000,A,3
5,C,500,NV,5000,N,3


In [25]:
df_one['col1'].max()

500

In [26]:
df_one['col1'].idxmax()

5

In [27]:
df_one['col1'].idxmin()

0

In [29]:
df_one.columns

Index(['k1', 'col1', 'col2', 'NEW', 'first letter', 'num'], dtype='object')

In [30]:
# Rename every column positionally: one new label per existing column (six here).
# This changes df_one in place, so later cells must use the c1..c6 names.
df_one.columns = ['c1', 'c2', 'c3', 'c4', 'c5', 'c6']

In [31]:
df_one

Unnamed: 0,c1,c2,c3,c4,c5,c6
0,A,100,NY,1000,N,1
1,A,200,CA,2000,C,1
2,B,300,WA,3000,W,2
3,B,300,WA,3000,W,2
4,C,400,AK,4000,A,3
5,C,500,NV,5000,N,3


In [32]:
df_one.sort_values('c3')

Unnamed: 0,c1,c2,c3,c4,c5,c6
4,C,400,AK,4000,A,3
1,A,200,CA,2000,C,1
5,C,500,NV,5000,N,3
0,A,100,NY,1000,N,1
2,B,300,WA,3000,W,2
3,B,300,WA,3000,W,2


In [33]:
df_one.sort_values('c3', ascending=False)

Unnamed: 0,c1,c2,c3,c4,c5,c6
2,B,300,WA,3000,W,2
3,B,300,WA,3000,W,2
0,A,100,NY,1000,N,1
5,C,500,NV,5000,N,3
1,A,200,CA,2000,C,1
4,C,400,AK,4000,A,3


In [34]:
# Two small frames that share the same default 0..4 index; the following
# cells use them to demonstrate concatenation.
features = pd.DataFrame(
    {
        "A": list(range(100, 600, 100)),
        "B": list(range(12, 17)),
    }
)

predictions = pd.DataFrame({"pred": [0, 1, 1, 0, 1]})

In [35]:
features

Unnamed: 0,A,B
0,100,12
1,200,13
2,300,14
3,400,15
4,500,16


In [37]:
predictions

Unnamed: 0,pred
0,0
1,1
2,1
3,0
4,1


In [38]:
# With the default axis (0) the frames are stacked vertically. Columns present
# in only one frame are filled with NaN, which is why the integers are shown
# as floats and the index labels 0..4 appear twice.
pd.concat([features, predictions])

Unnamed: 0,A,B,pred
0,100.0,12.0,
1,200.0,13.0,
2,300.0,14.0,
3,400.0,15.0,
4,500.0,16.0,
0,,,0.0
1,,,1.0
2,,,1.0
3,,,0.0
4,,,1.0


In [40]:
# axis=1 places the frames side by side, lining rows up by index label.
pd.concat([features,predictions],axis=1)

Unnamed: 0,A,B,pred
0,100,12,0
1,200,13,1
2,300,14,1
3,400,15,0
4,500,16,1


In [41]:
df_one

Unnamed: 0,c1,c2,c3,c4,c5,c6
0,A,100,NY,1000,N,1
1,A,200,CA,2000,C,1
2,B,300,WA,3000,W,2
3,B,300,WA,3000,W,2
4,C,400,AK,4000,A,3
5,C,500,NV,5000,N,3


In [42]:
df_one['c1']

0    A
1    A
2    B
3    B
4    C
5    C
Name: c1, dtype: object

In [43]:
# One-hot encode c1: one indicator column per distinct value (A, B, C).
# dtype=int pins the indicators to 0/1. Since pandas 2.0 the default dtype is
# bool, which would display True/False instead.
pd.get_dummies(df_one['c1'], dtype=int)

Unnamed: 0,A,B,C
0,1,0,0
1,1,0,0
2,0,1,0
3,0,1,0
4,0,0,1
5,0,0,1
