In [1]:
import numpy as np
import pandas as pd
from numpy.random import randint

In [2]:
# Column labels (W..Z) and row labels (A..E) for the demo frame,
# built by splitting each string into its single-character labels.
columns = list('WXYZ')
index = list('ABCDE')

In [3]:
# Seed NumPy's global generator so the random integers are reproducible,
# then draw a 5x4 array of integers from the half-open range [-100, 100).
np.random.seed(42)
data = randint(low=-100, high=100, size=(5, 4))

In [4]:
# Wrap the random array in a labelled frame: rows A..E, columns W..Z.
df = pd.DataFrame(data=data, index=index, columns=columns)

In [5]:
# display the whole frame (5 rows x 4 columns)
df

Unnamed: 0,W,X,Y,Z
A,2,79,-8,-86
B,6,-29,88,-80
C,2,21,-26,-13
D,16,-1,3,51
E,30,49,-48,-99


In [9]:
# select several columns at once: all rows, columns W and Y
df.loc[:, ['W', 'Y']]

Unnamed: 0,W,Y
A,2,-8
B,6,88
C,2,-26
D,16,3
E,30,-48


In [10]:
# select rows by label: rows A and C, every column
df.loc[['A', 'C'], :]

Unnamed: 0,W,X,Y,Z
A,2,79,-8,-86
C,2,21,-26,-13


In [12]:
# create a new column 'new' holding the element-wise sum of W and Y
# (this assignment modifies df in place)
df['new'] = df['W'].add(df['Y'])
df

Unnamed: 0,W,X,Y,Z,new
A,2,79,-8,-86,-6
B,6,-29,88,-80,94
C,2,21,-26,-13,-24
D,16,-1,3,51,19
E,30,49,-48,-99,-18


In [14]:
# remove the 'new' column; drop() returns a new frame, so df is rebound.
# `columns=` replaces the positional axis argument (df.drop('new', 1)),
# which is deprecated and raises TypeError on pandas 2.x.
df = df.drop(columns='new')
df

Unnamed: 0,W,X,Y,Z
A,2,79,-8,-86
B,6,-29,88,-80
C,2,21,-26,-13
D,16,-1,3,51
E,30,49,-48,-99


In [16]:
# select a single row by integer position (position 2 is the third row);
# the result is a Series indexed by the column labels
df.iloc[2, :]

W     2
X    21
Y   -26
Z   -13
Name: C, dtype: int32

In [17]:
# select several rows by integer position; the slice end is exclusive,
# so 2:4 yields positions 2 and 3
df.iloc[2:4, :]

Unnamed: 0,W,X,Y,Z
C,2,21,-26,-13
D,16,-1,3,51


In [18]:
# select a subset of rows and columns in a single .loc call:
# first list = row labels, second list = column labels.
# Rows come back in the order requested (C before B).
df.loc[['C','B'],['X','Y']]

Unnamed: 0,X,Y
C,21,-26
B,-29,88


In [6]:
# show the current contents of df
df

Unnamed: 0,W,X,Y,Z
A,2,79,-8,-86
B,6,-29,88,-80
C,2,21,-26,-13
D,16,-1,3,51
E,30,49,-48,-99


In [8]:
# keep only the positive values; cells failing the condition become NaN
df.where(df > 0)

Unnamed: 0,W,X,Y,Z
A,2,79.0,,
B,6,,88.0,
C,2,21.0,,
D,16,,3.0,51.0
E,30,49.0,,


In [6]:
# show the current contents of df
df

Unnamed: 0,W,X,Y,Z
A,2,79,-8,-86
B,6,-29,88,-80
C,2,21,-26,-13
D,16,-1,3,51
E,30,49,-48,-99


In [21]:
# conditional selection: values of column W for the rows where X is positive.
# A single .loc[row_mask, column] lookup replaces the chained df[mask]['W']
# form, which builds an intermediate frame and is unsafe for assignment.
df.loc[df['X'] > 0, 'W']

A     2
C     2
E    30
Name: W, dtype: int32

In [27]:
# filter to the rows where X is positive, then take the first of them
# by integer position
df.loc[df['X'] > 0].iloc[0]

W     2
X    79
Y    -8
Z   -86
Name: A, dtype: int32

In [29]:
# select all rows where W is positive (W > 0) and Y is greater than 1.
# The W test previously read `> 1`, which would wrongly exclude W == 1.
# Each comparison is parenthesised because & binds tighter than >.
df[(df['W'] > 0) & (df['Y'] > 1)]

Unnamed: 0,W,X,Y,Z
B,6,-29,88,-80
D,16,-1,3,51


In [34]:
# replace the label index with a default integer index; the old labels are
# kept as a regular column named 'index'.
# NOT in place: a new frame is returned and df itself is left unchanged.
df.reset_index(drop=False)

Unnamed: 0,index,W,X,Y,Z
0,A,2,79,-8,-86
1,B,6,-29,88,-80
2,C,2,21,-26,-13
3,D,16,-1,3,51
4,E,30,49,-48,-99


In [37]:
# add a new column "Emirate" and preview the frame indexed by it.
# The column assignment modifies df in place.
new_ind= ['AUH', 'DXB', 'SHJ', 'FUJ', 'RAK']
df['Emirate'] = new_ind
# set_index returns a new frame and the result is not assigned here, so df
# keeps its A..E index and still carries 'Emirate' as an ordinary column.
df.set_index('Emirate')

Unnamed: 0_level_0,W,X,Y,Z
Emirate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AUH,2,79,-8,-86
DXB,6,-29,88,-80
SHJ,2,21,-26,-13
FUJ,16,-1,3,51
RAK,30,49,-48,-99


In [38]:
# show the column labels of df (an Index object)
df.columns

Index(['W', 'X', 'Y', 'Z', 'Emirate'], dtype='object')

In [39]:
# show summary statistics (count, mean, std, min, quartiles, max);
# only the numeric columns W..Z are summarised, 'Emirate' is left out
df.describe()

Unnamed: 0,W,X,Y,Z
count,5.0,5.0,5.0,5.0
mean,11.2,23.8,1.8,-45.4
std,11.96662,42.109381,51.915316,63.366395
min,2.0,-29.0,-48.0,-99.0
25%,2.0,-1.0,-26.0,-86.0
50%,6.0,21.0,-8.0,-80.0
75%,16.0,49.0,3.0,-13.0
max,30.0,79.0,88.0,51.0


In [42]:
# print a concise summary of df: index range, per-column non-null counts
# and dtypes, and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, A to E
Data columns (total 5 columns):
W          5 non-null int32
X          5 non-null int32
Y          5 non-null int32
Z          5 non-null int32
Emirate    5 non-null object
dtypes: int32(4), object(1)
memory usage: 320.0+ bytes


In [46]:
# show the data type of each column as a Series keyed by column label
df.dtypes

W           int32
X           int32
Y           int32
Z           int32
Emirate    object
dtype: object