In [1]:
import numpy as np
import pandas as pd

In [2]:
columns = ['W', 'X', 'Y', 'Z']

In [3]:
index = ['A', 'B', 'C', 'D', 'E']

In [4]:
from numpy.random import randint

In [7]:
# Fix the global NumPy seed so the sample matrix is the same on every run.
np.random.seed(42)
# 5 rows x 4 columns of integers drawn from [-100, 100).
data = np.random.randint(low=-100, high=100, size=(5, 4))

In [8]:
data

array([[  2,  79,  -8, -86],
       [  6, -29,  88, -80],
       [  2,  21, -26, -13],
       [ 16,  -1,   3,  51],
       [ 30,  49, -48, -99]])

In [9]:
# Wrap the random matrix in a DataFrame, labelling rows and columns explicitly.
df = pd.DataFrame(data=data, index=index, columns=columns)

In [10]:
df

Unnamed: 0,W,X,Y,Z
A,2,79,-8,-86
B,6,-29,88,-80
C,2,21,-26,-13
D,16,-1,3,51
E,30,49,-48,-99


In [15]:
df['W']

A     2
B     6
C     2
D    16
E    30
Name: W, dtype: int32

In [17]:
type(df['W'])

pandas.core.series.Series

A DataFrame is a group of Series that share the same index.

In [18]:
# Store the element-wise sum of columns W and Y as a new column.
df['new'] = df['W'].add(df['Y'])

In [19]:
df

Unnamed: 0,W,X,Y,Z,new
A,2,79,-8,-86,-6
B,6,-29,88,-80,94
C,2,21,-26,-13,-24
D,16,-1,3,51,19
E,30,49,-48,-99,-18


In [21]:
# drop() returns a new frame without the column; df itself is not modified.
df.drop(columns='new')

Unnamed: 0,W,X,Y,Z
A,2,79,-8,-86
B,6,-29,88,-80
C,2,21,-26,-13
D,16,-1,3,51
E,30,49,-48,-99


In [22]:
df 

Unnamed: 0,W,X,Y,Z,new
A,2,79,-8,-86,-6
B,6,-29,88,-80,94
C,2,21,-26,-13,-24
D,16,-1,3,51,19
E,30,49,-48,-99,-18


In [23]:
# Rebind df to the result so the column removal actually sticks.
df = df.drop(columns='new')

In [24]:
df

Unnamed: 0,W,X,Y,Z
A,2,79,-8,-86
B,6,-29,88,-80
C,2,21,-26,-13
D,16,-1,3,51
E,30,49,-48,-99


In [25]:
# 'A' is a row label, not a column name, so bracket lookup raises KeyError.
# Catch and show the error so the notebook still runs top to bottom.
try:
    df['A']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'A'

In [26]:
df.loc['A']

W     2
X    79
Y    -8
Z   -86
Name: A, dtype: int32

We can see that both columns and rows are Series.

Rows can also be selected by integer position with `iloc`.

In [29]:
df.iloc[-1]

W    30
X    49
Y   -48
Z   -99
Name: E, dtype: int32

In [31]:
df.loc[['A', 'C'],['W', 'Y']]

Unnamed: 0,W,Y
A,2,-8
C,2,-26


In [32]:
df > 0

Unnamed: 0,W,X,Y,Z
A,True,True,False,False
B,True,False,True,False
C,True,True,False,False
D,True,False,True,True
E,True,True,False,False


In [33]:
df[df > 0]

Unnamed: 0,W,X,Y,Z
A,2,79.0,,
B,6,,88.0,
C,2,21.0,,
D,16,,3.0,51.0
E,30,49.0,,


Columns are features, so rows can be filtered with a condition on a column.

In [34]:
df['X'] > 0

A     True
B    False
C     True
D    False
E     True
Name: X, dtype: bool

In [37]:
df[df['X'] > 0]

Unnamed: 0,W,X,Y,Z
A,2,79,-8,-86
C,2,21,-26,-13
E,30,49,-48,-99


In [38]:
df[df['X'] > 0].iloc[0]

W     2
X    79
Y    -8
Z   -86
Name: A, dtype: int32

In [46]:
# Combine boolean masks element-wise with & (and) or | (or).
# Each comparison needs its own parentheses because & and | bind
# more tightly than > in Python.
# Here: keep rows where W is positive AND Y is greater than 1.
df[(df['W'] > 0) & (df['Y'] > 1)]

Unnamed: 0,W,X,Y,Z
B,6,-29,88,-80
D,16,-1,3,51


In [47]:
# Keep rows where W is positive OR Y is greater than 1 (element-wise | on the two masks).
df[(df['W'] > 0) | (df['Y'] > 1)]

Unnamed: 0,W,X,Y,Z
A,2,79,-8,-86
B,6,-29,88,-80
C,2,21,-26,-13
D,16,-1,3,51
E,30,49,-48,-99


In [48]:
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,2,79,-8,-86
1,B,6,-29,88,-80
2,C,2,21,-26,-13
3,D,16,-1,3,51
4,E,30,49,-48,-99


In [49]:
new_ind = ['CA', 'NY', 'WY', 'OR', 'CO']

In [50]:
# Attach the state codes as an ordinary column, one label per existing row.
df['States'] = new_ind

In [51]:
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2,79,-8,-86
NY,6,-29,88,-80
WY,2,21,-26,-13
OR,16,-1,3,51
CO,30,49,-48,-99


In [52]:
df

Unnamed: 0,W,X,Y,Z,States
A,2,79,-8,-86,CA
B,6,-29,88,-80,NY
C,2,21,-26,-13,WY
D,16,-1,3,51,OR
E,30,49,-48,-99,CO


In [53]:
# set_index() returns a new frame, so rebind df to make States the row index.
df = df.set_index(keys='States')

In [54]:
df

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2,79,-8,-86
NY,6,-29,88,-80
WY,2,21,-26,-13
OR,16,-1,3,51
CO,30,49,-48,-99


Be careful: the index is not a column. It is a special series of row labels, but it is not counted among the columns!

In [55]:
df.describe()

Unnamed: 0,W,X,Y,Z
count,5.0,5.0,5.0,5.0
mean,11.2,23.8,1.8,-45.4
std,11.96662,42.109381,51.915316,63.366395
min,2.0,-29.0,-48.0,-99.0
25%,2.0,-1.0,-26.0,-86.0
50%,6.0,21.0,-8.0,-80.0
75%,16.0,49.0,3.0,-13.0
max,30.0,79.0,88.0,51.0


In [57]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, CA to CO
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   W       5 non-null      int32
 1   X       5 non-null      int32
 2   Y       5 non-null      int32
 3   Z       5 non-null      int32
dtypes: int32(4)
memory usage: 120.0+ bytes


In [58]:
df.dtypes

W    int32
X    int32
Y    int32
Z    int32
dtype: object

In [59]:
type(df)

pandas.core.frame.DataFrame