In [2]:
import pandas as pd
import numpy as np
from numpy.random import randn
# Seed NumPy's global random generator so the random values drawn below are reproducible
np.random.seed(2023)

In [3]:
# Create a DataFrame
# Syntax: pd.DataFrame(data=None, index=None, columns=None)
# The two statements below pass the same arguments in two ways (keywords vs.
# positional). Each call draws a fresh 5x4 block from randn, and the second
# assignment replaces the first, so `df` ends up holding the second draw.

df = pd.DataFrame(randn(5,4), index = ['A','B','C','D','E'], columns = ['W','X','Y','Z'])
df = pd.DataFrame(randn(5,4),['A','B','C','D','E'],['W','X','Y','Z'])

In [4]:
# Display the DataFrame that was just created
df

Unnamed: 0,W,X,Y,Z
A,0.711674,-0.324485,-1.001871,0.236251
B,-0.10216,-1.141293,2.654407,1.440605
C,0.098902,-3.121532,-1.076522,-0.325682
D,-1.035498,-0.42632,-1.029361,-0.521774
E,-0.422758,-0.099244,-1.380258,0.301903


In [5]:
# Get the dimensions of the DataFrame as a (rows, columns) tuple
df.shape

(5, 4)

In [6]:
# Check the Python type of the object
type(df)

pandas.core.frame.DataFrame

In [7]:
# Data type of each column
df.dtypes

W    float64
X    float64
Y    float64
Z    float64
dtype: object

In [8]:
# Reverse the row order (last row first); this returns a new object and leaves df as it is
df.iloc[::-1]

Unnamed: 0,W,X,Y,Z
E,-0.422758,-0.099244,-1.380258,0.301903
D,-1.035498,-0.42632,-1.029361,-0.521774
C,0.098902,-3.121532,-1.076522,-0.325682
B,-0.10216,-1.141293,2.654407,1.440605
A,0.711674,-0.324485,-1.001871,0.236251


# Columns
Each column of a DataFrame is a pandas Series.

In [9]:
# Select a single column by name; the result is a Series
df['Z']

A    0.236251
B    1.440605
C   -0.325682
D   -0.521774
E    0.301903
Name: Z, dtype: float64

In [10]:
# Check the type of a single selected column
type(df['W'])

pandas.core.series.Series

In [11]:
# Select several columns by passing a list of the column names to display;
# the result is a DataFrame
df.loc[:, ['W', 'Z']]

Unnamed: 0,W,Z
A,0.711674,0.236251
B,-0.10216,1.440605
C,0.098902,-0.325682
D,-1.035498,-0.521774
E,-0.422758,0.301903


In [12]:
# Add a new column holding the element-wise sum of columns W and Z
df['new'] = df['W'].add(df['Z'])

In [13]:
# Check that the 'new' column now appears in df
df

Unnamed: 0,W,X,Y,Z,new
A,0.711674,-0.324485,-1.001871,0.236251,0.947924
B,-0.10216,-1.141293,2.654407,1.440605,1.338445
C,0.098902,-3.121532,-1.076522,-0.325682,-0.22678
D,-1.035498,-0.42632,-1.029361,-0.521774,-1.557272
E,-0.422758,-0.099244,-1.380258,0.301903,-0.120855


In [14]:
# Drop a column. drop() returns a new DataFrame and leaves df untouched because
# inplace defaults to False, so the two calls below are equivalent.
df.drop(columns='new')
df.drop(columns='new', inplace=False)

Unnamed: 0,W,X,Y,Z
A,0.711674,-0.324485,-1.001871,0.236251
B,-0.10216,-1.141293,2.654407,1.440605
C,0.098902,-3.121532,-1.076522,-0.325682
D,-1.035498,-0.42632,-1.029361,-0.521774
E,-0.422758,-0.099244,-1.380258,0.301903


In [15]:
# df itself is unchanged: the 'new' column is still present
df

Unnamed: 0,W,X,Y,Z,new
A,0.711674,-0.324485,-1.001871,0.236251,0.947924
B,-0.10216,-1.141293,2.654407,1.440605,1.338445
C,0.098902,-3.121532,-1.076522,-0.325682,-0.22678
D,-1.035498,-0.42632,-1.029361,-0.521774,-1.557272
E,-0.422758,-0.099244,-1.380258,0.301903,-0.120855


In [16]:
# Drop several columns at once by passing a list of column names
df.drop(columns=['W', 'X'])

Unnamed: 0,Y,Z,new
A,-1.001871,0.236251,0.947924
B,2.654407,1.440605,1.338445
C,-1.076522,-0.325682,-0.22678
D,-1.029361,-0.521774,-1.557272
E,-1.380258,0.301903,-0.120855


In [17]:
# df still holds all of its columns
df

Unnamed: 0,W,X,Y,Z,new
A,0.711674,-0.324485,-1.001871,0.236251,0.947924
B,-0.10216,-1.141293,2.654407,1.440605,1.338445
C,0.098902,-3.121532,-1.076522,-0.325682,-0.22678
D,-1.035498,-0.42632,-1.029361,-0.521774,-1.557272
E,-0.422758,-0.099244,-1.380258,0.301903,-0.120855


In [18]:
# Remove the column permanently by rebinding df to the frame returned by drop().
# drop(..., inplace=True) would leave df in the same state, but it returns None
# and cannot be chained, so plain reassignment is the preferred form.
df = df.drop(columns='new')

In [19]:
# Confirm that the 'new' column is gone from df
df

Unnamed: 0,W,X,Y,Z
A,0.711674,-0.324485,-1.001871,0.236251
B,-0.10216,-1.141293,2.654407,1.440605
C,0.098902,-3.121532,-1.076522,-0.325682
D,-1.035498,-0.42632,-1.029361,-0.521774
E,-0.422758,-0.099244,-1.380258,0.301903


# Rows

In [20]:
# Select a row by its index label; the result is a Series
df.loc['A']

W    0.711674
X   -0.324485
Y   -1.001871
Z    0.236251
Name: A, dtype: float64

In [21]:
# Select a row by its integer position (0 is the first row)
df.iloc[0]

W    0.711674
X   -0.324485
Y   -1.001871
Z    0.236251
Name: A, dtype: float64

In [22]:
# Drop a row by its index label. Rows are the default axis of drop(), so the
# two calls below are equivalent; df itself is not modified.
df.drop(index='A')
df.drop('A', axis='index')

Unnamed: 0,W,X,Y,Z
B,-0.10216,-1.141293,2.654407,1.440605
C,0.098902,-3.121532,-1.076522,-0.325682
D,-1.035498,-0.42632,-1.029361,-0.521774
E,-0.422758,-0.099244,-1.380258,0.301903


In [23]:
# df is unchanged: row A is still present
df

Unnamed: 0,W,X,Y,Z
A,0.711674,-0.324485,-1.001871,0.236251
B,-0.10216,-1.141293,2.654407,1.440605
C,0.098902,-3.121532,-1.076522,-0.325682
D,-1.035498,-0.42632,-1.029361,-0.521774
E,-0.422758,-0.099244,-1.380258,0.301903


In [24]:
# Drop several rows at once by passing a list of index labels
df.drop(index=['A', 'B'])

Unnamed: 0,W,X,Y,Z
C,0.098902,-3.121532,-1.076522,-0.325682
D,-1.035498,-0.42632,-1.029361,-0.521774
E,-0.422758,-0.099244,-1.380258,0.301903


In [25]:
# df still contains every row
df

Unnamed: 0,W,X,Y,Z
A,0.711674,-0.324485,-1.001871,0.236251
B,-0.10216,-1.141293,2.654407,1.440605
C,0.098902,-3.121532,-1.076522,-0.325682
D,-1.035498,-0.42632,-1.029361,-0.521774
E,-0.422758,-0.099244,-1.380258,0.301903


In [26]:
# Like a 2-D NumPy array, .loc takes [row, column]; here with labels, returning a single value
df.loc['B','Y']

2.654407260948932

In [27]:
# Select a sub-DataFrame by passing a list of row labels and a list of column labels
df.loc[['A','B'],['W','Y']]

Unnamed: 0,W,Y
A,0.711674,-1.001871
B,-0.10216,2.654407
