# A notebook that explains the basics of DataFrames using the pandas library.

In [2]:
import pandas as pd

In [9]:
# Build a 3x3 DataFrame in which every cell starts out as the scalar 0.
df1 = pd.DataFrame(
    data=0,
    index=['X', 'Y', 'Z'],
    columns=['C1', 'C2', 'C3'],
)
df1

Unnamed: 0,C1,C2,C3
X,0,0,0
Y,0,0,0
Z,0,0,0


In [10]:
# Swap the row labels for the default integer index (0, 1, 2, ...).
# The old labels are kept as a regular column named 'index' (drop=False is the
# default). The result is a new DataFrame; df1 itself is not modified.
df1.reset_index(drop=False)

Unnamed: 0,index,C1,C2,C3
0,X,0,0,0
1,Y,0,0,0
2,Z,0,0,0


In [11]:
import numpy as np
from numpy.random import randn

### What is `numpy.random.seed()`?
It is a function in the NumPy library that sets the seed for generating random numbers. By specifying a seed value, the function ensures that the sequence of random numbers generated remains the same across multiple runs, providing deterministic behavior and allowing reproducibility in random number generation.

In [34]:
# Fix the global NumPy seed so that every run draws the same random numbers.
np.random.seed(123)
# Build a 3x3 DataFrame whose cells are filled with the values drawn by randn.
df2 = pd.DataFrame(
    data=randn(3, 3),
    index=['X', 'Y', 'Z'],
    columns=['C1', 'C2', 'C3'],
)
df2

Unnamed: 0,C1,C2,C3
X,-1.085631,0.997345,0.282978
Y,-1.506295,-0.5786,1.651437
Z,-2.426679,-0.428913,1.265936


## Dealing with columns

### Select a specific column

In [15]:
# Select one column by label; the result is a Series indexed by the row labels.
# Equivalent to the shorthand df2['C1'].
df2.loc[:, 'C1']

X   -0.866740
Y    1.491390
Z   -0.434351
Name: C1, dtype: float64

In [17]:
# Select a subset of the columns by passing a list of labels; the result is a
# DataFrame. Equivalent to the shorthand df2[['C2', 'C3']].
df2.loc[:, ['C2', 'C3']]

Unnamed: 0,C2,C3
X,-0.678886,-0.094709
Y,-0.638902,-0.443982
Z,2.20593,2.186786


### Add new column

In [19]:
# Create column 'C4' as the element-wise sum of 'C1' and 'C2'.
# Assigning to a label that does not exist yet appends it as a new column.
df2['C4'] = df2['C1'].add(df2['C2'])
df2

Unnamed: 0,C1,C2,C3,C4
X,-0.86674,-0.678886,-0.094709,-1.545627
Y,1.49139,-0.638902,-0.443982,0.852488
Z,-0.434351,2.20593,2.186786,1.771579


## Dealing with rows

### Select a specific row

In [21]:
# Select a single row by its label; the result is a Series indexed by the
# column names.
df2.loc['X', :]

C1   -0.866740
C2   -0.678886
C3   -0.094709
C4   -1.545627
Name: X, dtype: float64

In [23]:
# Select a single row by its integer position (0 is the first row).
df2.iloc[0, :]

C1   -0.866740
C2   -0.678886
C3   -0.094709
C4   -1.545627
Name: X, dtype: float64

In [26]:
# Select several rows by label with loc; a list of labels yields a DataFrame.
df2.loc[['X', 'Y'], :]

Unnamed: 0,C1,C2,C3,C4
X,-0.86674,-0.678886,-0.094709,-1.545627
Y,1.49139,-0.638902,-0.443982,0.852488


In [27]:
# Select several rows by integer position with iloc; a list of positions
# yields a DataFrame.
df2.iloc[[0, 1], :]

Unnamed: 0,C1,C2,C3,C4
X,-0.86674,-0.678886,-0.094709,-1.545627
Y,1.49139,-0.638902,-0.443982,0.852488


## Dealing with cells

In [30]:
# Label-based selection of a sub-table: rows first, then columns.
df2.loc[
    ['X', 'Z'],    # row labels
    ['C1', 'C3'],  # column labels
]

Unnamed: 0,C1,C3
X,-0.86674,-0.094709
Z,-0.434351,2.186786


In [31]:
# Position-based selection of a sub-table: rows first, then columns.
df2.iloc[
    [0, 2],  # row positions
    [0, 2],  # column positions
]

Unnamed: 0,C1,C3
X,-0.86674,-0.094709
Z,-0.434351,2.186786


In [32]:
# Read one scalar value by row label and column label.
# .at is the label-based accessor dedicated to single cells; for a single
# (row, column) pair it returns the same value as df2.loc['X', 'C3'].
df2.at['X', 'C3']

-0.09470896893689112

## Drop

In [41]:
# Return a new DataFrame without column 'C1'; df2 itself is left untouched.
# columns='C1' is the keyword spelling of drop('C1', axis=1).
df2.drop(columns='C1')

Unnamed: 0,C2,C3
X,0.997345,0.282978
Y,-0.5786,1.651437
Z,-0.428913,1.265936


In [42]:
# Return a new DataFrame without row 'X'; df2 itself is left untouched.
# index='X' is the keyword spelling of drop('X', axis=0).
df2.drop(index='X')

Unnamed: 0,C1,C2,C3
Y,-1.506295,-0.5786,1.651437
Z,-2.426679,-0.428913,1.265936


In [45]:
# Delete column 'C1' from df2 for good by rebinding the name to the result of
# drop(). The effect on df2 is the same as drop(..., inplace=True), but
# rebinding avoids mutating a frame that earlier cells already displayed.
# errors='ignore' keeps the cell safe to re-run: once 'C1' is gone, a second
# run is a no-op instead of a KeyError.
df2 = df2.drop(columns='C1', errors='ignore')
df2

Unnamed: 0,C2,C3
X,0.997345,0.282978
Y,-0.5786,1.651437
Z,-0.428913,1.265936


In [48]:
# Delete row 'Y' from df2 for good by rebinding the name to the result of
# drop(). The effect on df2 is the same as drop(..., inplace=True), but
# rebinding avoids mutating a frame that earlier cells already displayed.
# errors='ignore' keeps the cell safe to re-run: once 'Y' is gone, a second
# run is a no-op instead of a KeyError.
df2 = df2.drop(index='Y', errors='ignore')
df2

Unnamed: 0,C2,C3
X,0.997345,0.282978
Z,-0.428913,1.265936


In [49]:
# Delete several columns at once by passing a list of labels, rebinding df2
# to the result instead of using inplace=True.
# errors='ignore' keeps the cell safe to re-run: labels that are already gone
# are skipped instead of raising a KeyError.
df2 = df2.drop(columns=['C2', 'C3'], errors='ignore')
df2

X
Z


In [50]:
# Delete several rows at once by passing a list of labels, rebinding df2 to
# the result instead of using inplace=True.
# errors='ignore' keeps the cell safe to re-run: labels that are already gone
# are skipped instead of raising a KeyError.
df2 = df2.drop(index=['X', 'Z'], errors='ignore')
df2  # Empty DataFrame