In [3]:
import pandas as pd

# Defining the sample (and dummy) table
We generate some dummy data to work with.

In [6]:
# Sample expense records, one list per column.
# Dates are day-first strings (d/m/yy).
data = {
    'Date': [
        '1/1/22', '1/1/22', '1/1/22', '1/1/22', '2/1/22',
        '5/1/22', '6/1/22', '7/1/22', '8/1/22', '9/1/22',
    ],
    'Expenses': [143.5, 25, 25, 24, 3.75, 20, 5, 100, 24.5, 2],
    'Type': [
        'Gift', 'Clothes', 'Gaming', 'Clothes', 'Coffee',
        'Gift', 'Clothes', 'Gaming', 'Clothes', 'Coffee',
    ],
}

# Build the frame and parse 'Date' into real datetimes in one chain;
# assigning to an existing column keeps its position.
df = pd.DataFrame(data).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%d/%m/%y')
)

# Preview the first five rows
df.head(5)


Unnamed: 0,Date,Expenses,Type
0,2022-01-01,143.5,Gift
1,2022-01-01,25.0,Clothes
2,2022-01-01,25.0,Gaming
3,2022-01-01,24.0,Clothes
4,2022-01-02,3.75,Coffee


In [3]:
# Extended view of the data with the date split into day, month and year.
# NOTE: plain assignment does not copy -- `df_ext` and `df` are the same
# object, so `df` gains these columns as well (the later cells that display
# `df` show them).
df_ext = df

# Derive 'Day', 'Month' and 'Year' from the 'Date' column
date_parts = df['Date'].dt
df_ext['Day'] = date_parts.day
df_ext['Month'] = date_parts.month
df_ext['Year'] = date_parts.year

# Display the extended DataFrame
df_ext

Unnamed: 0,Date,Expenses,Type,Day,Month,Year
0,2022-01-01,143.5,Gift,1,1,2022
1,2022-01-01,25.0,Clothes,1,1,2022
2,2022-01-01,25.0,Gaming,1,1,2022
3,2022-01-01,24.0,Clothes,1,1,2022
4,2022-01-02,3.75,Coffee,2,1,2022
5,2022-01-05,20.0,Gift,5,1,2022
6,2022-01-06,5.0,Clothes,6,1,2022
7,2022-01-07,100.0,Gaming,7,1,2022
8,2022-01-08,24.5,Clothes,8,1,2022
9,2022-01-09,2.0,Coffee,9,1,2022


# 1. Exploring our data

## head()
**head()**: Returns the first N rows of a DataFrame (5 by default). 

You can go check the post in the following [link](). 

In [4]:
# Show only the first five rows of the frame
df.head(n=5)

Unnamed: 0,Date,Expenses,Type,Day,Month,Year
0,2022-01-01,143.5,Gift,1,1,2022
1,2022-01-01,25.0,Clothes,1,1,2022
2,2022-01-01,25.0,Gaming,1,1,2022
3,2022-01-01,24.0,Clothes,1,1,2022
4,2022-01-02,3.75,Coffee,2,1,2022


## info(): 
**info()**: Gives a quick overview of the DataFrame: every column, its number of non-null records, and its data type. 

You can go check the post in the following [link](). 

In [5]:
# Print a concise summary of `df`: index range, per-column non-null count and dtype, and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      10 non-null     datetime64[ns]
 1   Expenses  10 non-null     float64       
 2   Type      10 non-null     object        
 3   Day       10 non-null     int32         
 4   Month     10 non-null     int32         
 5   Year      10 non-null     int32         
dtypes: datetime64[ns](1), float64(1), int32(3), object(1)
memory usage: 488.0+ bytes


## describe(): 
**describe()**: Produces quick summary statistics for the selected columns of the DataFrame. It gives us the count of records, together with: 
- mean value
- min value
- percentiles 25, 50 and 75
- max value

You can go check the post in the following [link](). 

In [6]:
# Summarise every column, whatever its dtype.
# (A cell renders only its last expression, so this result is computed but not shown.)
df.describe(include='all')

# Restrict the summary to a single kind of column (object, float, int or datetime)
df.describe(include=['datetime'])

Unnamed: 0,Date
count,10
mean,2022-01-04 02:24:00
min,2022-01-01 00:00:00
25%,2022-01-01 00:00:00
50%,2022-01-03 12:00:00
75%,2022-01-06 18:00:00
max,2022-01-09 00:00:00


## query()
**query():** Selects the rows that satisfy a boolean expression written as a string, much like a SQL `WHERE` clause. 

You can go check the post in the following [link](). 

In [7]:
# Keep only the rows whose 'Expenses' value is strictly below 25
df.query("Expenses<25")

Unnamed: 0,Date,Expenses,Type,Day,Month,Year
3,2022-01-01,24.0,Clothes,1,1,2022
4,2022-01-02,3.75,Coffee,2,1,2022
5,2022-01-05,20.0,Gift,5,1,2022
6,2022-01-06,5.0,Clothes,6,1,2022
8,2022-01-08,24.5,Clothes,8,1,2022
9,2022-01-09,2.0,Coffee,9,1,2022


## isnull()
**isnull():** Returns a boolean table of the same shape as our DataFrame, with `True` wherever a value is null, so we can spot missing data.

Yet to come...

In [8]:
# Element-wise missing-value check: the result has the same rows and columns as `df`, holding True where a value is null
df.isnull()

Unnamed: 0,Date,Expenses,Type,Day,Month,Year
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,False,False,False,False
5,False,False,False,False,False,False
6,False,False,False,False,False,False
7,False,False,False,False,False,False
8,False,False,False,False,False,False
9,False,False,False,False,False,False


# 2. Cleaning the data

# 3. Transforming the data