# Calculations: Numeric and Date  
- [Numeric Columns](#Numeric-Columns)  
  - [Calculations Done Row by Row](#Calculations-Done-Row-by-Row)  
  - [Calculations Done on Entire Column/Series](#Calculations-Done-on-Entire-Column/Series)


- [Date Columns](#Date-Columns)  
  - [Create new Columns based on a DateTime column](#Create-new-Columns-based-on-a-DateTime-column)  
  - [Subset based on Date](#Subset-based-on-Date)

In [1]:
import pandas as pd

# Numeric Columns

In [2]:
# Load the shoe-order data from a CSV file into a DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.
df_shoes = pd.read_csv("Data/ShoeData_Small.csv")

# Preview the first five rows (head() defaults to 5) to sanity-check the load
df_shoes.head()

Unnamed: 0,OrderID,Order Date,Product,Size (US),Colour,Price ($),Quantity
0,10,04/01/2015,Boots,12,Black,25,5
1,7,03/01/2015,Boots,10,Black,25,1
2,8,03/01/2015,Boots,9,Black,25,10
3,11,05/01/2015,Boots,7,Black,25,22
4,2,02/01/2015,Boots,4,Black,25,20


#### Do Initial Cleaning of Data

In [3]:
# List the column labels so the exact names (including spaces and symbols) are known before renaming
df_shoes.columns

Index(['OrderID', 'Order Date', 'Product', 'Size (US)', 'Colour', 'Price ($)',
       'Quantity'],
      dtype='object')

In [4]:
# Drop the unit suffixes from two column labels: 'Size (US)' -> 'Size', 'Price ($)' -> 'Price'.
# rename() returns a new DataFrame, so the result is assigned back to df_shoes.
column_renames = {'Size (US)': 'Size', 'Price ($)': 'Price'}
df_shoes = df_shoes.rename(columns=column_renames)
df_shoes.head()

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity
0,10,04/01/2015,Boots,12,Black,25,5
1,7,03/01/2015,Boots,10,Black,25,1
2,8,03/01/2015,Boots,9,Black,25,10
3,11,05/01/2015,Boots,7,Black,25,22
4,2,02/01/2015,Boots,4,Black,25,20


In [5]:
# Show each column's dtype to see which columns still need converting
df_shoes.dtypes

OrderID        int64
Order Date    object
Product       object
Size           int64
Colour        object
Price          int64
Quantity       int64
dtype: object

In [6]:
# Change data type of Price to float and Order Date to datetime and check
df_shoes['Price'] = pd.to_numeric(df_shoes['Price']).astype(float)

# Parse Order Date with an explicit format instead of letting pandas guess.
# '04/01/2015' is ambiguous (April 1 vs. 4 January); month-first is what the
# rest of this notebook assumes (it treats 02/01/2015 as a February order).
# NOTE(review): every date visible in the outputs lands on the 1st of a month,
# which may mean the file is really day-first -- if so, use format='%d/%m/%Y'
# and re-check the quarter/month subsets further down.
df_shoes['Order Date'] = pd.to_datetime(df_shoes['Order Date'], format='%m/%d/%Y')
df_shoes.dtypes

OrderID                int64
Order Date    datetime64[ns]
Product               object
Size                   int64
Colour                object
Price                float64
Quantity               int64
dtype: object

### Calculations Done Row by Row

In [7]:
# Line total for each order: unit price times quantity, computed element-wise per row
df_shoes['order_subtotal'] = df_shoes['Price'].mul(df_shoes['Quantity'])
df_shoes.head(n=3)

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity,order_subtotal
0,10,2015-04-01,Boots,12,Black,25.0,5,125.0
1,7,2015-03-01,Boots,10,Black,25.0,1,25.0
2,8,2015-03-01,Boots,9,Black,25.0,10,250.0


In [8]:
# Estimated sales tax per order: a flat rate applied to the order subtotal.
# The rate is named so it can be changed in one place instead of hunting for a bare .10.
SALES_TAX_RATE = 0.10
df_shoes['estimated_sales_tax'] = df_shoes['order_subtotal'] * SALES_TAX_RATE
df_shoes.head(3)

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity,order_subtotal,estimated_sales_tax
0,10,2015-04-01,Boots,12,Black,25.0,5,125.0,12.5
1,7,2015-03-01,Boots,10,Black,25.0,1,25.0,2.5
2,8,2015-03-01,Boots,9,Black,25.0,10,250.0,25.0


### Calculations Done on Entire Column/Series

In [9]:
# Sum the entire estimated_sales_tax column into a single value
total_sales_tax = df_shoes['estimated_sales_tax'].sum()

# Print as currency: ',' adds a thousands separator and '.2f' fixes two decimal places
print(f'Estimated Sales Tax Total:  ${total_sales_tax:,.2f}')

Estimated Sales Tax Total:  $240.00


In [10]:
# Total units ordered: sum of the Quantity column across all rows
total_shoes_ordered = df_shoes['Quantity'].sum()
print("Total Number of Shoes Ordered:  ", total_shoes_ordered)

Total Number of Shoes Ordered:   113


In [11]:
# Count the non-null OrderID entries and display the result.
# NOTE(review): count() tallies rows, not distinct IDs -- this equals the number
# of orders only if each OrderID appears on exactly one row; confirm.
number_of_orders = df_shoes['OrderID'].count()
print("Number of Orders:  ", number_of_orders)

Number of Orders:   12


In [12]:
# Total sales: sum of the order_subtotal column across all rows
total_sales = df_shoes['order_subtotal'].sum()
# Print as currency (thousands separator, two decimal places)
print(f'Total Sales are   ${total_sales:,.2f}')

Total Sales are   $2,400.00


In [13]:
# Average order amount: total sales divided by the order count.
# Relies on total_sales and number_of_orders computed in the preceding cells.
average_order = total_sales / number_of_orders
# Print as currency (thousands separator, two decimal places)
print(f'The Average Order Amount is ${average_order:,.2f}')

The Average Order Amount is $200.00


# Date Columns

## Create new Columns based on a DateTime column  
![Datetime Properties](Images/DatetimeProperties.png)

### Create new year column

In [14]:
# Pull the calendar year out of each order's timestamp via the .dt accessor
order_dates = df_shoes['Order Date']
df_shoes['year'] = order_dates.dt.year

df_shoes.head(n=3)

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity,order_subtotal,estimated_sales_tax,year
0,10,2015-04-01,Boots,12,Black,25.0,5,125.0,12.5,2015
1,7,2015-03-01,Boots,10,Black,25.0,1,25.0,2.5,2015
2,8,2015-03-01,Boots,9,Black,25.0,10,250.0,25.0,2015


### Create Month Number and Name columns

In [15]:
# Month as a number via .dt.month and as an abbreviated name via strftime('%b') (e.g. 'Mar')
order_dates = df_shoes['Order Date']
df_shoes['month_number'] = order_dates.dt.month
df_shoes['month_name'] = order_dates.dt.strftime('%b')

df_shoes.head(n=3)

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity,order_subtotal,estimated_sales_tax,year,month_number,month_name
0,10,2015-04-01,Boots,12,Black,25.0,5,125.0,12.5,2015,4,Apr
1,7,2015-03-01,Boots,10,Black,25.0,1,25.0,2.5,2015,3,Mar
2,8,2015-03-01,Boots,9,Black,25.0,10,250.0,25.0,2015,3,Mar


### Create new Quarter column

In [16]:
# Label each order with its quarter as a pandas Period (displayed like 2015Q2)
df_shoes['quarter'] = df_shoes['Order Date'].dt.to_period(freq="Q")

df_shoes.head(n=3)

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity,order_subtotal,estimated_sales_tax,year,month_number,month_name,quarter
0,10,2015-04-01,Boots,12,Black,25.0,5,125.0,12.5,2015,4,Apr,2015Q2
1,7,2015-03-01,Boots,10,Black,25.0,1,25.0,2.5,2015,3,Mar,2015Q1
2,8,2015-03-01,Boots,9,Black,25.0,10,250.0,25.0,2015,3,Mar,2015Q1


### Create weekday_number and weekday_name columns

In [17]:
# Day of the week as a number (Monday=0 ... Sunday=6) and as a full day name
df_shoes['weekday_number'] = df_shoes['Order Date'].dt.weekday
# .dt.weekday_name was deprecated and then removed in pandas 1.0, so it raises
# AttributeError on current pandas; .dt.day_name() is the supported replacement
# and yields the same English names ('Wednesday', 'Sunday', ...) by default.
df_shoes['weekday_name'] = df_shoes['Order Date'].dt.day_name()

# Display top rows
df_shoes.head(3)

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity,order_subtotal,estimated_sales_tax,year,month_number,month_name,quarter,weekday_number,weekday_name
0,10,2015-04-01,Boots,12,Black,25.0,5,125.0,12.5,2015,4,Apr,2015Q2,2,Wednesday
1,7,2015-03-01,Boots,10,Black,25.0,1,25.0,2.5,2015,3,Mar,2015Q1,6,Sunday
2,8,2015-03-01,Boots,9,Black,25.0,10,250.0,25.0,2015,3,Mar,2015Q1,6,Sunday


## Subset based on Date

In [18]:
# Display the df_shoes dataframe in full, so the rows to be subset by date are visible
df_shoes

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity,order_subtotal,estimated_sales_tax,year,month_number,month_name,quarter,weekday_number,weekday_name
0,10,2015-04-01,Boots,12,Black,25.0,5,125.0,12.5,2015,4,Apr,2015Q2,2,Wednesday
1,7,2015-03-01,Boots,10,Black,25.0,1,25.0,2.5,2015,3,Mar,2015Q1,6,Sunday
2,8,2015-03-01,Boots,9,Black,25.0,10,250.0,25.0,2015,3,Mar,2015Q1,6,Sunday
3,11,2015-05-01,Boots,7,Black,25.0,22,550.0,55.0,2015,5,May,2015Q2,4,Friday
4,2,2015-02-01,Boots,4,Black,25.0,20,500.0,50.0,2015,2,Feb,2015Q1,6,Sunday
5,1,2015-02-01,Boots,7,Gold,25.0,1,25.0,2.5,2015,2,Feb,2015Q1,6,Sunday
6,6,2015-03-01,Boots,5,Gold,25.0,2,50.0,5.0,2015,3,Mar,2015Q1,6,Sunday
7,3,2015-02-01,Boots,6,Red,25.0,12,300.0,30.0,2015,2,Feb,2015Q1,6,Sunday
8,5,2015-03-01,Boots,6,Silver,25.0,15,375.0,37.5,2015,3,Mar,2015Q1,6,Sunday
9,4,2015-03-01,House Slippers,11,Black,8.0,10,80.0,8.0,2015,3,Mar,2015Q1,6,Sunday


In [19]:
# Display data types for df_shoes (Order Date must be datetime for the date comparisons below)
df_shoes.dtypes

OrderID                         int64
Order Date             datetime64[ns]
Product                        object
Size                            int64
Colour                         object
Price                         float64
Quantity                        int64
order_subtotal                float64
estimated_sales_tax           float64
year                            int64
month_number                    int64
month_name                     object
quarter                 period[Q-DEC]
weekday_number                  int64
weekday_name                   object
dtype: object

In [20]:
# Make sure Order Date is a datetime column before filtering on it, then check the dtypes.
# NOTE(review): Order Date already shows as datetime64[ns] in the dtypes listing
# just before this cell, so this looks like a no-op kept as a safeguard -- confirm
# whether it is still needed.
df_shoes['Order Date'] = pd.to_datetime(df_shoes['Order Date'])
df_shoes.dtypes

OrderID                         int64
Order Date             datetime64[ns]
Product                        object
Size                            int64
Colour                         object
Price                         float64
Quantity                        int64
order_subtotal                float64
estimated_sales_tax           float64
year                            int64
month_number                    int64
month_name                     object
quarter                 period[Q-DEC]
weekday_number                  int64
weekday_name                   object
dtype: object

In [21]:
# Display the number of rows in the full dataframe (shape[0] is the row count)
print("The number of Rows in df_shoes are:  ", df_shoes.shape[0])

The number of Rows in df_shoes are:   12


#### Selected Quarter Data

In [22]:
# Cut-off used for the quarter subset: midnight on 1 April 2015.
# pd.to_datetime turns the date string into a pandas Timestamp that can be
# compared directly against a datetime column.
Q2_START = '2015/04/01'
ts = pd.to_datetime(Q2_START)
ts

Timestamp('2015-04-01 00:00:00')

In [23]:
# Boolean mask selecting Q1 orders, i.e. those dated before the April 1 cut-off held in `ts`.
# Named is_q1 rather than `filter` so Python's built-in filter() is not shadowed
# for the rest of the session.
# NOTE(review): there is no lower bound, so any order dated before 2015 would
# also pass -- fine if the file only holds 2015 data; confirm.
is_q1 = df_shoes['Order Date'] < ts

# Create a new df_Q1 dataframe with just First Quarter Sales
df_Q1 = df_shoes[is_q1]
df_Q1

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity,order_subtotal,estimated_sales_tax,year,month_number,month_name,quarter,weekday_number,weekday_name
1,7,2015-03-01,Boots,10,Black,25.0,1,25.0,2.5,2015,3,Mar,2015Q1,6,Sunday
2,8,2015-03-01,Boots,9,Black,25.0,10,250.0,25.0,2015,3,Mar,2015Q1,6,Sunday
4,2,2015-02-01,Boots,4,Black,25.0,20,500.0,50.0,2015,2,Feb,2015Q1,6,Sunday
5,1,2015-02-01,Boots,7,Gold,25.0,1,25.0,2.5,2015,2,Feb,2015Q1,6,Sunday
6,6,2015-03-01,Boots,5,Gold,25.0,2,50.0,5.0,2015,3,Mar,2015Q1,6,Sunday
7,3,2015-02-01,Boots,6,Red,25.0,12,300.0,30.0,2015,2,Feb,2015Q1,6,Sunday
8,5,2015-03-01,Boots,6,Silver,25.0,15,375.0,37.5,2015,3,Mar,2015Q1,6,Sunday
9,4,2015-03-01,House Slippers,11,Black,8.0,10,80.0,8.0,2015,3,Mar,2015Q1,6,Sunday
10,9,2015-03-01,House Slippers,8,Black,8.0,5,40.0,4.0,2015,3,Mar,2015Q1,6,Sunday


In [24]:
# Display the number of rows in the Q1 subset (shape[0] is the row count)
print("The number of Rows in df_Q1 are:  ", df_Q1.shape[0])

The number of Rows in df_Q1 are:   9


#### Selected Month Total

In [25]:
# Display df_shoes columns to see which derived date columns are available for filtering
df_shoes.columns

Index(['OrderID', 'Order Date', 'Product', 'Size', 'Colour', 'Price',
       'Quantity', 'order_subtotal', 'estimated_sales_tax', 'year',
       'month_number', 'month_name', 'quarter', 'weekday_number',
       'weekday_name'],
      dtype='object')

In [26]:
# Derive month_number and the abbreviated month_name ('%b') from Order Date in df_shoes.
# Both columns are already present at this point, so this simply recomputes them.
order_dates = df_shoes['Order Date']
df_shoes['month_number'] = order_dates.dt.month
df_shoes['month_name'] = order_dates.dt.strftime('%b')

# Preview the first five rows
df_shoes.head(n=5)

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity,order_subtotal,estimated_sales_tax,year,month_number,month_name,quarter,weekday_number,weekday_name
0,10,2015-04-01,Boots,12,Black,25.0,5,125.0,12.5,2015,4,Apr,2015Q2,2,Wednesday
1,7,2015-03-01,Boots,10,Black,25.0,1,25.0,2.5,2015,3,Mar,2015Q1,6,Sunday
2,8,2015-03-01,Boots,9,Black,25.0,10,250.0,25.0,2015,3,Mar,2015Q1,6,Sunday
3,11,2015-05-01,Boots,7,Black,25.0,22,550.0,55.0,2015,5,May,2015Q2,4,Friday
4,2,2015-02-01,Boots,4,Black,25.0,20,500.0,50.0,2015,2,Feb,2015Q1,6,Sunday


In [27]:
# Create df_feb_shoes: Only sales in February

# Boolean mask: True where the order's month number is 2 (February).
# Named is_february rather than `filter` so Python's built-in filter() is not shadowed.
is_february = df_shoes['month_number'] == 2

# Subset the df_shoes dataframe using the mask
df_feb_shoes = df_shoes[is_february]

# Display the February rows
df_feb_shoes

Unnamed: 0,OrderID,Order Date,Product,Size,Colour,Price,Quantity,order_subtotal,estimated_sales_tax,year,month_number,month_name,quarter,weekday_number,weekday_name
4,2,2015-02-01,Boots,4,Black,25.0,20,500.0,50.0,2015,2,Feb,2015Q1,6,Sunday
5,1,2015-02-01,Boots,7,Gold,25.0,1,25.0,2.5,2015,2,Feb,2015Q1,6,Sunday
7,3,2015-02-01,Boots,6,Red,25.0,12,300.0,30.0,2015,2,Feb,2015Q1,6,Sunday


In [28]:
# Total February sales: sum of order_subtotal over the February-only subset, then display
feb_sales = df_feb_shoes['order_subtotal'].sum()

print("Our February Sales were: ", feb_sales)

Our February Sales were:  825.0
