In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

### Create Data

In [5]:
# Roster of students as (location, state, name, lastname).
# Every student has one record per challenge, so the roster columns are
# written once and repeated for each challenge.
roster = [
    ('Austin', 'TX', 'Juan', 'Martinez'),
    ('Austin', 'TX', 'Ellie', 'Smith'),
    ('San Antonio', 'TX', 'George', 'Garcia'),
    ('Laredo', 'TX', 'Maria', 'Logit'),
    ('Dallas', 'TX', 'Test', 'Account'),
    ('El Paso', 'TX', 'Gina', 'Perdue'),
    ('Chicago', 'IL', 'Andrew', 'Lablanc'),
    ('Naperville', 'IL', 'Joe', 'Dile'),
    ('Springfield', 'IL', 'Mary', 'Smith'),
    ('Miami', 'FL', 'Tabelon', 'Legruti'),
    ('Orlando', 'FL', 'Wuda', 'Jenkins'),
]

# Grades keyed by challenge number, listed in roster order
grades_by_challenge = {
    1: [100, 88, 100, 100, 55, 75, 81, 77, 32, 100, 30],
    2: [99, 95, 66, 100, 60, 86, 83, 45, 75, 95, 0],
    3: [70, 100, 83, 100, 90, 65, 55, 81, 99, 81, 75],
}
n_challenges = len(grades_by_challenge)

# Flat, equal-length columns (one entry per student per challenge)
location_list = [student[0] for student in roster] * n_challenges
state_list = [student[1] for student in roster] * n_challenges
name_list = [student[2] for student in roster] * n_challenges
lastname_list = [student[3] for student in roster] * n_challenges
challenge_grades = [grade for grades in grades_by_challenge.values() for grade in grades]
challenge_number = [number for number, grades in grades_by_challenge.items() for _ in grades]

# Column name -> column values
class_data = {
    'location': location_list,
    'state': state_list,
    'name': name_list,
    'lastname': lastname_list,
    'challenge_grades': challenge_grades,
    'challenge_number': challenge_number,
}

# Build the frame and preview it ordered by student, then challenge
df = pd.DataFrame(class_data)
df.sort_values(by=["name", "challenge_number"]).head(5)

Unnamed: 0,location,state,name,lastname,challenge_grades,challenge_number
6,Chicago,IL,Andrew,Lablanc,81,1
17,Chicago,IL,Andrew,Lablanc,83,2
28,Chicago,IL,Andrew,Lablanc,55,3
1,Austin,TX,Ellie,Smith,88,1
12,Austin,TX,Ellie,Smith,95,2


In [6]:
# Utility function to generate random date
def generate_random_date(
    start_date: datetime = datetime(2021, 10, 19),
    end_date: datetime = datetime(2021, 11, 18),
) -> datetime:
    """Return a random date between start_date and end_date, inclusive.

    The result is start_date plus a whole number of days, so it keeps
    start_date's time of day.

    Args:
        start_date: Earliest date that can be returned. Defaults to the
            first class (2021-10-19).
        end_date: Latest date that can be returned. Defaults to the
            current class (2021-11-18).

    Returns:
        A datetime that is a whole number of days after start_date and
        not later than end_date.

    Raises:
        ValueError: If end_date is earlier than start_date.
    """
    # Whole days separating the two dates
    days_between_dates = (end_date - start_date).days
    if days_between_dates < 0:
        raise ValueError("end_date must not be earlier than start_date")
    # randint includes both endpoints, so end_date itself can be drawn and a
    # zero-length range (start_date == end_date) is valid. randrange(n) would
    # never return end_date and raises when n == 0.
    random_number_of_days = random.randint(0, days_between_dates)
    return start_date + timedelta(days=random_number_of_days)


In [20]:
# Smoke-test the helper: draw a single random date
generate_random_date()

datetime.datetime(2021, 11, 4, 0, 0)

For each record, generate a random submission date

In [8]:
# Draw one independent random submission date per record
random_dates = [generate_random_date() for _ in range(len(df))]
df['submission_date'] = random_dates
# Derive the weekday name (e.g. "Friday") from each submission date
df['day_of_week'] = df['submission_date'].dt.day_name()

In [22]:
# Preview the first rows to check the submission_date and day_of_week columns
df.head()

Unnamed: 0,location,state,name,lastname,challenge_grades,challenge_number,submission_date,day_of_week
0,Austin,TX,Juan,Martinez,100,1,2021-10-22,Friday
1,Austin,TX,Ellie,Smith,88,1,2021-10-29,Friday
2,San Antonio,TX,George,Garcia,100,1,2021-11-04,Thursday
3,Laredo,TX,Maria,Logit,100,1,2021-10-27,Wednesday
4,Dallas,TX,Test,Account,55,1,2021-11-08,Monday


In [24]:
# Check column dtypes; submission_date should be datetime64[ns] for the date filters below
df.dtypes

location                    object
state                       object
name                        object
lastname                    object
challenge_grades             int64
challenge_number             int64
submission_date     datetime64[ns]
day_of_week                 object
dtype: object

In [10]:
# Find earliest submission date (column minimum, returned as a pandas Timestamp)
df['submission_date'].min()

Timestamp('2021-10-19 00:00:00')

In [11]:
# Find latest submission date (column maximum, returned as a pandas Timestamp)
df['submission_date'].max()

Timestamp('2021-11-14 00:00:00')

In [12]:
# Span between the earliest and latest submission (a pandas Timedelta)
submitted = df['submission_date']
submitted.max() - submitted.min()

Timedelta('26 days 00:00:00')

#### Filter using string dates

In [26]:
# Keep every submission dated after October, i.e. November 2021 onwards.
# The datetime column is compared directly against a partial date string.
submitted = df['submission_date']
filter_by_date = submitted >= '2021-11'
df_filtered = df[filter_by_date]
print(f"Number of records:{len(df_filtered)}")
df_filtered.head()

Number of records:18


Unnamed: 0,location,state,name,lastname,challenge_grades,challenge_number,submission_date,day_of_week
2,San Antonio,TX,George,Garcia,100,1,2021-11-04,Thursday
4,Dallas,TX,Test,Account,55,1,2021-11-08,Monday
6,Chicago,IL,Andrew,Lablanc,81,1,2021-11-12,Friday
8,Springfield,IL,Mary,Smith,32,1,2021-11-07,Sunday
11,Austin,TX,Juan,Martinez,99,2,2021-11-11,Thursday


In [14]:
# Keep submissions from 11/1/21 through 11/10/21.
# Series.between includes both endpoints by default.
filter_by_date = df['submission_date'].between('2021-11-01', '2021-11-10')
df_filtered = df[filter_by_date]
print(f"Number of records:{len(df_filtered)}")
df_filtered.head()

Number of records:11


Unnamed: 0,location,state,name,lastname,challenge_grades,challenge_number,submission_date,day_of_week
2,San Antonio,TX,George,Garcia,100,1,2021-11-04,Thursday
4,Dallas,TX,Test,Account,55,1,2021-11-08,Monday
8,Springfield,IL,Mary,Smith,32,1,2021-11-07,Sunday
15,Dallas,TX,Test,Account,60,2,2021-11-01,Monday
16,El Paso,TX,Gina,Perdue,86,2,2021-11-02,Tuesday


#### Filter using datetime objects (`pd.to_datetime`)

In [15]:
# Keep submissions on or after 11/1/21, using a real datetime object as the cutoff.
# Equivalent ways to build the same cutoff:
#   datetime(2021, 11, 1)
#   datetime.strptime('11/01/2021', '%m/%d/%Y')
cutoff = pd.to_datetime('11/01/2021')
filter_by_date = df['submission_date'] >= cutoff
df_filtered = df[filter_by_date]
print(f"Number of records:{len(df_filtered)}")
df_filtered.head()

Number of records:18


Unnamed: 0,location,state,name,lastname,challenge_grades,challenge_number,submission_date,day_of_week
2,San Antonio,TX,George,Garcia,100,1,2021-11-04,Thursday
4,Dallas,TX,Test,Account,55,1,2021-11-08,Monday
6,Chicago,IL,Andrew,Lablanc,81,1,2021-11-12,Friday
8,Springfield,IL,Mary,Smith,32,1,2021-11-07,Sunday
11,Austin,TX,Juan,Martinez,99,2,2021-11-11,Thursday


# strftime format
Reference for the format codes accepted by `strftime`/`strptime` (e.g. `%m/%d/%Y`): https://strftime.org/

### dateutil

Functions that extend the standard `datetime` module.

https://dateutil.readthedocs.io/en/stable/index.html

In [16]:
!pip install python-dateutil



In [17]:
# NOTE(review): wildcard imports hide where names come from. The cells visible
# here only use `relativedelta` (dateutil.relativedelta) and `parse`
# (dateutil.parser); consider importing those names explicitly.
from dateutil.relativedelta import *
# NOTE(review): `calendar` is not used in the cells visible here — confirm before removing.
import calendar
from dateutil.parser import *
from dateutil.tz import *

In [18]:
# Current local date and time as a naive datetime (no tzinfo attached)
datetime.now()

datetime.datetime(2021, 11, 18, 20, 31, 34, 244996)

In [28]:
# today() also returns the current local date and time, like now() called without a timezone
datetime.today()

datetime.datetime(2021, 11, 18, 20, 37, 10, 407675)

In [29]:
# Next month: relativedelta(months=+1) shifts the current moment forward by one calendar month
datetime.now()+relativedelta(months=+1)

datetime.datetime(2021, 12, 18, 20, 37, 26, 810980)

In [None]:
# One calendar month plus one week from the current moment
right_now = datetime.now()
month_and_week = relativedelta(months=1, weeks=1)
right_now + month_and_week

In [None]:
# Relative delta since class started: passing two datetimes to relativedelta
# gives the elapsed time between them.
# NOTE(review): `classs_first_date` has a typo (triple "s"); it is left as-is
# because cells outside this view may reference the name.
classs_first_date = datetime(2021, 10, 19, 18, 0)
relativedelta(datetime.now(), classs_first_date)


In [30]:
# ISO-style date string; no time is given, so the result is at midnight
parse("2021-11-18")

datetime.datetime(2021, 11, 18, 0, 0)

In [31]:
# Date plus a 12-hour clock time; 8:15PM is parsed as 20:15
parse("2021-11-18 8:15PM")

datetime.datetime(2021, 11, 18, 20, 15)

In [32]:
# Day, spelled-out month name, and year
parse("11 November 2021")

datetime.datetime(2021, 11, 11, 0, 0)

In [33]:
# The filler word "at" is tolerated; 5pm is parsed as 17:00
parse("11 November 2021 at 5pm")

datetime.datetime(2021, 11, 11, 17, 0)

In [34]:
# Time only: the missing date is filled in from the date on which the cell is run
parse("11:00pm")

datetime.datetime(2021, 11, 18, 23, 0)

In [36]:
# Dotted "A.M." and the connecting word "on" are handled as well
parse("5:50 A.M. on June 13, 2021")

datetime.datetime(2021, 6, 13, 5, 50)