Impure functions are functions that depend on and/or alter state outside of their own parameters and return value.
Examples of such outside state include global variables, disk I/O, and web service calls.

In [1]:
# Module-level state read and rebound by every call to impure_increment().
global_counter = 0


def impure_increment():
    """Add one to the module-level ``global_counter``.

    Takes no arguments and returns nothing; its only effect is rebinding
    the global, which is what makes it impure.
    """
    global global_counter
    global_counter = global_counter + 1

# Show the counter before any call, then again after each of two calls.
print("Initial Counter:", global_counter)
for label in ("Result after first call:", "Result after second call:"):
    impure_increment()
    print(label, global_counter)

Initial Counter: 0
Result after first call: 1
Result after second call: 2


In [2]:
def pure_increment(counter):
    """Return ``counter`` plus one without modifying the argument.

    Args:
        counter: The value to increment.

    Returns:
        The value of ``counter + 1``.
    """
    # Use ``+`` rather than ``+=``: augmented assignment calls ``__iadd__``
    # when the argument defines it (e.g. a NumPy array), which would modify
    # the caller's object in place and make this function impure.
    return counter + 1

# Thread the value through explicitly: each call receives the previous result.
initial_counter = 0
result1 = pure_increment(initial_counter)
result2 = pure_increment(result1)

# Printed after both calls: initial_counter is still the value assigned above.
print("Initial Counter:", initial_counter)
print("Result after first call:", result1)
print("Result after second call:", result2)


Initial Counter: 0
Result after first call: 1
Result after second call: 2


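Some sources of impurity are sneaky, and you may not realize that they make a function impure.
For example, calling datetime.now() makes a function's result depend on the system clock rather than only on its arguments.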

In [3]:
from datetime import datetime, timedelta

def add_days_to_date(days_to_add):
    """Return the current date and time shifted by ``days_to_add`` days.

    Impure: the result depends on ``datetime.now()``, so the same argument
    gives a different result each time the function is called.
    """
    return datetime.now() + timedelta(days=days_to_add)

# Shift the current time forward by five days and print the result.
days_to_add = 5
new_date = add_days_to_date(days_to_add)
print(f"New Date after adding {days_to_add} days:", new_date)

New Date after adding 5 days: 2023-12-03 07:55:54.658214


In [4]:
from datetime import datetime, timedelta

def add_days_to_date(input_date, days_to_add):
    """Return ``input_date`` shifted by ``days_to_add`` days.

    Pure: the result is computed only from the two arguments.

    Args:
        input_date: The starting date.
        days_to_add: Number of days to add.

    Returns:
        The sum of ``input_date`` and a ``timedelta`` of ``days_to_add`` days.
    """
    shift = timedelta(days=days_to_add)
    return input_date + shift

# The starting date is a fixed value passed in by the caller, not read from the clock.
current_date = datetime(2023, 1, 1)
days_to_add = 5
new_date = add_days_to_date(current_date, days_to_add)
# Printed after the call: current_date still holds the value assigned above.
print("Original Date:", current_date)
print(f"New Date after adding {days_to_add} days:", new_date)

Original Date: 2023-01-01 00:00:00
New Date after adding 5 days: 2023-01-06 00:00:00


Also, when you split an impure function into two parts (one pure, one impure),
the wrapper function that calls both parts is still impure.

In [5]:
import pandas as pd

def get_working_data(file_path):
    """Read the CSV at ``file_path`` and drop the rows with missing values.

    Impure: the result depends on the contents of the file being read.
    """
    return pd.read_csv(file_path).dropna()

# Load and clean the CSV in a single call; the path is relative to the working directory.
file_path = 'shots_2022.csv'
data = get_working_data(file_path)

In [15]:
import pandas as pd

def get_data_from_disk(file_path):
    """Load the CSV at ``file_path`` with ``pd.read_csv`` (impure: reads a file)."""
    return pd.read_csv(file_path)

def clean_data(raw_data):
    """Return ``raw_data`` without the rows that contain missing values.

    Pure: ``raw_data`` itself is not modified.

    Args:
        raw_data: DataFrame to clean.

    Returns:
        A new DataFrame holding only the rows with no missing values.
    """
    # dropna() is not in-place by default and already returns a new DataFrame,
    # so copying raw_data first would only duplicate the data in memory.
    return raw_data.dropna()

def get_working_data(file_path):
    """Load the CSV at ``file_path`` and return its cleaned contents.

    Still impure even though the cleaning step is separate, because the
    data is obtained through ``get_data_from_disk``.
    """
    return clean_data(get_data_from_disk(file_path))

# Same call as before the split; the path is relative to the working directory.
file_path = 'shots_2022.csv'
data = get_working_data(file_path)

As a rule, write unit tests for pure functions and integration tests for impure functions.