# 1. Naming conventions

In [48]:
# Bad Example

numbers = [-10, -7, -3, 0, 1, 5, 7, 9]

# In Python variables and functions should be named using snake_case
# CamelCase should only be applied to class names
#
# "data" is too generic a name for the argument
def splitNumbers(data):
    """Split ``data`` into non-positive and positive values.

    Deliberately badly named (camelCase function, generic ``data``, cryptic
    ``x``/``y``) to illustrate the naming problems discussed in this section.

    Returns a tuple ``(x, y)``: ``x`` holds the items that are ``<= 0`` and
    ``y`` holds all the others, both in their original order.
    """
    # x and y are clear only for a short while, during implementation.
    #
    # If you come back to this code after a few months, you will have to
    # reverse-engineer what each list holds, which is unnecessary work
    x = []
    y = []
    
    for i in range(len(data)):
        if data[i] <= 0:
            x.append(data[i])
        else:
            y.append(data[i])
            
    return x, y

splitNumbers(numbers)

([-10, -7, -3, 0], [1, 5, 7, 9])

In [4]:
# Good Example - Better naming

# Follows snake_case for function names
#
# "data" argument renamed to numbers which represents the expectation better
def split_numbers(numbers):
    """Partition ``numbers`` into non-positive and positive values.

    Returns a ``(non_positives, positives)`` tuple of lists. Zero counts as
    non-positive and the original order is kept within each list.
    """
    # Descriptive names make the purpose of each list obvious
    non_positives, positives = [], []

    for position in range(len(numbers)):
        number = numbers[position]
        if number <= 0:
            non_positives.append(number)
        else:
            positives.append(number)

    return non_positives, positives

split_numbers(numbers)

([-10, -7, -3, 0], [1, 5, 7, 9])

# 2. Unpacking values from a tuple/list

In [5]:
# Bad example

user = (134325, "admin", "John")

def is_admin(user):
    """Return True when the value at index 1 of ``user`` equals 'admin'."""
    # A magic number such as the 1 below is hard for a reader to understand:
    # nothing says which field of the tuple it refers to
    return user[1] == "admin"

is_admin(user)

True

In [None]:
# Improved example

def is_admin(user):
    """Return True when the role field of ``user`` equals 'admin'."""
    # If an index has to be used directly, at least give it a descriptive name:
    # the intent becomes readable and there is a single place to update
    # should the tuple structure ever change
    ROLE_INDEX = 1
    role = user[ROLE_INDEX]
    return role == "admin"

is_admin(user)

In [6]:
# Good example

def is_admin(user):
    """Return True when the second of the three fields in ``user`` is 'admin'.

    ``user`` is unpacked into exactly three names, so an input of any other
    length raises ``ValueError``.
    """
    # Tuple unpacking: _ marks a placeholder for values we don't need at the moment
    _, role, _ = user
    return role == "admin"

is_admin(user)

True

In [50]:
# Unpacking to concrete variable names is practical only to some extent

# If there are a lot of items to unpack and we need just some of them, there is a better way
profile = (134325, "admin", "John", "Zurich", "Programmer", 30)

# We would read just the first value and place all of the other elements in a list
user_id, *details = profile

user_id, details

(134325, ['admin', 'John', 'Zurich', 'Programmer', 30])

In [51]:
# In case you don't need the other values, use the placeholder character as in the previous example
user_id, *_ = profile

user_id, _

(134325, ['admin', 'John', 'Zurich', 'Programmer', 30])

# 3. Simplify IF statements

In [11]:
# Bad Example
def delete_item(user, item):
    """Placeholder delete operation guarded by an ``is_admin(user)`` check.

    Raises a bare ``Exception`` when ``is_admin(user)`` does not equal True.
    """
    # == True is redundant, as the result of is_admin() is already a boolean value
    if is_admin(user) == True:
        ...
    else:
        raise Exception

In [None]:
def delete_item(user, item):
    """Placeholder delete operation guarded by an ``is_admin(user)`` check.

    Raises a bare ``Exception`` when ``is_admin(user)`` is falsy.
    """
    # Any boolean, or any value that can be converted to one, works directly
    # in a condition - no explicit comparison is needed
    if not is_admin(user):
        raise Exception

    # Guard passed: the actual deletion would go here
    ...

In [16]:
# No need to use len for checking emptiness of the list
if len(numbers) == 0:
    pass

In [18]:
# An empty list is falsy, so the list itself can be used directly in the condition
if not numbers:
    pass

In [19]:
# Be careful with numeric values that can also be None
def compute_formula(x, optional_y):
    """Placeholder showing the truthiness pitfall with an optional number."""
    # Let's say you want to reach this if branch only when optional_y is None.
    # Unfortunately 0 also evaluates to False, so optional_y=0 ends up here too
    if not optional_y:
        ...
    else:
        ...

In [None]:
def compute_formula(x, optional_y):
    """Placeholder showing an explicit None check for an optional number."""
    # An explicit None check is the better choice in such a case: only None
    # takes the first branch, while 0 and other falsy values go to the else branch
    if optional_y is None:
        ...
    else:
        ...

# 4. Simplify Loops

In [20]:
# Bad Example - Too complicated for loop construct

def split_numbers(numbers):
    """Split ``numbers`` into ``(non_positives, positives)`` using an index loop."""
    non_positives = []
    positives = []

    # In other programming languages, such a loop could look like this:
    # for (int i=0; i < numbers.length; i++) {
    #   if (numbers[i] <= 0) {
    #   ...
    # }
    #
    # Translating such a construct to Python directly is not optimal.
    # A "for i in range(...)" loop should be reserved for more advanced
    # scenarios in which "for item in iterable" is insufficient - usually
    # algorithms that really need the index
    for i in range(len(numbers)):
        if numbers[i] <= 0:
            non_positives.append(numbers[i])
        else:
            positives.append(numbers[i])
            
    return non_positives, positives


In [None]:
# Good example - simple for-each loop

def split_numbers(numbers):
    """Partition ``numbers`` into ``(non_positives, positives)``.

    Zero counts as non-positive; order is preserved within each list.
    """
    non_positives, positives = [], []

    # "For-each" style: iterate over the items themselves, which saves us
    # from having to access the list elements by index
    for value in numbers:
        bucket = non_positives if value <= 0 else positives
        bucket.append(value)

    return non_positives, positives

In [21]:
# Good example - simple for-each + tracking the index anyway

def split_numbers(numbers):
    """Partition ``numbers`` into ``(non_positives, positives)``.

    Zero counts as non-positive; order is preserved within each list.
    """
    non_positives, positives = [], []

    # enumerate() is an iterator that yields (index, item) tuples,
    # which can be conveniently unpacked right in the loop header.
    # The index is not needed by this logic; it is unpacked only to show
    # that it stays available without falling back to range(len(...))
    for position, value in enumerate(numbers):
        (non_positives if value <= 0 else positives).append(value)

    return non_positives, positives

# 5. Iteration patterns - multiple lists

In [None]:
# Bad Example - too complicated iteration over 2 lists

# Let's say x and y represent values in 2 columns
# We would like to add them, so we need to iterate over the 2 lists simultaneously
# One way is an index-based loop, which is harder to read than necessary
def compute_formula(x, y):
    """Add the items of ``x`` and ``y`` position by position, discarding each sum.

    ``y`` is indexed with every index of ``x``, so a ``y`` shorter than ``x``
    fails on the first missing position (``IndexError`` for lists).
    """
    for i in range(len(x)):
        x[i] + y[i]

In [23]:
# Good Example - using zip to iterate over multiple lists at the same time

def compute_formula(x, y):
    """Add the items of ``x`` and ``y`` position by position, discarding each sum.

    Iteration stops at the end of the shorter input.
    """
    # zip() yields one tuple per position, nicely unpacked in the loop header
    for x_item, y_item in zip(x, y):
        x_item + y_item

# 6. Iteration patterns - dictionary

In [24]:
mapping = {
    0: "Low",
    1: "Medium",
    2: "High"
}

for key, value in mapping.items():
    print(key, value)

0 Low
1 Medium
2 High


# 7. Dictionary - counting occurrences

In [29]:
# Most basic version

text = "This is some sentence with sentence word repeated"

def build_histogram(text):
    """Count how often each whitespace-separated word occurs in ``text``.

    The text is lower-cased first, so counting is case-insensitive.
    Returns a plain dict mapping each word to its number of occurrences.
    """
    # The histogram: a word is the key, its occurrence count the value
    histogram = {}

    for word in text.lower().split():
        # A plain dict has no default value, so the first occurrence of a
        # word has to be handled separately from the following ones
        if word in histogram:
            histogram[word] += 1
        else:
            histogram[word] = 1

    return histogram

build_histogram(text)

{'this': 1,
 'is': 1,
 'some': 1,
 'sentence': 2,
 'with': 1,
 'word': 1,
 'repeated': 1}

In [30]:
# Simpler - using defaultdict data structure

from collections import defaultdict

def build_histogram(text):
    """Count how often each whitespace-separated word occurs in ``text``.

    The text is lower-cased first, so counting is case-insensitive.
    Returns a ``defaultdict(int)`` mapping each word to its number of occurrences.
    """
    # defaultdict takes a factory that produces the value for a key that
    # doesn't exist yet. With a regular dict, dict_variable[non_existing_key] += 1
    # would raise a KeyError; here the missing key is first filled with
    # int() == 0, so the handling is transparent to the user
    histogram = defaultdict(int)

    for word in text.lower().split():
        histogram[word] += 1

    return histogram

build_histogram(text)

defaultdict(int,
            {'this': 1,
             'is': 1,
             'some': 1,
             'sentence': 2,
             'with': 1,
             'word': 1,
             'repeated': 1})

In [36]:
# The simplest method - Counter

from collections import Counter

def build_histogram(text, top_n=3):
    """Count word occurrences in ``text`` and keep the most frequent ones.

    The text is lower-cased and split on whitespace before counting.

    Args:
        text: The text to analyse.
        top_n: How many of the most frequent words to keep. Defaults to 3;
            pass ``None`` to get the complete histogram.

    Returns:
        A dict mapping word -> count, ordered from the most to the least
        frequent word. Words with equal counts keep the order in which they
        first appear in the text.
    """
    # Out-of-the-box counting, no need to re-invent the wheel
    counter = Counter(text.lower().split())

    # most_common(n) returns the n most frequent (word, count) pairs;
    # most_common(None) returns all keys and counts
    return dict(counter.most_common(top_n))

build_histogram(text)

{'sentence': 2, 'this': 1, 'is': 1}

In [40]:
# Another example, checking if 2 words are anagrams

word_a = "aabcdd"
word_b = "adadbc"

Counter(word_a) == Counter(word_b)

True

# 8. List comprehensions

In [41]:
def split_numbers(numbers):
    """Partition ``numbers`` into ``(non_positives, positives)``.

    ``numbers`` is iterated twice, so pass a re-iterable collection such as
    a list rather than a one-shot iterator.
    """
    # As long as the filtering/transformation is simple (usually a one-liner),
    # building the lists with comprehensions is shorter and cleaner.
    #
    # Once the logic evolves into a more complicated case, a regular loop
    # is the better option
    return (
        [value for value in numbers if value <= 0],
        [value for value in numbers if value > 0],
    )

split_numbers(numbers)

([-10, -7, -3, 0], [1, 5, 7, 9])

In [42]:
# Common use case - extract one of the values from the tuples and create a new list
users = [
    (1, "admin"),
    (2, "editor"),
    (3, "viewer")
]

[user_id for user_id, _ in users]

[1, 2, 3]

# 9. Sorting

In [44]:
users = [
    (1, "admin", 25),
    (2, "editor", 20),
    (3, "viewer", 27)
]

# the sort method performs an in-place operation
users.sort(key=lambda x: x[2], reverse=True)

# the list gets mutated here, which might be an unwanted side effect if the list is supplied from some external context
users

[(3, 'viewer', 27), (1, 'admin', 25), (2, 'editor', 20)]

In [46]:
users = [
    (1, "admin", 25),
    (2, "editor", 20),
    (3, "viewer", 27)
]

# sorted function is a safer choice, in which original list stays untouched
# sorted version is a new instance
sorted(users, key=lambda x: x[2])

[(2, 'editor', 20), (1, 'admin', 25), (3, 'viewer', 27)]

In [47]:
# you can see that the original variable hasn't been updated by the sorting
users

[(1, 'admin', 25), (2, 'editor', 20), (3, 'viewer', 27)]