# Writing Functions

Functions are blocks of code that perform particular tasks

Each function should only perform ONE computational task

They should accomplish that task in the SIMPLEST way

Don't think about HOW you'll use the function in the code

## Why use functions?

They break up your code into bite-sized chunks

They allow your code to be modular

They make your code much easier to test!

## General syntax for a function

def function_name(parameters):

    ** function code body **
        
    ** use parameters **
    
    return value_to_return

In [1]:
# OUR GEOMETRY ANALYSIS CODE

import numpy
import os

file_location = os.path.join('data','water.xyz')

# Skip the first two lines of the file (presumably the xyz header: atom count
# and a comment line) and read every column as text, because the first column
# holds atom labels rather than numbers.
xyz_file = numpy.genfromtxt(fname=file_location, skip_header=2, dtype=str)

symbols = xyz_file[:, 0]
# The remaining columns are the coordinates, still stored as text here.
# Cast with the builtin float: the numpy.float alias was removed in
# NumPy 1.24 and raises AttributeError there.
coordinates = xyz_file[:, 1:].astype(float)

num_atoms = len(symbols)

for num1 in range(num_atoms):
    # Starting at num1 + 1 visits each pair of atoms exactly once, in the same
    # order as looping over every pair and keeping only num1 < num2.
    for num2 in range(num1 + 1, num_atoms):

        x_distance = coordinates[num1, 0] - coordinates[num2, 0]
        y_distance = coordinates[num1, 1] - coordinates[num2, 1]
        z_distance = coordinates[num1, 2] - coordinates[num2, 2]
        atom_distance = numpy.sqrt(x_distance**2 + y_distance**2 + z_distance**2)

        # Only distances greater than 0 and up to 1.5 are reported
        if atom_distance > 0 and atom_distance <= 1.5:
            print(F'{symbols[num1]} to {symbols[num2]}: {atom_distance:.3f}') # .3f means print just 3 decimal places

O to H1: 0.969
O to H2: 0.969


In [8]:
# Let's separate "distance measurement" into its own function

def calculate_distance(atom1_coord, atom2_coord):
    # Takes two points, each indexable as [x, y, z], and gives back the
    # straight-line distance between them.

    # Squared difference along each of the three axes (x, y, z)
    squared_differences = [
        (atom1_coord[axis] - atom2_coord[axis]) ** 2 for axis in range(3)
    ]

    # Add the terms in x, y, z order, then take the square root
    return numpy.sqrt(
        squared_differences[0] + squared_differences[1] + squared_differences[2]
    )

In [9]:
# USE THIS FUNCTION WITHIN THE GEOMETRY ANALYSIS CODE

import numpy
import os

file_location = os.path.join('data','water.xyz')

# Skip the first two lines of the file (presumably the xyz header: atom count
# and a comment line) and read every column as text, because the first column
# holds atom labels rather than numbers.
xyz_file = numpy.genfromtxt(fname=file_location, skip_header=2, dtype=str)

symbols = xyz_file[:, 0]
# Cast the text coordinates with the builtin float: the numpy.float alias
# was removed in NumPy 1.24 and raises AttributeError there.
coordinates = xyz_file[:, 1:].astype(float)

num_atoms = len(symbols)

for num1 in range(num_atoms):
    # Starting at num1 + 1 visits each pair of atoms exactly once, in the same
    # order as looping over every pair and keeping only num1 < num2.
    for num2 in range(num1 + 1, num_atoms):

        atom_distance = calculate_distance(coordinates[num1], coordinates[num2]) # HERE'S OUR FUNCTION!

        # Only distances greater than 0 and up to 1.5 are reported
        if atom_distance > 0 and atom_distance <= 1.5:
            print(F'{symbols[num1]} to {symbols[num2]}: {atom_distance:.3f}') # .3f means print just 3 decimal places

O to H1: 0.969
O to H2: 0.969


In [10]:
# YOU CAN USE OUR FUNCTION TO CALCULATE THE DISTANCE BETWEEN ANY TWO POINTS

calculate_distance([0,0,0],[0,0,1])

1.0

In [11]:
# Let's make another function to check if these things are bonds

def bond_check(distance):
    """
    Checks whether a distance counts as a bond
    Input: distance between two atoms
    Returns: True if the distance is greater than 0 and at most 1.5, otherwise False
    """
    # The conditional expression turns the comparison result into a plain
    # True or False, which are Boolean values
    return True if (distance > 0 and distance <= 1.5) else False

In [12]:
bond_check(1.2)

True

In [13]:
bond_check(2)

False

In [14]:
bond_check(-1.0)

False

In [21]:
# Modify bond_check function to accept a minimum and maximum value

def bond_check(distance, minimum_length, maximum_length):
    """
    Checks whether a distance falls inside a bond-length window
    Inputs: distance, minimum_length (exclusive lower limit), maximum_length (inclusive upper limit)
    Returns: True if minimum_length < distance <= maximum_length, otherwise False
    """
    above_minimum = distance > minimum_length
    # The upper limit is only compared when the lower limit has been passed
    within_range = above_minimum and distance <= maximum_length

    if not within_range:
        return False
    return True

In [22]:
bond_check(1.2, 1, 1.3)

True

In [23]:
bond_check(1.2, 1, 1.1)

False

In [24]:
bond_check(1.2) # Uh oh, this isn't going to work! You need 3 values!

TypeError: bond_check() missing 2 required positional arguments: 'minimum_length' and 'maximum_length'

In [25]:
# Modify bond_check to have default values, in case user forgets to specify their own min/max

def bond_check(distance, minimum_length=0, maximum_length=1.5):
    """
    Checks whether a distance falls inside a bond-length window
    Inputs: distance, plus optional minimum_length (default 0) and maximum_length (default 1.5)
    Returns: True if minimum_length < distance <= maximum_length, otherwise False
    """
    # The "=" in the signature gives each limit a default value, so callers
    # may leave either one out.
    # bool() makes sure a plain True or False is handed back.
    return bool(distance > minimum_length and distance <= maximum_length)

In [27]:
bond_check(1.2) # Now it works with just 1 value, because you have default values in the function!

True

In [28]:
# You can specify just the argument whose default value you DON'T want to use

bond_check(1.2, maximum_length=1)

False

In [29]:
# Remember, python is helpful!

help(numpy.genfromtxt)

Help on function genfromtxt in module numpy:

genfromtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=" !#$%&'()*+,-./:;<=>?@[\\]^{|}~", replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None, encoding='bytes')
    Load data from a text file, with missing values handled as specified.
    
    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.
    
    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is `.gz` or `.bz2`, the file is first decompressed. Note
        that generators must return byte strings. The string

In [30]:
help(calculate_distance)

Help on function calculate_distance in module __main__:

calculate_distance(atom1_coord, atom2_coord)



In [32]:
# Oh no, there's nothing! We didn't write any help for our user! We didn't write any DOCUMENTATION!

In [33]:
# Best practices in programming: any time you write a function, you should always write DOCUMENTATION!

## How to write documentation for functions

In [37]:
# Let's write documentation for "calculate_distance" function

# Use triple double quotes (""") to write the HELP text; this is called a "docstring"

def calculate_distance(atom1_coord, atom2_coord):
    """
    Calculates the distance between two points in 3D space.
    Inputs: coordinates of two atoms
    Returns: distance between the atoms
    """

    # Difference between the two points along x, y and z
    dx = atom1_coord[0] - atom2_coord[0]
    dy = atom1_coord[1] - atom2_coord[1]
    dz = atom1_coord[2] - atom2_coord[2]

    # Sum of the squared differences; its square root is the distance
    squared_sum = dx**2 + dy**2 + dz**2

    return numpy.sqrt(squared_sum)

In [38]:
help(calculate_distance)

Help on function calculate_distance in module __main__:

calculate_distance(atom1_coord, atom2_coord)
    Calculates the distance between two points in 3D space.
    Inputs: coordinates of two atoms
    Returns: distance between the atoms



In [41]:
# INCORPORATE BOTH FUNCTIONS

import numpy
import os

file_location = os.path.join('data','water.xyz')

# Skip the first two lines of the file (presumably the xyz header: atom count
# and a comment line) and read every column as text, because the first column
# holds atom labels rather than numbers.
xyz_file = numpy.genfromtxt(fname=file_location, skip_header=2, dtype=str)

symbols = xyz_file[:, 0]
# Cast the text coordinates with the builtin float: the numpy.float alias
# was removed in NumPy 1.24 and raises AttributeError there.
coordinates = xyz_file[:, 1:].astype(float)

num_atoms = len(symbols)

for num1 in range(num_atoms):
    # Starting at num1 + 1 visits each pair of atoms exactly once, in the same
    # order as looping over every pair and keeping only num1 < num2.
    for num2 in range(num1 + 1, num_atoms):

        atom_distance = calculate_distance(coordinates[num1], coordinates[num2]) # HERE'S OUR FUNCTION!

        # Test the result's truth value directly; comparing with "is True"
        # would silently skip bonds if the check ever returned a NumPy boolean.
        if bond_check(atom_distance): # HERE'S OUR OTHER FUNCTION!

            print(F'{symbols[num1]} to {symbols[num2]}: {atom_distance:.3f}') # .3f means print just 3 decimal places

O to H1: 0.969
O to H2: 0.969


In [57]:
# WRITE A FUNCTION TO OPEN AND PROCESS A FILE!
# We do this a lot. It seems ripe for a function!

# Write a function that opens and processes the xyz file.
# Name your function open_xyz
# Function only takes one parameter — name of the xyz file
# Function returns list of symbols, coordinates
# return variable1, variable2

def open_xyz(file_path):
    
    """
    This function opens an xyz file, separates the coordinates and the symbols and recasts the coordinates as floats
    Input: a file_path
    Returns: symbols, coordinates
    """

    # Skip the first two lines of the file and read every column as text,
    # because the first column holds atom labels rather than numbers
    xyz_file = numpy.genfromtxt(fname=file_path, skip_header=2, dtype=str)

    # genfromtxt gives back a 1D array when the file has a single atom line;
    # make it 2D so the column slicing below works for any number of atoms
    xyz_file = numpy.atleast_2d(xyz_file)

    # First column: atom labels
    symbols = xyz_file[:, 0]

    # Remaining columns: coordinates, still text at this point.
    # numpy.float was removed in NumPy 1.24, so cast with the builtin float.
    coordinates = xyz_file[:, 1:].astype(float)

    return symbols, coordinates

In [58]:
open_xyz('data/water.xyz')

(array(['O', 'H1', 'H2'], dtype='<U9'),
 array([[ 0.      , -0.007156,  0.965491],
        [-0.      ,  0.001486, -0.003471],
        [ 0.      ,  0.931026,  1.207929]]))

In [59]:
print(symbols)

['O' 'H1' 'H2']


In [60]:
print(coordinates)

[[ 0.       -0.007156  0.965491]
 [-0.        0.001486 -0.003471]
 [ 0.        0.931026  1.207929]]


In [61]:
help(open_xyz)

Help on function open_xyz in module __main__:

open_xyz(file_path)
    This function opens an xyz file, separates the coordinates and the symbols and recasts the coordinates as floats
    Input: a file_path
    Returns: symbols, coordinates



In [65]:
# INCORPORATE ALL FUNCTIONS

import numpy
import os

file_location = os.path.join('data','water.xyz')

symbols, coordinates = open_xyz(file_location) # HERE'S OUR "open_xyz" FUNCTION!

num_atoms = len(symbols)

for num1 in range(num_atoms):

    # Starting at num1 + 1 visits each pair of atoms exactly once, in the same
    # order as looping over every pair and keeping only num1 < num2.
    for num2 in range(num1 + 1, num_atoms):

        atom_distance = calculate_distance(coordinates[num1], coordinates[num2]) # HERE'S OUR "calculate_distance" FUNCTION!

        # Test the result's truth value directly; comparing with "is True"
        # would silently skip bonds if the check ever returned a NumPy boolean.
        if bond_check(atom_distance): # HERE'S OUR "bond_check" FUNCTION!

            print(F'{symbols[num1]} to {symbols[num2]}: {atom_distance:.3f}') # .3f means print just 3 decimal places

O to H1: 0.969
O to H2: 0.969


In [1]:
# COLLECT ALL CODE IN ONE BOX



# start with imports

import numpy
import os



# next: functions

def open_xyz(file_path):
    
    """
    This function opens an xyz file, separates the coordinates and the symbols and recasts the coordinates as floats
    Input: a file_path
    Returns 2 things: symbols, coordinates
    """

    # Skip the first two lines of the file and read every column as text,
    # because the first column holds atom labels rather than numbers
    xyz_file = numpy.genfromtxt(fname=file_path, skip_header=2, dtype=str)

    # genfromtxt gives back a 1D array when the file has a single atom line;
    # make it 2D so the column slicing below works for any number of atoms
    xyz_file = numpy.atleast_2d(xyz_file)

    # First column: atom labels
    symbols = xyz_file[:, 0]

    # Remaining columns: coordinates, still text at this point.
    # numpy.float was removed in NumPy 1.24, so cast with the builtin float.
    coordinates = xyz_file[:, 1:].astype(float)

    return symbols, coordinates

def calculate_distance(atom1_coord, atom2_coord):
    """
    Calculates the distance between two points in 3D space
    Inputs: coordinates of two atoms, each indexable as [x, y, z]
    Returns: distance between the atoms
    """

    # Pull the x, y and z components out of each point
    x1, y1, z1 = atom1_coord[0], atom1_coord[1], atom1_coord[2]
    x2, y2, z2 = atom2_coord[0], atom2_coord[1], atom2_coord[2]

    # Square the difference along each axis and add them in x, y, z order
    squared_sum = (x1 - x2)**2 + (y1 - y2)**2 + (z1 - z2)**2

    return numpy.sqrt(squared_sum)

def bond_check(distance, minimum_length=0, maximum_length=1.5):
    """
    Checks whether a distance falls inside a bond-length window
    Inputs: distance, plus optional minimum_length (default 0) and maximum_length (default 1.5)
    Returns: True if minimum_length < distance <= maximum_length, otherwise False
    """
    # Anything outside the window is rejected first...
    if not (distance > minimum_length and distance <= maximum_length):
        return False
    # ...so reaching this line means the distance is inside it
    return True

    
    
# finally: geometry analysis code

file_location = os.path.join('data','water.xyz')

symbols, coordinates = open_xyz(file_location) # HERE'S OUR "open_xyz" FUNCTION!

num_atoms = len(symbols)

for num1 in range(num_atoms):

    # Starting at num1 + 1 visits each pair of atoms exactly once, in the same
    # order as looping over every pair and keeping only num1 < num2.
    for num2 in range(num1 + 1, num_atoms):

        atom_distance = calculate_distance(coordinates[num1], coordinates[num2]) # HERE'S OUR "calculate_distance" FUNCTION!

        # Test the result's truth value directly; comparing with "is True"
        # would silently skip bonds if the check ever returned a NumPy boolean.
        if bond_check(atom_distance): # HERE'S OUR "bond_check" FUNCTION!

            print(F'{symbols[num1]} to {symbols[num2]}: {atom_distance:.3f}') # .3f means print just 3 decimal places

O to H1: 0.969
O to H2: 0.969
