# Import required libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import time

# Python Tutorial - 2

## Defining Functions

In [2]:
#####################################################################################################
# Defining Functions (start)
#####################################################################################################

* The keyword def introduces a function definition. It must be followed by the function name and the paranthesized list of formal parameters. The statements that form the body of the function start at the next line, and must be intended.
* The first statement of the function body can optionally be a string literal; this string literal is the function’s documentation string, or docstring. There are tools which use docstrings to automatically produce online or printed documentation, or to let the user interactively browse through code; it’s good practice to include docstrings in code that you write, so make a habit of it.
* The Python parser does not strip indentation from multi-line string literals in Python, so tools that process documentation have to strip indentation if desired. This is done using the following convention. The first non-blank line after the first line of the string determines the amount of indentation for the entire documentation string. (We can’t use the first line since it is generally adjacent to the string’s opening quotes so its indentation is not apparent in the string literal.) 
* Whitespace “equivalent” to this indentation is then stripped from the start of all lines of the string. Lines that are indented less should not occur, but if they occur all their leading whitespace should be stripped. Equivalence of whitespace should be tested after expansion of tabs (to 8 spaces, normally).

In [3]:
# Define a function that prints the fibonacci series till a defined boundary
def fib(n):
    """Fibonacci Series Generator
Generates Fibonacci series until number n
    """ 
    a,b = 0,1
    while a < n:
        print(a, end = ' ')
        a,b = b,a+b

fib(100)

0 1 1 2 3 5 8 13 21 34 55 89 

In [4]:
# This command can be used to print the docstring of a function.
print(fib.__doc__)

Fibonacci Series Generator
Generates Fibonacci series until number n
    


In [5]:
# Define a function that returns an array of fibonacci series numbers till a defined boundary
# return statement retuns a value from a function.
# The return statement causes the function to exit or terminate immediately even if it is not the 
# last statement of the function

def fib_r(n):
    a,b = 0,1
    series = []
    while a < n:
        series.append(a)
        a,b = b,a+b
    return series

fib100 = fib_r(100)
print(fib100)

[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]


* The execution of a function introduces a new symbol table used for the local variables of the function. More precisely, all variable assignments in a function store the value in the local symbol table; 
* Variable references first look in the local symbol table, then in the local symbol tables of enclosing functions, then in the global symbol table, and finally in the table of built-in names. 
* Thus, global variables and variables of enclosing functions cannot be directly assigned a value within a function (unless, for global variables, named in a global statement, or, for variables of enclosing functions, named in a nonlocal statement), although they may be referenced.

* A function definition introduces the function name in the current symbol table. The value of the function name has a type that is recognized by the interpreter as a user-defined function. 
* This value can be assigned to another name which can then also be used as a function. This serves as a general renaming mechanism:

In [6]:
print(fib) # Prints type of function fib
f = fib # Assigns function fib to another name "f" which can be used as a substitute for function fib.
f(100)

<function fib at 0x00000169DA080E50>
0 1 1 2 3 5 8 13 21 34 55 89 

In [7]:
#####################################################################################################
# Defining Functions (end)
#####################################################################################################

## Default Argument Values

In [8]:
#####################################################################################################
# Default Argument Values (start)
#####################################################################################################

In [9]:
# Functions can be defined with variable number of arguments. The most useful form is to specify a 
# default value for one or more arguments. This creates a function that can be called with fewer 
# arguments than it is defined to allow

def f(i, arg=5):
    print(i, arg)
    return 2*i

In [10]:
a = f(4, 10)
print(a)

4 10
8


In [11]:
b = f(6)
print(b)

6 5
12


In [12]:
# The default values are evaluated at the point of function definition:

i = 5
def f(arg=i):
    print(arg)

i = 6
f() # The default value of arg is set to 5 during function definition

5


In [13]:
# Important warning: The default value is evaluated only once. This makes a difference when the default
# is a mutable object such as a list, dictionary, or instances of most classes. For example, the 
# following function accumulates the arguments passed to it on subsequent calls:

def f(a,L=[]):
    L.append(a)
    return L
print(f(1))
print(f(2))
print(f(3))

[1]
[1, 2]
[1, 2, 3]


In [14]:
# To avoid this, you can write the function like this instead:
def f(a,L=None):
    if L is None:
        L = []
    L.append(a)
    return L
print(f(1))
print(f(2))
print(f(3))

[1]
[2]
[3]


In [15]:
######################################################################################################
# Default Argument Values (end)
######################################################################################################

## Inputs from user

* Python has a command called input(prompt). You call this function to tell the program to stop and wait for the user to key in the data.
* The program will resume once the user presses the ENTER or RETURN key.

In [16]:
a = input("Please enter something: ")
print("a is:",a)

Please enter something: This is Suresh
a is: This is Suresh


## Usage of in keyword

* The in keyword can be used to test whether or not a sequence contains a certain value

In [17]:
def ask_ok(prompt,retries=4,reminder='Please Try Again'):
    while True:
        ok = input(prompt)
        if ok in ('y', 'yes'):
            return True
        if ok in ('n', 'no'):
            return False
        retries = retries-1
        if retries <= 0:
            print("Number of tries exceeded")
            return
        print(reminder)       

In [18]:
ask_ok('Do you really want to quit: ',retries=5)

Do you really want to quit: yes


True

## Keyword Arguments

In [19]:
###########################################################################################
# Keyword Arguments (start)
###########################################################################################

In [20]:
# Functions can also be called using keyword arguments of the form kwarg=value. 
# In a function call, keyword arguments must follow positional arguments. 
# Positional arguments cannot be used after keyword arguments are used.
# All the keyword arguments passed must match one of the arguments accepted by the function.
# Order of keyword arguments is not important.
# No argument must receive a value more than once.

def test1(a, word = 'suresh', b = 5, c = 15):
    print(a, word, b, c)
    
test1(10) # 1 positional argument
test1(a = 15) # 1 keyword argument
test1(word = 'srisha', a = 20) # 2 keyword argument, note that the order of keywords is 
                               # different from function definition
test1(25, word = 'srisha', b = 10) # 1 positional argument, 2 keyword arguments.

10 suresh 5 15
15 suresh 5 15
20 srisha 5 15
25 srisha 10 15


In [21]:
######################################################################################################
# Keyword Arguments (end)
######################################################################################################

## Tuples and Sequences - Sequence Data Type

In [22]:
#####################################################################################################
# Tuples and Sequences (start)
#####################################################################################################

In [23]:
# A tuple consists of a number of values separated by commas and preferably enclosed by () brackets

t = (12345, 45678, 890)
print(t)
print(type(t))

# Tuples can be nested
t1 = (1, 2, 3)
u = (t1, t)
print(u)

(12345, 45678, 890)
<class 'tuple'>
((1, 2, 3), (12345, 45678, 890))


In [24]:
# Tuples are immutable but they can contain mutable objects
list_1 = [1, 2, 3]
list_2 = [4, 5, 6]
t_new = (list_1, list_2)
print(t_new)
list_1[0] = 5
print(t_new)

([1, 2, 3], [4, 5, 6])
([5, 2, 3], [4, 5, 6])


In [25]:
# Tuples may seem similar to lists, but they are often used in different situations for different 
# purposes. 
# Tuples are immutable, and usually contain a heterogeneous sequence of elements that can be accessed 
# via unpacking or indexing. 
# Lists are mutable, and their elements are usually homogeneous and are accessed by iterating over the 
# list.

# Empty tuples are constructed by an empty pair of parantheses
empty = ()
print(len(empty))
print(empty)

0
()


In [26]:
# A tuple with one value is constructed by following a value with a comma (it is not sufficient to 
# enclose a single value in parantheses)

singleton = ('hello',)
print(len(singleton))
print(singleton)

1
('hello',)


In [27]:
# Tuple packing
t = (123, 456, 789) # Values 123, 456 and 789 are packed into the tuple
print(t)

# The reverse operation is also possible. This is called sequence unpacking and works for any tuple 
# on the right-hand side
# Sequence unpacking requires that there are as many variables on the left side of the equals sign 
# as there are elements in the sequence

x, y, z = t
print(x, y, z)

# Multiple assignment is really just a combination of tuple packing and sequence unpacking
a, b, c = (456, 123, 789)
print(a, b, c)

(123, 456, 789)
123 456 789
456 123 789


In [28]:
# One way to change a tuple is to typecast it to a list, change the value in the list and typecast
# the list back to tuple
tuple_ex = (1,2,3,4,5)
print(tuple_ex)
tuple_list = list(tuple_ex)
print(tuple_list)
tuple_list[2] = 21
print(tuple_list)
tuple_ex = tuple(tuple_list)
print(tuple_ex)

(1, 2, 3, 4, 5)
[1, 2, 3, 4, 5]
[1, 2, 21, 4, 5]
(1, 2, 21, 4, 5)


In [29]:
#####################################################################################################
# Tuples and Sequences (end)
#####################################################################################################

## Sets - Sequence Data Type

In [30]:
#####################################################################################################
# Sets (start)
#####################################################################################################

In [31]:
# A set is an unordered collection with no duplicate elements. Basic uses include membership testing 
# and eliminating duplicate entries
# Set objects also support mathematical operations like union, intersection, difference and symmetric 
# difference.
# Curly braces or the set() function can be used to create sets. To create an empty set, you have to use 
# set(), not {}; the latter creates an empty dictionary

basket = {'apple', 'oranges', 'pears', 'oranges', 'bananas'}
print(basket) # Duplicates will be removed.
print('oranges' in basket) # Fast membership testing

{'pears', 'bananas', 'apple', 'oranges'}
True


In [32]:
# Demonstrate set operations on unique letters from two words

a = set('abracadabra')
b = set('alcazam')
print(a,b) # Duplicates have been removed
print("a - b is:", a - b) # Letters in a but not in b
print("a union b is:", a | b) # Union of a and b (letters in a or b or both)
print("a intersection b is:", a & b) # Intersection of a and (letters common to both a and b)
print("Letters in a or b but not both:", a ^ b) # Letters in a or b but not both

{'b', 'd', 'c', 'r', 'a'} {'m', 'c', 'z', 'l', 'a'}
a - b is: {'r', 'd', 'b'}
a union b is: {'m', 'b', 'd', 'r', 'c', 'z', 'l', 'a'}
a intersection b is: {'c', 'a'}
Letters in a or b but not both: {'m', 'b', 'l', 'd', 'r', 'z'}


In [33]:
# similar to list comprehensions, set comprehensions are also supported

a = {x for x in set('abracadabra') if x not in set('abc')}
print(a)

{'r', 'd'}


In [34]:
# We cant change the values of a set but we can add or remove elements from a set
set1 = {'abc','banana',123,456}
print(set1)
set1.add('water')
print(set1)
set1.remove(123)
print(set1)

{123, 'abc', 456, 'banana'}
{'abc', 456, 'water', 'banana', 123}
{'abc', 456, 'water', 'banana'}


In [35]:
#####################################################################################################
# Sets (end)
#####################################################################################################

## Dictionaries - Sequence Data Type

In [36]:
#####################################################################################################
# Dictionaries (start)
#####################################################################################################

In [37]:
# Dictionaries are associative arrays. Unlike sequences, which are indexed by a range of numbers, 
# dictionaries are indexed by keys, which can be any immutable type - strings and numbers can always 
# be keys. Tuples can also be used as keys, if they contain only strings, numbers or tuples. 
# If a tuple contains any mutable object either directly or indirectly, it cannot be used as a key. 
# Lists cannot be used as keys, since lists can be modified in place using index assignments, 
# slice assignments, or methods like append() or extend()

# It is best to think of a dictionary as key:value pairs with the requirement that the keys are 
# unique within one dictionary.
# A pair of braces creates an empty dictionary.
# Placing a list of key:value pairs within the braces adds initial key:value pairs to the dictionary.

# The main operations on a dictionary are storing values using some key and then extracting the 
# value given the key.
# It is also possible to delete a key:value pair with "del" command. 
# If you store a key that is already in use, the old value associated with the key is forgotten. 
 
# Performing list(d) on a dictionary returns a list of all keys used in the dictionary in the 
# insertion order. 
# To get the keys sorted as a list, use sorted(d) instead. 
# To check whether a single key is in the dictionary, use the 'in' keyword.

# dict_name.keys() can be used to retrieve the keys of a dictionary.
# dict_name.values() can be used to retrieve the values stored in a dictionary.

In [38]:
tel = {'suresh':123, 'srisha':456}
print(tel)
tel['jyo'] = 789
print(tel)
print(tel['suresh'], tel['jyo'])
del tel['jyo']
print(tel)
print(list(tel))
print(sorted(tel))
print('jyo' in tel)
print('suresh' in tel)
print("Keys of dictionary tel are {}:\n".format(tel.keys()))
print("Values of dictionary tel are {}:\n".format(tel.values()))

{'suresh': 123, 'srisha': 456}
{'suresh': 123, 'srisha': 456, 'jyo': 789}
123 789
{'suresh': 123, 'srisha': 456}
['suresh', 'srisha']
['srisha', 'suresh']
False
True
Keys of dictionary tel are dict_keys(['suresh', 'srisha']):

Values of dictionary tel are dict_values([123, 456]):



In [39]:
# Dict constructor buils dictionaries directly from sequences of key-value pairs:

a_d = dict([('abc',123), ('def',456), ('ghi',789)])
print(a_d)

# When the keys are simple strings, it is sometimes easier to specify pairs using keyword arguments
c_d = dict(abc=123, daf=456, ghi=789)
print(c_d)

# Dict comprehensions can be used to create dictionaries from arbitrary key and value expressions
b_d = {x:x**3 for x in (1,2,3,4,5,6)}
print(b_d)

{'abc': 123, 'def': 456, 'ghi': 789}
{'abc': 123, 'daf': 456, 'ghi': 789}
{1: 1, 2: 8, 3: 27, 4: 64, 5: 125, 6: 216}


In [40]:
#####################################################################################################
# Dictionaries (end)
#####################################################################################################

# Time taken to run a sequence of steps

In [41]:
num = 1000
x1 = np.random.randn(num)
x2 = np.random.randn(num)

### CLASSIC OUTER PRODUCT IMPLEMENTATION ###
tic = time.process_time()
outer = np.zeros((len(x1),len(x2))) # we create a len(x1)*len(x2) matrix with only zeros
for i in range(len(x1)):
    for j in range(len(x2)):
        outer[i,j] = x1[i]*x2[j]
toc = time.process_time()
del_t = 1e6*(toc-tic)
print ("Classic Outer product Computation time = %0.2f usec" % del_t)

### CLASSIC OUTER PRODUCT IMPLEMENTATION ###
tic = time.process_time()
x3 = np.outer(x1,x2)
toc = time.process_time()
del_t = 1e6*(toc-tic)
print ("Vectorized Outer product Computation time = %0.2f usec" % del_t)

Classic Outer product Computation time = 1078125.00 usec
Vectorized Outer product Computation time = 0.00 usec


# File Operations

## Reading and Writing Files

* open() returns a file object, and is most commonly used with two arguments: 
    * open(filename, mode). Example: f = open('workfile', 'w')
* The first argument is a string containing the filename. 
* The second argument is another string containing a few characters describing the way in which the file will be used. 
    * mode can be 'r' when the file will only be read, 
    * 'w' for only writing (an existing file with the same name will be erased)
    * 'a' opens the file for appending; any data written to the file is automatically added to the end. 
    * 'r+' opens the file for both reading and writing. 
    * The mode argument is optional; 'r' will be assumed if it’s omitted.

* Normally, files are opened in text mode, that means, we read and write strings from and to the file, which are encoded in a specific encoding. 
* If encoding is not specified, the default is platform dependent. 
* 'b' appended to the mode opens the file in binary mode: now the data is read and written in the form of bytes objects. This mode should be used for all files that don’t contain text.

* In text mode, the default when reading is to convert platform-specific line endings (\n on Unix, \r\n on Windows) to just \n. 
* When writing in text mode, the default is to convert occurrences of \n back to platform-specific line endings. 
* This behind-the-scenes modification to file data is fine for text files, but will corrupt binary data like that in JPEG or EXE files. We should be very careful to use binary mode when reading and writing such files.

* It is good practice to use the <b>with</b> keyword when dealing with file objects. The advantage is that the file is properly closed after its suite finishes, even if an exception is raised at some point.
* If we’re not using the with keyword, then we should call f.close() to close the file and immediately free up any system resources used by it. If we don’t explicitly close a file, Python’s garbage collector will eventually destroy the object and close the open file for us, but the file may stay open for a while. Another risk is that different Python implementations will do this clean-up at different times.
* After a file object is closed, either by a with statement or by calling f.close(), attempts to use the file object will automatically fail.

In [42]:
with open('Datasets/read_file_study.txt') as f:
    read_data = f.read()
print(read_data)
display(read_data)

1, Hi, This is Suresh.
2, Iam a boy.
3, My age is 43.


'1, Hi, This is Suresh.\n2, Iam a boy.\n3, My age is 43.'

## Methods of file objects

* <b>To read a file’s contents, call f.read(size), which reads some quantity of data and returns it as a string (in text mode) or bytes object (in binary mode). 
* Size is an optional numeric argument. When size is omitted or negative, the entire contents of the file will be read and returned; 
* If the end of the file has been reached, f.read() will return an empty string ('').</b>

In [43]:
with open('Datasets/read_file_study.txt') as f:
    read_data = f.read(10)
print(read_data) # Only 10 characters read

1, Hi, Thi


In [44]:
with open('Datasets/read_file_study.txt') as f:
    read_data = f.read(26)
print(read_data) # 26 characters read now
display(read_data)

1, Hi, This is Suresh.
2, 


'1, Hi, This is Suresh.\n2, '

* <b>f.readline() reads a single line from the file; a newline character (\n) is left at the end of the string, and is only omitted on the last line of the file if the file doesn’t end in a newline. This makes the return value unambiguous; 
* if f.readline() returns an empty string, the end of the file has been reached, while a blank line is represented by '\n', a string containing only a single newline.</b>

In [45]:
with open('Datasets/read_file_study.txt') as f:
    first_line = f.readline()
    display(first_line)
    second_line = f.readline()
    display(second_line)

'1, Hi, This is Suresh.\n'

'2, Iam a boy.\n'

* <b>The readlines(n) method returns the contents of the entire file as a list of strings, where each item in the list represents one line of the file.  
* If n is not provided then all lines of the file are returned. 
* If n is provided then n characters are read but n is rounded up so that an entire line is returned.</b>

In [46]:
with open('Datasets/read_file_study.txt') as f:
    all_lines = f.readlines()
display(all_lines)
display(all_lines[0])
display(all_lines[1])
display(all_lines[2])

['1, Hi, This is Suresh.\n', '2, Iam a boy.\n', '3, My age is 43.']

'1, Hi, This is Suresh.\n'

'2, Iam a boy.\n'

'3, My age is 43.'

In [47]:
with open('Datasets/read_file_study.txt') as f:
    all_lines = f.readlines(5)
display(all_lines)

['1, Hi, This is Suresh.\n']

<b>For reading lines from a file, we can loop over the file object. This is memory efficient, fast, and leads to simple code:</b>

In [48]:
with open('Datasets/read_file_study.txt') as f:
    for line in f:
        print(line, end = '')

1, Hi, This is Suresh.
2, Iam a boy.
3, My age is 43.