# Python Efficient Tricks
## Code profiling for time usage

In [None]:
#%timeit flags: -r sets the number of runs, -n sets the number of loops per run (here 5 runs of 25 loops)
%timeit -r5 -n25 set(heroes)

In [None]:
#install the line_profiler package first
pip install line_profiler

In [None]:
#load the extension first
%load_ext line_profiler

In [None]:
#command to run the profiling
%lprun -f convert_units convert_units(heroes, hts, wts)

## Code profiling for memory usage

In [None]:
#install the package first; memory_profiler needs the profiled code to be saved in a physical file, so the function must be imported from that file
pip install memory_profiler
from hero_funcs import convert_units

In [None]:
#load the extension first
%load_ext memory_profiler

In [None]:
#command to run the profiling
%mprun -f convert_units convert_units(heroes, hts, wts)

## itertool, counter(), zip(), set()

In [None]:
#zip() returns a lazy zip object; it has to be materialized to get a list of pairs
names_type1 = list(zip(names, primary_types))


In [1]:
#collections.Counter() is faster than nested loop to count things

In [None]:
#itertools contains useful tools for combinations/permutations
from itertools import combinations
#this is a combinations object; change the number to change how many items are in each combination
combos_obj = combinations(pokemon, 2)


In [None]:
# Set operations, shown on two small example sets
set_a = {1, 2, 3}
set_b = {3, 4, 5}
# intersection: elements that are in both sets
in_both = set_a.intersection(set_b)
# difference: elements in one set but not the other
only_in_a = set_a.difference(set_b)
# symmetric difference: elements in exactly one of the two sets
in_exactly_one = set_a.symmetric_difference(set_b)
# union: elements that are in either set
in_either = set_a.union(set_b)
# membership tests on a set are faster than on a list


## eliminating loops with built in function

In [None]:
# map is faster than a list comprehension
[sum(row) for row in rows]
[*map(sum, rows)]

In [2]:
#numpy also has built-in functions that boost efficiency



In [3]:
#if a loop can't be avoided, move one-time calculations and type conversions out of the loop


In [None]:
#iterating with df.iterrows() is faster than indexing every row with .iloc
for i, row in df.iterrows():
    pass  # each item is an (index, Series) pair
#itertuples() is faster than iterrows() because it yields a different object:
#iterrows yields (index, Series) pairs, itertuples yields namedtuples, which have less overhead
for row in df.itertuples():
    pass  # each item is a single namedtuple; columns are read as attributes
#use .apply() to avoid writing the loop yourself, like map()
#.apply(axis=0) works on each column, .apply(axis=1) works on each row
#a lambda can be used to pick the columns passed to the function
textual_playoffs = rays_df.apply(lambda row: text_playoffs(row['Playoffs']), axis=1)
print(textual_playoffs)

In [None]:
#df['name'].values returns a NumPy array, which lets us compute on the whole column at once instead
#of row by row; thanks to NumPy broadcasting this is much faster than looping.
df['new_name'] = df['name1'].values - df['name2'].values

In [None]:
# Three ways to compute the same predictions, kept side by side for comparison.
# Approach 1 result: filled one element per row by the explicit loop below
win_perc_preds_loop = []

# Use a loop and .itertuples() to collect each row's predicted win percentage
for row in baseball_df.itertuples():
    # itertuples() rows expose the columns as attributes
    runs_scored = row.RS
    runs_allowed = row.RA
    win_perc_pred = predict_win_perc(runs_scored, runs_allowed)
    win_perc_preds_loop.append(win_perc_pred)

# Apply predict_win_perc to each row of the DataFrame
# (axis=1 passes one row at a time to the lambda)
win_perc_preds_apply = baseball_df.apply(lambda row: predict_win_perc(row['RS'], row['RA']), axis=1)

# Calculate the win percentage predictions using NumPy arrays
# (predict_win_perc is called once with the two whole columns)
win_perc_preds_np = predict_win_perc(baseball_df['RS'].values, baseball_df['RA'].values)
# Store the array-based predictions as a new column
baseball_df['WP_preds'] = win_perc_preds_np
print(baseball_df.head())

#last method with np array is fastest

# Writting functions in python
## docstring

In [None]:
#a docstring describes how to use the function; it usually contains up to 5 parts.
#it starts and ends with """
#1 description
#2 Args
#3 Returns
#4 Raises
#5 extra notes or usage examples
def count_letter(content, letter):
  """Count the number of times `letter` appears in `content`.

  Args:
    content (str): The string to search.
    letter (str): The letter to search for.

  Returns:
    int: The number of characters in `content` that equal `letter`.

  Raises:
    ValueError: If `letter` is not a one-character string.
  """
  if (not isinstance(letter, str)) or len(letter) != 1:
    raise ValueError('`letter` must be a single character string.')
  # Count the matches directly instead of building a list only to measure it
  return sum(1 for char in content if char == letter)

In [None]:
import inspect
# getdoc() takes the object to inspect (here the count_letter function defined
# in an earlier cell) and returns its docstring with the indentation cleaned up
docstring = inspect.getdoc(count_letter)

## pass by assignment

In [4]:
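#pass by assignment: a function receives a reference to the caller's object, so mutating a mutable argument changes it for the caller too
# prefer immutable objects for function arguments (and for default argument values).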

## context manager

In [None]:
#context manager
# open() returns a context manager; `as file` binds the value it hands back
with open('alice.txt') as file:
  text = file.read()

# timer() is a user-defined context manager wrapped around the code to run
with timer():
  print('Numpy version')
  process_with_numpy(image)
#yield, which is used to run the commands within the context manager
import contextlib
import os


# Without this decorator in_dir() is a plain generator function and cannot
# be used in a `with` statement
@contextlib.contextmanager
def in_dir(directory):
  """Change current working directory to `directory`,
  allow the user to run some code, and change back.

  Args:
    directory (str): The path to a directory to work in.

  Yields:
    None: Control returns to the body of the `with` block.
  """
  current_dir = os.getcwd()
  os.chdir(directory)

  # Hand control to the `with` body
  try:
    yield
  # Ensure the directory is reset,
  # whether there was an error or not
  finally:
    os.chdir(current_dir)

## Function is an object

In [None]:
# Functions are objects, so they can be stored as dictionary values
# and looked up by name
function_map = dict(mean=mean, std=std, minimum=minimum, maximum=maximum)

data = load_data()
print(data)

func_name = get_user_input()

# Look up the chosen function by its name and call it on "data"
function_map[func_name](data)

## scope

In [None]:
# name lookup order: local -> nonlocal (enclosing function) -> global -> builtin
# `global` is used to reach a variable defined outside the function, at module level
# `nonlocal` is used to reach a variable outside the local scope but inside the parent function
# a nonlocal variable is defined in the parent function but used in the child function
# a closure is the set of nonlocal variables attached to a returned function

In [None]:
def return_a_func(arg1, arg2):
  """Return a function that prints the two arguments it was built with.

  Args:
    arg1: First value captured by the returned function.
    arg2: Second value captured by the returned function.

  Returns:
    callable: A zero-argument function that prints both values.
  """
  def new_func():
    # Both names are read from the enclosing scope, so each one becomes
    # a cell in new_func.__closure__
    for template, value in (('arg1 was {}', arg1), ('arg2 was {}', arg2)):
      print(template.format(value))
  return new_func

my_func = return_a_func(2, 17)

# The returned function carries a closure holding both captured values
print(my_func.__closure__ is not None)
print(len(my_func.__closure__) == 2)

# Read the captured values back out of the closure cells
closure_values = [
  my_func.__closure__[position].cell_contents for position in (0, 1)
]
print(closure_values == [2, 17])

## decorator

In [None]:
@print_args
def my_function(a, b, c):
  """Print the result of adding the three arguments together."""
  total = a + b + c
  print(total)

my_function(1, 2, 3)

In [None]:
def print_return_type(func):
  """Decorate `func` so each call prints the type of the value it returns.

  Args:
    func (callable): The function to decorate.

  Returns:
    callable: A wrapper that calls `func` with the same arguments, prints
    `func`'s name and the type of its result, and returns that result.
  """
  # Imported locally so this cell can run on its own
  from functools import wraps

  # Define wrapper(), the decorated function;
  # wraps() keeps func's name and docstring on it
  @wraps(func)
  def wrapper(*args, **kwargs):
    # Call the function being decorated
    result = func(*args, **kwargs)
    print('{}() returned type {}'.format(
      func.__name__, type(result)
    ))
    return result
  # Return the decorated function
  return wrapper

@print_return_type
def foo(value):
  return value

print(foo(42))
print(foo([1, 2, 3]))
print(foo({'a': 42}))

In [None]:
def counter(func):
  """Decorate `func` so the number of calls is tracked in a `count` attribute.

  Args:
    func (callable): The function to decorate.

  Returns:
    callable: A wrapper that increments its own `count` attribute on every
    call and returns whatever `func` returns.
  """
  # Imported locally so this cell can run on its own
  from functools import wraps

  @wraps(func)
  def wrapper(*args, **kwargs):
    wrapper.count += 1
    # Call the function being decorated and return the result
    return func(*args, **kwargs)
  # The counter lives on the wrapper itself and starts at zero
  wrapper.count = 0
  # Return the new decorated function
  return wrapper

# Decorate foo() with the counter() decorator
@counter
def foo():
  print('calling foo()')

foo()
foo()

print('foo() was called {} times.'.format(foo.count))

In [None]:
#Preserving docstrings when decorating functions
# use functools wraps to protect doc in func instead of wrapper
from functools import wraps

def add_hello(func):
  """Return a version of `func` that prints 'Hello' before every call.

  The returned wrapper keeps `func`'s metadata (name, docstring).
  """
  def wrapper(*args, **kwargs):
    """Print 'hello' and then call the decorated function."""
    print('Hello')
    outcome = func(*args, **kwargs)
    return outcome
  # Calling wraps(func) on wrapper is what the @wraps(func) syntax does
  return wraps(func)(wrapper)

@add_hello
def print_sum(a, b):
  """Adds two numbers and prints the sum"""
  total = a + b
  print(total)

print_sum(10, 20)
print(print_sum.__doc__)

In [None]:
#use duplicate.__wrapped__ to call the original function without the decorator
#NOTE(review): this relies on check_everything applying functools.wraps - confirm
import time

@check_everything
def duplicate(my_list):
  """Return a new list that repeats the input twice"""
  return my_list + my_list

# perf_counter() is the clock intended for measuring short durations
t_start = time.perf_counter()
duplicated_list = duplicate(list(range(50)))
t_end = time.perf_counter()
decorated_time = t_end - t_start

t_start = time.perf_counter()
# Call the original function instead of the decorated one
duplicated_list = duplicate.__wrapped__(list(range(50)))
t_end = time.perf_counter()
undecorated_time = t_end - t_start

print('Decorated time: {:.5f}s'.format(decorated_time))
print('Undecorated time: {:.5f}s'.format(undecorated_time))

In [None]:
#decorator that takes arguments
def run_n_times(n):
  """Define and return a decorator that runs a function `n` times per call.

  Args:
    n (int): How many times each call should run the decorated function.

  Returns:
    callable: A decorator. The wrapper it produces calls the decorated
    function `n` times with the same arguments and returns None.
  """
  # Imported locally so this cell can run on its own
  from functools import wraps

  def decorator(func):
    # Keep the decorated function's name and docstring on the wrapper
    @wraps(func)
    def wrapper(*args, **kwargs):
      # The loop index is not needed, only the repetition
      for _ in range(n):
        func(*args, **kwargs)
    return wrapper
  return decorator
# Modify the print() function to always run 20 times
# (this rebinds the name `print` for the rest of the session)
print = run_n_times(20)(print)

print('What is happening?!?!')

In [None]:
def tag(*tags):
  """Return a decorator that attaches `tags` to the function it decorates.

  Args:
    *tags: Any number of tags to store on the decorated function.

  Returns:
    callable: A decorator whose result exposes the tags as a `.tags` tuple.
  """
  def decorator(func):
    def wrapper(*args, **kwargs):
      # Pure pass-through: the decorated function's behavior is unchanged
      return func(*args, **kwargs)
    # Calling wraps(func) on wrapper keeps the decorated function's metadata
    tagged = wraps(func)(wrapper)
    tagged.tags = tags
    return tagged
  return decorator

@tag('test', 'this is a tag')
def foo():
  pass

print(foo.tags)

In [None]:
def returns(return_type):
  """Return a decorator that checks the type of a function's return value.

  Args:
    return_type (type): The exact type the decorated function must return.

  Returns:
    callable: A decorator. The wrapper it produces returns the function's
    result unchanged when the type matches.

  Raises:
    AssertionError: Raised by the wrapper when the result's type is not
      exactly `return_type`.
  """
  # Imported locally so this cell can run on its own
  from functools import wraps

  def decorator(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
      result = func(*args, **kwargs)
      # Raise explicitly: a bare `assert` is removed when Python runs with -O,
      # which would silently disable the check
      if type(result) is not return_type:
        raise AssertionError(
          '{}() returned {!r}, expected {!r}'.format(
            getattr(func, '__name__', repr(func)), type(result), return_type
          )
        )
      return result
    return wrapper
  return decorator

@returns(dict)
def foo(value):
  return value

try:
  print(foo([1,2,3]))
except AssertionError:
  print('foo() did not return a dict!')

## Unit testing

In [None]:
#Import the pytest package
import pytest

# Import the function convert_to_int()
from preprocessing_helpers import convert_to_int

# Complete the unit test name by adding a prefix
def test_on_string_with_one_comma():
  """Check convert_to_int() on a string containing a single comma."""
  actual = convert_to_int('2,081')
  assert actual==2081

In [None]:
#If you get an AssertionError, this means the function has a bug and you should fix it. 
#If you get another exception, e.g. NameError, this means that something else is wrong with the
#unit test code and you should fix it so that the assert statement can actually run.

In [None]:
#benefit of unit testing
#reduced time
#improved documentation
#more trust
#reduced downtime

In [None]:
#multiple assert statements in one test
def test_on_six_rows():
    """Check the row counts of both arrays returned for a six-row input."""
    six_rows = np.array([
        [2081.0, 314942.0], [1059.0, 186606.0],
        [1148.0, 206186.0], [1506.0, 248419.0],
        [1210.0, 214114.0], [1697.0, 277794.0],
    ])
    # Expected number of rows: 4 in the training array, 2 in the testing array
    expected_training_array_num_rows, expected_testing_array_num_rows = 4, 2
    actual = split_into_training_and_testing_sets(six_rows)
    # First element of the result is the training array
    training_rows = actual[0].shape[0]
    assert training_rows == expected_training_array_num_rows, "The actual number of rows in the training array is not {}".format(expected_training_array_num_rows)
    # Second element of the result is the testing array
    testing_rows = actual[1].shape[0]
    assert testing_rows == expected_testing_array_num_rows, "The actual number of rows in the testing array is not {}".format(expected_testing_array_num_rows)

In [None]:
#argument types to test: special values, boundary values, normal values, bad values
import pytest
from preprocessing_helpers import row_to_list

def test_on_no_tab_no_missing_value():    # (0, 0) boundary value
    """Check that row_to_list() returns None for a row without any tab."""
    row = "123\n"
    actual = row_to_list(row)
    failure_message = "Expected: None, Actual: {0}".format(actual)
    assert actual is None, failure_message
    
def test_on_two_tabs_no_missing_value():    # (2, 0) boundary value
    """Check that row_to_list() returns None for a row containing two tabs."""
    row = "123\t4,567\t89\n"
    actual = row_to_list(row)
    failure_message = "Expected: None, Actual: {0}".format(actual)
    assert actual is None, failure_message
    
def test_on_one_tab_with_missing_value():    # (1, 1) boundary value
    """Check that row_to_list() returns None when the first value is missing."""
    row = "\t4,567\n"
    actual = row_to_list(row)
    failure_message = "Expected: None, Actual: {0}".format(actual)
    assert actual is None, failure_message

## use a class to test

In [None]:
#use a class to handle increasing test
import pytest
import numpy as np

from models.train import split_into_training_and_testing_sets

# Declare the test class
class TestSplitIntoTrainingAndTestingSets:
    """Groups the tests for split_into_training_and_testing_sets()."""

    # Test methods in a class take `self` as their mandatory argument
    def test_on_one_row(self):
        """A one-row array must be rejected with a ValueError and this message."""
        single_row = np.array([[1382.0, 390167.0]])
        expected_error_msg = "Argument data_array must have at least 2 rows, it actually has just 1"
        with pytest.raises(ValueError) as exc_info:
            split_into_training_and_testing_sets(single_row)
        assert exc_info.match(expected_error_msg)

In [None]:
#to run all tests in the test folder, run pytest from inside that folder
#pytest -x stops at the first failing test
#run only one test class with its node ID: <path to test module>::<test class name>
#run only one unit test of a class with its node ID: <path to test module>::<test class name>::<unit test name>
#Runs all tests whose node ID matches the pattern.
pytest -k "pattern"

In [None]:
#expected to fail
# Add a reason for the expected failure
@pytest.mark.xfail(reason='Using TDD, model_test() has not yet been implemented')
class TestModelTest:
    """Tests for model_test(); the whole class is marked as expected to fail."""

    def test_on_linear_data(self):
        """Check the value returned for three points and arguments 2.0, 1.0."""
        linear_points = np.array([[1.0, 3.0], [2.0, 5.0], [3.0, 7.0]])
        expected = 1.0
        actual = model_test(linear_points, 2.0, 1.0)
        assert actual == pytest.approx(expected), (
            "model_test({0}) should return {1}, but it actually returned {2}".format(
                linear_points, expected, actual
            )
        )

    def test_on_one_dimensional_array(self):
        """A one-dimensional input must raise ValueError."""
        flat_input = np.array([1.0, 2.0, 3.0, 4.0])
        with pytest.raises(ValueError):
            model_test(flat_input, 1.0, 1.0)

In [None]:
#skip the fail
# Import the sys module
import sys

class TestGetDataAsNumpyArray(object):
    """Tests for get_data_as_numpy_array()."""

    # Skip on anything newer than 2.7. Only (major, minor) is compared: the
    # full version_info tuple is longer than (2, 7), so it compares greater
    # even on 2.7.x and would skip the test there as well.
    @pytest.mark.skipif(sys.version_info[:2] > (2, 7), reason="Works only on Python 2.7 or lower")
    def test_on_clean_file(self):
        """Compare the array read from the example file with the expected one."""
        expected = np.array([[2081.0, 314942.0],
                             [1059.0, 186606.0],
                             [1148.0, 206186.0]
                             ]
                            )
        actual = get_data_as_numpy_array("example_clean_data.txt", num_columns=2)
        message = "Expected return value: {0}, Actual return value: {1}".format(expected, actual)
        # approx() compares the float arrays with a tolerance
        assert actual == pytest.approx(expected), message

In [None]:
#Showing the reason for skipping
pytest -rs
#Showing the reason for xfail
pytest -rx
#Showing the reason for both skipped and xfail
pytest -rsx

## fixture

In [None]:
# Add a decorator to make this function a fixture
@pytest.fixture
def clean_data_file():
    """Create a small tab-separated data file for a test, then delete it.

    Yields:
        str: Path of the file, which holds three rows of two columns.
    """
    # Imported here because no cell in this notebook imports os
    import os

    file_path = "clean_data_file.txt"
    # Setup: write the file the test will read
    with open(file_path, "w") as f:
        f.write("201\t305671\n7892\t298140\n501\t738293\n")
    yield file_path
    # Teardown: runs once the test using the fixture has finished
    os.remove(file_path)
    
# Pass the correct argument so that the test can use the fixture
def test_on_clean_file(clean_data_file):
    """Read the file provided by the fixture and compare it with the expected array."""
    rows = [[201.0, 305671.0], [7892.0, 298140.0], [501.0, 738293.0]]
    expected = np.array(rows)
    # The fixture yields the path of the clean data file
    actual = get_data_as_numpy_array(clean_data_file, 2)
    message = "Expected: {0}, Actual: {1}".format(expected, actual)
    assert actual == pytest.approx(expected), message

In [None]:
#use tmpdir
import pytest

@pytest.fixture
def empty_file(tmpdir):
    """Create an empty file named empty.txt inside pytest's tmpdir.

    Taking `tmpdir` as an argument chains this fixture with the tmpdir fixture.

    Yields:
        The path object of the empty file.
    """
    file_path = tmpdir.join("empty.txt")
    # Opening for writing creates the file; nothing is written to it
    with open(file_path, "w"):
        pass
    yield file_path

## mock

In [None]:
#mock
#Mocking: testing functions independently of their dependencies

# Add the correct argument to use the mocking fixture in this test
# Add the correct argument to use the mocking fixture in this test
def test_on_raw_data(self, raw_and_clean_data_file, mocker):
    """Check preprocess() with its convert_to_int dependency replaced by a mock.

    NOTE(review): the signature takes `self`, so this presumably belongs
    inside a test class - confirm.
    """
    # `call` builds the entries that call_args_list is compared against
    from unittest.mock import call

    raw_path, clean_path = raw_and_clean_data_file
    # Replace the dependency with the bug-free mock
    convert_to_int_mock = mocker.patch("data.preprocessing_helpers.convert_to_int",
                                       side_effect=convert_to_int_bug_free)
    preprocess(raw_path, clean_path)
    # Check if preprocess() called the dependency correctly
    assert convert_to_int_mock.call_args_list == [call("1,801"), call("201,411"), call("2,002"), call("333,209"), call("1990"), call("782,911"), call("1,285"), call("389129")]
    with open(clean_path, "r") as f:
        lines = f.readlines()
    # Lines read from the file contain a real tab and newline, so the expected
    # strings must use "\t" and "\n", not escaped backslashes
    first_line = lines[0]
    assert first_line == "1801\t201411\n"
    second_line = lines[1]
    assert second_line == "2002\t333209\n"

## Testing module

In [None]:
import pytest
import numpy as np
from visualization import get_plot_for_best_fit_line
@pytest.mark.mpl_image_compare
# Under the hood baseline generation and comparison
def test_plot_for_linear_data():
    """Return the best-fit-line plot for a small linear data set."""
    slope, intercept = 2.0, 1.0
    x_array = np.array([1.0, 2.0, 3.0])
    # Linear data set
    y_array = np.array([3.0, 5.0, 7.0])
    return get_plot_for_best_fit_line(
        slope, intercept, x_array, y_array, "Test plot for linear data"
    )


In [None]:
#Generating the baseline image
pytest --mpl-generate-path /home/repl/workspace/project/tests/visualization/baseline -k "test_plot_for_almost_linear_data"

In [None]:
#run the test
pytest -k "test_plot_for_linear_data" --mpl
#Reading failure report
pytest -k "test_plot_for_linear_data" --mpl