modified ops.py

mikeqfu · May 28, 2020 · ff069a1 · ff069a1
1 parent 646bccc
commit ff069a1
Showing 1 changed file with 111 additions and 54 deletions.
diff --git a/pyhelpers/ops.py b/pyhelpers/ops.py
@@ -1,6 +1,7 @@
 """ Miscellaneous helper functions """
 
-import collections
+import collections.abc
+import datetime
 import inspect
 import itertools
 import math
@@ -16,14 +17,14 @@ def confirmed(prompt=None, resp=False, confirmation_required=True):
     """
     :param prompt: [str; None (default)]
     :param resp: [bool] (default: False)
-    :param confirmation_required: [bool] (default: True)
+    :param confirmation_required: [bool] whether to prompt a message for confirmation to proceed (default: True)
     :return: [bool]
 
     Example:
         prompt = "Create Directory?"
-        confirm(prompt, resp=True)
-        >> Create Directory? [No]|Yes: yes
-        >> True
+        confirmed(prompt, resp=True)
+        # Create Directory? [No]|Yes: yes
+        # True
 
     Reference: http://code.activestate.com/recipes/541096-prompt-the-user-for-confirmation/
     """
@@ -54,7 +55,7 @@ def get_variable_name(variable) -> str:
     """
     Example:
         x = 1
-        print(get_variable_name(x))  # 'x'
+        var_name = get_variable_name(x)  # 'x'
     """
     local_variables = inspect.currentframe().f_back.f_locals.items()
     var_str = [var_name for var_name, var_val in local_variables if var_val is variable]
@@ -70,7 +71,7 @@ def get_variable_names(*variable) -> list:
     """
     Examples:
         x = 1
-        print(get_variable_names(x))  # ['x']
+        get_variable_names(x)  # ['x']
         y = 2
         get_variable_names(x, y)  # ['x', 'y']
     """
@@ -96,7 +97,8 @@ def split_list_by_size(lst, chunk_size) -> types.GeneratorType:
     Example:
         lst = list(range(0, 10))
         chunk_size = 3
-        print(list(split_list_by_size(lst, chunk_size)))  # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
+        lists = split_list_by_size(lst, chunk_size)
+        list(lists)  # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
 
     Reference: https://stackoverflow.com/questions/312443/
     """
@@ -113,7 +115,8 @@ def split_list(lst, num_of_chunks) -> types.GeneratorType:
     Example:
         lst = list(range(0, 10))
         num_of_chunks = 3
-        print(list(split_list(lst, num_of_chunks)))  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
+        lists = split_list(lst, num_of_chunks)
+        list(lists)  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
 
     Reference: https://stackoverflow.com/questions/312443/
     """
@@ -173,7 +176,7 @@ def update_nested_dict(source_dict, updates) -> dict:
     Reference: https://stackoverflow.com/questions/3232943/
     """
     for key, val in updates.items():
-        if isinstance(val, collections.Mapping):
+        if isinstance(val, collections.abc.Mapping):
             source_dict[key] = update_nested_dict(source_dict.get(key, {}), val)
         elif isinstance(val, list):
             source_dict[key] = (source_dict.get(key, []) + val)
@@ -215,7 +218,7 @@ def get_all_values_from_nested_dict(key, target_dict) -> types.GeneratorType:
         elif isinstance(v, dict):
             for x in get_all_values_from_nested_dict(key, v):
                 yield x
-        elif isinstance(v, collections.Iterable):
+        elif isinstance(v, collections.abc.Iterable):
             for d in v:
                 if isinstance(d, dict):
                     for y in get_all_values_from_nested_dict(key, d):
@@ -242,8 +245,16 @@ def remove_multiple_keys_from_dict(target_dict, *keys):
 def get_extreme_outlier_bounds(data_set, k=1.5) -> tuple:
     """
     :param data_set: [array-like]
-    :param k: [numbers.Number]
-    :return: [tuple]
+    :param k: [numbers.Number] (default: 1.5)
+    :return lower_bound, upper_bound: [tuple]
+
+    Example:
+        import pandas as pd
+
+        data_set = pd.DataFrame(range(100), columns=['col'])
+        k = 1.5
+
+        lower_bound, upper_bound = get_extreme_outlier_bounds(data_set, k)  # (0.0, 148.5)
     """
     q1, q3 = np.percentile(data_set, 25), np.percentile(data_set, 75)
     iqr = q3 - q1
@@ -253,14 +264,18 @@ def get_extreme_outlier_bounds(data_set, k=1.5) -> tuple:
 
 
 # Calculate interquartile range
-def interquartile_range(x) -> numbers.Number:
+def interquartile_range(dat) -> numbers.Number:
     """
-    :param x: [array-like]
+    An alternative way to scipy.stats.iqr(x)
+    :param dat: [array-like]
     :return: [numbers.Number]
 
-    An alternative way to scipy.stats.iqr(x)
+    Example:
+        dat = pd.DataFrame(range(100), columns=['col'])
+
+        iqr = interquartile_range(dat)  # 49.5
     """
-    iqr = np.subtract(*np.percentile(x, [75, 25]))
+    iqr = np.subtract(*np.percentile(dat, [75, 25]))
     return iqr
 
 
@@ -289,67 +304,82 @@ def find_closest_date(date, date_list, as_datetime=None, fmt="%Y-%m-%d %H:%M:%S.
         if isinstance(closest_date, str):
             closest_date = pd.to_datetime(closest_date)
     else:
-        if isinstance(closest_date, pd.datetime):
+        if isinstance(closest_date, datetime.datetime):
             closest_date = closest_date.strftime(fmt)
     return closest_date
 
 
 # Colour ramps
-def cmap_discretisation(cmap_param, no_of_colours):
+def cmap_discretisation(cmap, n_colours):
     """
-    :param cmap_param: colormap instance, e.g. cm.jet
-    :param no_of_colours: number of colours
-    :return: a discrete colormap from the continuous colormap cmap.
+    :param cmap: [matplotlib.colors.Colormap; str] colormap instance (e.g. matplotlib.cm.jet) or a colormap name
+    :param n_colours: [int] number of colours
+    :return colour_map: [matplotlib.colors.LinearSegmentedColormap] a discrete colormap from the continuous `cmap`.
+
+    Reference: http://sensitivecities.com/so-youd-like-to-make-a-map-using-python-EN.html#.WbpP0T6GNQB
 
     Example:
-        x = np.resize(np.arange(100), (5, 100))
-        d_jet = cmap_discretize(cm.jet, 5)
-        plt.imshow(x, cmap=d_jet)
+        import matplotlib.cm
+        import matplotlib.pyplot as plt
 
-    Reference: http://sensitivecities.com/so-youd-like-to-make-a-map-using-python-EN.html#.WbpP0T6GNQB
+        cmap = matplotlib.cm.Accent
+        n_colours = 5
+
+        cm_accent = cmap_discretisation(cmap, n_colours)
+
+        x = np.resize(range(100), (5, 100))
+        plt.imshow(x, cmap=cm_accent)
     """
-    if isinstance(cmap_param, str):
+    if isinstance(cmap, str):
         import matplotlib.cm
-        cmap_param = matplotlib.cm.get_cmap(cmap_param)
+        cmap = matplotlib.cm.get_cmap(cmap)
 
-    colours_i = np.concatenate((np.linspace(0, 1., no_of_colours), (0., 0., 0., 0.)))
-    colours_rgba = cmap_param(colours_i)
-    indices = np.linspace(0, 1., no_of_colours + 1)
+    colours_i = np.concatenate((np.linspace(0, 1., n_colours), (0., 0., 0., 0.)))
+    colours_rgba = cmap(colours_i)
+    indices = np.linspace(0, 1., n_colours + 1)
     c_dict = {}
 
     for ki, key in enumerate(('red', 'green', 'blue')):
-        c_dict[key] = [(indices[x], colours_rgba[x - 1, ki], colours_rgba[x, ki]) for x in range(no_of_colours + 1)]
+        c_dict[key] = [(indices[x], colours_rgba[x - 1, ki], colours_rgba[x, ki]) for x in range(n_colours + 1)]
 
     import matplotlib.colors
-    colour_map = matplotlib.colors.LinearSegmentedColormap(cmap_param.name + '_%d' % no_of_colours, c_dict, 1024)
+    colour_map = matplotlib.colors.LinearSegmentedColormap(cmap.name + '_%d' % n_colours, c_dict, 1024)
 
     return colour_map
 
 
 # Colour bars
-def colour_bar_index(no_of_colours, cmap_param, labels=None, **kwargs):
+def colour_bar_index(cmap, n_colours, labels=None, **kwargs):
     """
-    :param no_of_colours: [int] number of colors
-    :param cmap_param: colormap instance, eg. cm.jet
-    :param labels: [list; None (default)]
-    :param kwargs:
-
     To stop making off-by-one errors
     Takes a standard colour ramp, and discretizes it, then draws a colour bar with correctly aligned labels
 
+    :param cmap: [matplotlib.colors.Colormap; str] colormap instance (e.g. matplotlib.cm.jet) or a colormap name
+    :param n_colours: [int] number of colours
+    :param labels: [list; None (default)]
+    :param kwargs: optional arguments used by `plt.colorbar()`
+
     Reference: http://sensitivecities.com/so-youd-like-to-make-a-map-using-python-EN.html#.WbpP0T6GNQB
+
+    Example:
+        cmap = matplotlib.cm.Accent
+        n_colours = 5
+        labels = list('abcde')
+
+        colour_bar_index(cmap, n_colours)
+        colour_bar_index(cmap, n_colours, labels)
     """
-    cmap_param = cmap_discretisation(cmap_param, no_of_colours)
+    cmap = cmap_discretisation(cmap, n_colours)
 
     import matplotlib.cm
-    mappable = matplotlib.cm.ScalarMappable(cmap=cmap_param)
+    mappable = matplotlib.cm.ScalarMappable(cmap=cmap)
     mappable.set_array(np.array([]))
-    mappable.set_clim(-0.5, no_of_colours + 0.5)
+    mappable.set_clim(-0.5, n_colours + 0.5)
 
-    import matplotlib.pyplot
-    colour_bar = matplotlib.pyplot.colorbar(mappable, **kwargs)
-    colour_bar.set_ticks(np.linspace(0, no_of_colours, no_of_colours))
-    colour_bar.set_ticklabels(range(no_of_colours))
+    import matplotlib.pyplot as plt
+    colour_bar = plt.colorbar(mappable, **kwargs)
+    colour_bar.set_ticks(np.linspace(0, n_colours, n_colours))
+    colour_bar.set_ticklabels(range(n_colours))
 
     if labels:
         colour_bar.set_ticklabels(labels)
@@ -358,11 +388,38 @@ def colour_bar_index(no_of_colours, cmap_param, labels=None, **kwargs):
 
 
 # Detect if a str type column contains 'nan' when reading csv files
-def detect_nan_for_str_column(pd_dataframe, column_names=None):
-    if column_names:
-        col_names = column_names
-    else:
-        col_names = pd_dataframe.columns
-    for x in col_names:
-        if 'nan' in [str(v) for v in pd_dataframe[x].unique() if isinstance(v, str) or np.isnan(v)]:
-            yield pd_dataframe.columns.get_loc(x)
+def detect_nan_for_str_column(data, column_names=None):
+    """
+    Yield the position of every inspected column whose unique values include 'nan'.
+    :param data: [pd.DataFrame] data frame to inspect
+    :param column_names: [iterable; None (default)] columns to inspect; if None, all columns of `data`
+    :return: [types.GeneratorType] integer positions (within `data.columns`) of the matching columns
+
+    Example:
+        data = pd.DataFrame(np.resize(range(10), (10, 2)), columns=['a', 'b'])
+        data.iloc[3, 1] = np.nan
+
+        list(detect_nan_for_str_column(data))  # [1]
+    """
+    names = data.columns if column_names is None else column_names
+    for col in names:
+        # Keep only strings and NaN numbers; either of them can render as the text 'nan'
+        rendered = [str(val) for val in data[col].unique() if isinstance(val, str) or np.isnan(val)]
+        if 'nan' in rendered:
+            yield data.columns.get_loc(col)
+
+
+# Create a rotation matrix (counterclockwise)
+def create_rotation_matrix(theta):
+    """
+    Create the 2-D matrix that rotates column vectors counterclockwise by `theta`.
+    :param theta: [numbers.Number] counterclockwise rotation angle (in radian)
+    :return: [numpy.ndarray] of shape (2, 2), laid out as [[cos, -sin], [sin, cos]]
+
+    Example:
+        theta = np.pi / 6  # 30 degrees expressed in radians
+        rotation_mat = create_rotation_matrix(theta)  # approx. [[0.866, -0.5], [0.5, 0.866]]
+    """
+    sin_theta, cos_theta = np.sin(theta), np.cos(theta)
+    rotation_mat = np.array([[cos_theta, -sin_theta], [sin_theta, cos_theta]])
+    return rotation_mat