Skip to content

Commit

Permalink
modified ops.py
Browse files Browse the repository at this point in the history
  • Loading branch information
mikeqfu committed May 28, 2020
1 parent 646bccc commit ff069a1
Showing 1 changed file with 111 additions and 54 deletions.
165 changes: 111 additions & 54 deletions pyhelpers/ops.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
""" Miscellaneous helper functions """

import collections
import collections.abc
import datetime
import inspect
import itertools
import math
Expand All @@ -16,14 +17,14 @@ def confirmed(prompt=None, resp=False, confirmation_required=True):
"""
:param prompt: [str; None (default)]
:param resp: [bool] (default: False)
:param confirmation_required: [bool] (default: True)
:param confirmation_required: [bool] whether to prompt a message for confirmation to proceed (default: True)
:return: [bool]
Example:
prompt = "Create Directory?"
confirm(prompt, resp=True)
>> Create Directory? [No]|Yes: yes
>> True
confirmed(prompt, resp=True)
# Create Directory? [No]|Yes: yes
# True
Reference: http://code.activestate.com/recipes/541096-prompt-the-user-for-confirmation/
"""
Expand Down Expand Up @@ -54,7 +55,7 @@ def get_variable_name(variable) -> str:
"""
Example:
x = 1
print(get_variable_name(x)) # 'x'
var_name = get_variable_name(x) # 'x'
"""
local_variables = inspect.currentframe().f_back.f_locals.items()
var_str = [var_name for var_name, var_val in local_variables if var_val is variable]
Expand All @@ -70,7 +71,7 @@ def get_variable_names(*variable) -> list:
"""
Examples:
x = 1
print(get_variable_names(x)) # ['x']
get_variable_names(x) # ['x']
y = 2
get_variable_names(x, y) # ['x', 'y']
"""
Expand All @@ -96,7 +97,8 @@ def split_list_by_size(lst, chunk_size) -> types.GeneratorType:
Example:
lst = list(range(0, 10))
chunk_size = 3
print(list(split_list_by_size(lst, chunk_size))) # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
lists = split_list_by_size(lst, chunk_size)
list(lists) # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
Reference: https://stackoverflow.com/questions/312443/
"""
Expand All @@ -113,7 +115,8 @@ def split_list(lst, num_of_chunks) -> types.GeneratorType:
Example:
lst = list(range(0, 10))
num_of_chunks = 3
print(list(split_list(lst, num_of_chunks))) # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
lists = split_list(lst, num_of_chunks)
list(lists) # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
Reference: https://stackoverflow.com/questions/312443/
"""
Expand Down Expand Up @@ -173,7 +176,7 @@ def update_nested_dict(source_dict, updates) -> dict:
Reference: https://stackoverflow.com/questions/3232943/
"""
for key, val in updates.items():
if isinstance(val, collections.Mapping):
if isinstance(val, collections.abc.Mapping):
source_dict[key] = update_nested_dict(source_dict.get(key, {}), val)
elif isinstance(val, list):
source_dict[key] = (source_dict.get(key, []) + val)
Expand Down Expand Up @@ -215,7 +218,7 @@ def get_all_values_from_nested_dict(key, target_dict) -> types.GeneratorType:
elif isinstance(v, dict):
for x in get_all_values_from_nested_dict(key, v):
yield x
elif isinstance(v, collections.Iterable):
elif isinstance(v, collections.abc.Iterable):
for d in v:
if isinstance(d, dict):
for y in get_all_values_from_nested_dict(key, d):
Expand All @@ -242,8 +245,16 @@ def remove_multiple_keys_from_dict(target_dict, *keys):
def get_extreme_outlier_bounds(data_set, k=1.5) -> tuple:
"""
:param data_set: [array-like]
:param k: [numbers.Number]
:return: [tuple]
:param k: [numbers.Number] (default: 1.5)
:return lower_bound, upper_bound: [tuple]
Example:
import pandas as pd
data_set = pd.DataFrame(range(100), columns=['col'])
k = 1.5
lower_bound, upper_bound = get_extreme_outlier_bounds(data_set, k) # (0.0, 148.5)
"""
q1, q3 = np.percentile(data_set, 25), np.percentile(data_set, 75)
iqr = q3 - q1
Expand All @@ -253,14 +264,18 @@ def get_extreme_outlier_bounds(data_set, k=1.5) -> tuple:


# Calculate interquartile range
def interquartile_range(x) -> numbers.Number:
def interquartile_range(dat) -> numbers.Number:
"""
:param x: [array-like]
An alternative way to scipy.stats.iqr(x)
:param dat: [array-like]
:return: [numbers.Number]
An alternative way to scipy.stats.iqr(x)
Example:
dat = pd.DataFrame(range(100), columns=['col'])
iqr = interquartile_range(dat) # 49.5
"""
iqr = np.subtract(*np.percentile(x, [75, 25]))
iqr = np.subtract(*np.percentile(dat, [75, 25]))
return iqr


Expand Down Expand Up @@ -289,67 +304,82 @@ def find_closest_date(date, date_list, as_datetime=None, fmt="%Y-%m-%d %H:%M:%S.
if isinstance(closest_date, str):
closest_date = pd.to_datetime(closest_date)
else:
if isinstance(closest_date, pd.datetime):
if isinstance(closest_date, datetime.datetime):
closest_date = closest_date.strftime(fmt)
return closest_date


# Colour ramps
def cmap_discretisation(cmap_param, no_of_colours):
def cmap_discretisation(cmap, n_colours):
"""
:param cmap_param: colormap instance, e.g. cm.jet
:param no_of_colours: number of colours
:return: a discrete colormap from the continuous colormap cmap.
:param cmap: [matplotlib.colors.ListedColormap] colormap instance, e.g. matplotlib.cm.jet
:param n_colours: [int] number of colours
:return colour_map: [matplotlib.colors.LinearSegmentedColormap] a discrete colormap from the continuous `cmap`.
Reference: http://sensitivecities.com/so-youd-like-to-make-a-map-using-python-EN.html#.WbpP0T6GNQB
Example:
x = np.resize(np.arange(100), (5, 100))
d_jet = cmap_discretize(cm.jet, 5)
plt.imshow(x, cmap=d_jet)
import matplotlib.cm
import matplotlib.pyplot as plt
Reference: http://sensitivecities.com/so-youd-like-to-make-a-map-using-python-EN.html#.WbpP0T6GNQB
cmap = matplotlib.cm.Accent
n_colours = 5
cm_accent = cmap_discretisation(cmap, n_colours)
x = np.resize(range(100), (5, 100))
plt.imshow(x, cmap=cm_accent)
"""
if isinstance(cmap_param, str):
if isinstance(cmap, str):
import matplotlib.cm
cmap_param = matplotlib.cm.get_cmap(cmap_param)
cmap = matplotlib.cm.get_cmap(cmap)

colours_i = np.concatenate((np.linspace(0, 1., no_of_colours), (0., 0., 0., 0.)))
colours_rgba = cmap_param(colours_i)
indices = np.linspace(0, 1., no_of_colours + 1)
colours_i = np.concatenate((np.linspace(0, 1., n_colours), (0., 0., 0., 0.)))
colours_rgba = cmap(colours_i)
indices = np.linspace(0, 1., n_colours + 1)
c_dict = {}

for ki, key in enumerate(('red', 'green', 'blue')):
c_dict[key] = [(indices[x], colours_rgba[x - 1, ki], colours_rgba[x, ki]) for x in range(no_of_colours + 1)]
c_dict[key] = [(indices[x], colours_rgba[x - 1, ki], colours_rgba[x, ki]) for x in range(n_colours + 1)]

import matplotlib.colors
colour_map = matplotlib.colors.LinearSegmentedColormap(cmap_param.name + '_%d' % no_of_colours, c_dict, 1024)
colour_map = matplotlib.colors.LinearSegmentedColormap(cmap.name + '_%d' % n_colours, c_dict, 1024)

return colour_map


# Colour bars
def colour_bar_index(no_of_colours, cmap_param, labels=None, **kwargs):
def colour_bar_index(cmap, n_colours, labels=None, **kwargs):
"""
:param no_of_colours: [int] number of colors
:param cmap_param: colormap instance, eg. cm.jet
:param labels: [list; None (default)]
:param kwargs:
To stop making off-by-one errors
Takes a standard colour ramp, and discretizes it, then draws a colour bar with correctly aligned labels
:param cmap: [matplotlib.colors.ListedColormap] colormap instance, e.g. matplotlib.cm.jet
:param n_colours: [int] number of colors
:param labels: [list; None (default)]
:param kwargs: optional arguments used by `plt.colorbar()`
Reference: http://sensitivecities.com/so-youd-like-to-make-a-map-using-python-EN.html#.WbpP0T6GNQB
Example:
cmap = matplotlib.cm.Accent
n_colours = 5
labels = list('abcde')
colour_bar_index(cmap, n_colours)
colour_bar_index(cmap, n_colours, labels)
"""
cmap_param = cmap_discretisation(cmap_param, no_of_colours)
cmap = cmap_discretisation(cmap, n_colours)

import matplotlib.cm
mappable = matplotlib.cm.ScalarMappable(cmap=cmap_param)
mappable = matplotlib.cm.ScalarMappable(cmap=cmap)
mappable.set_array(np.array([]))
mappable.set_clim(-0.5, no_of_colours + 0.5)
mappable.set_clim(-0.5, n_colours + 0.5)

import matplotlib.pyplot
colour_bar = matplotlib.pyplot.colorbar(mappable, **kwargs)
colour_bar.set_ticks(np.linspace(0, no_of_colours, no_of_colours))
colour_bar.set_ticklabels(range(no_of_colours))
import matplotlib.pyplot as plt
colour_bar = plt.colorbar(mappable, **kwargs)
colour_bar.set_ticks(np.linspace(0, n_colours, n_colours))
colour_bar.set_ticklabels(range(n_colours))

if labels:
colour_bar.set_ticklabels(labels)
Expand All @@ -358,11 +388,38 @@ def colour_bar_index(no_of_colours, cmap_param, labels=None, **kwargs):


# Detect if a str type column contains 'nan' when reading csv files
def detect_nan_for_str_column(pd_dataframe, column_names=None):
if column_names:
col_names = column_names
else:
col_names = pd_dataframe.columns
for x in col_names:
if 'nan' in [str(v) for v in pd_dataframe[x].unique() if isinstance(v, str) or np.isnan(v)]:
yield pd_dataframe.columns.get_loc(x)
def detect_nan_for_str_column(data, column_names=None):
    """
    Find the columns that hold a NaN value or the literal string 'nan'.

    :param data: [pd.DataFrame]
    :param column_names: [iterable; None (default)] specified column names; if None, all columns
    :return: [types.GeneratorType] position index (within `data.columns`) of each checked column
        that contains a float NaN or the string 'nan'; each column is reported at most once

    Values that are neither strings nor floats (e.g. None, dates, lists) are ignored
    rather than being passed to `np.isnan`, which raises TypeError for such inputs.

    Example:
        data = pd.DataFrame(np.resize(range(10), (10, 2)), columns=['a', 'b'])
        data.iloc[3, 1] = np.nan
        col_pos = detect_nan_for_str_column(data, column_names=None)
        list(col_pos) == [1]
    """
    if column_names is None:
        column_names = data.columns

    def _is_nan_like(value):
        # A str can only match as the literal text 'nan' (e.g. left over from a csv read)
        if isinstance(value, str):
            return value == 'nan'
        # Only real floats can be NaN; the type guard keeps np.isnan away from objects it rejects
        return isinstance(value, (float, np.floating)) and bool(np.isnan(value))

    for col in column_names:
        # any() short-circuits, so scanning stops at the first NaN-like value in the column
        if any(_is_nan_like(v) for v in data[col].unique()):
            yield data.columns.get_loc(col)


# Create a rotation matrix (counterclockwise)
def create_rotation_matrix(theta):
    """
    Build a 2x2 matrix from the sine and cosine of an angle.

    :param theta: [numbers.Number] angle (in radian)
    :return: [numpy.ndarray] of shape (2, 2), laid out as
        [[ sin(theta), cos(theta)],
         [-cos(theta), sin(theta)]]

    NOTE(review): this is not the conventional counterclockwise rotation matrix
    [[cos, -sin], [sin, cos]]; it equals a conventional rotation by (theta - pi/2),
    so theta = 0 does not give the identity. Confirm with callers before changing.

    Example:
        theta = np.pi / 2
        rotation_mat = create_rotation_matrix(theta)  # approximately [[1, 0], [0, 1]]
    """
    s = np.sin(theta)
    c = np.cos(theta)
    first_row = [s, c]
    second_row = [-c, s]
    return np.array([first_row, second_row])

0 comments on commit ff069a1

Please sign in to comment.