Skip to content

Commit

Permalink
Update comments and improve all functions
Browse files Browse the repository at this point in the history
  • Loading branch information
mikeqfu committed Sep 10, 2019
1 parent ffa1220 commit 3680975
Showing 1 changed file with 120 additions and 86 deletions.
206 changes: 120 additions & 86 deletions pyhelpers/store.py
Expand Up @@ -8,65 +8,71 @@
import feather
import rapidjson

from pyhelpers.misc import confirmed


# Save feather file
def save_feather(feather_data, path_to_feather, verbose=False):
    """
    Save a data frame as a feather-formatted file, creating parent directories as needed.

    The data is first written with "DataFrame.to_feather()". If that raises a ValueError, the "feather-format"
    package ("feather.write_dataframe()") is tried instead. A failure of either attempt is not re-raised;
    it is only reported, and only when 'verbose' is True.

    :param feather_data: [pd.DataFrame] pd.DataFrame to be dumped as a 'feather-formatted' file
    :param path_to_feather: [str] local file path
    :param verbose: [bool] whether to print note (default: False)
    :return: None; messages showing whether or not the data has been successfully saved or updated are printed
    """
    feather_filename = os.path.basename(path_to_feather)
    # Decide the wording before writing, while the existence check still reflects the previous state
    msg = "{} \"{}\"".format("Updating" if os.path.isfile(path_to_feather) else "Saving", feather_filename)
    if verbose:
        print(msg, end=" ... ")
    try:
        os.makedirs(os.path.dirname(os.path.abspath(path_to_feather)), exist_ok=True)
        try:
            feather_data.to_feather(path_to_feather)
            if verbose:
                print("Successfully.")
        except ValueError as ve:
            if verbose:
                print("Failed with \"DataFrame.to_feather()\". {}. \n"
                      "Trying to use \"feather-format\" instead".format(ve), end=" ... \n")
            # The fallback is nested inside the outer 'try' so that its own failure is reported below
            # instead of escaping to the caller
            feather.write_dataframe(feather_data, path_to_feather)
            if verbose:
                print("{} ... Successfully. "
                      "(Use \"load_feather()\" to retrieve the saved feather file and check the consistency.)".format(msg))
    except Exception as e:
        if verbose:
            print("{} ... Failed. {}.".format(msg, e))


# Load feather file
def load_feather(path_to_feather, columns=None, use_threads=True, verbose=False):
    """
    Read a feather-formatted file via "feather.read_dataframe()".

    :param path_to_feather: [str] local file path to a feather-formatted file
    :param columns: [array-like; None (default)] column labels, passed through to the reader
    :param use_threads: [bool] passed through to the reader (default: True)
    :param verbose: [bool] whether to print note (default: False)
    :return: [pd.DataFrame] pd.DataFrame retrieved from the specified path
    """
    if verbose:
        print("Loading \"{}\"".format(os.path.basename(path_to_feather)), end=" ... ")
    loaded = feather.read_dataframe(path_to_feather, columns, use_threads)
    if verbose:
        print("Successfully.")
    return loaded


# Save Pickle file
def save_pickle(pickle_data, path_to_pickle, verbose=True):
def save_pickle(pickle_data, path_to_pickle, mode='wb', verbose=True):
"""
:param pickle_data: any object that could be dumped by the 'pickle' package
:param pickle_data: data that could be dumped by the 'pickle' package
:param path_to_pickle: [str] local file path
:param verbose: [bool]
:return: whether the data has been successfully saved
:param mode: [str] (default: 'wb')
:param verbose: [bool] whether to print note (default: False)
:return: printing message showing whether or not the data has been successfully saved or updated
"""
pickle_filename = os.path.basename(path_to_pickle)
pickle_dir = os.path.basename(os.path.dirname(path_to_pickle))
pickle_dir_parent = os.path.basename(os.path.dirname(os.path.dirname(path_to_pickle)))

if verbose:
print("{} \"{}\" ... ".format("Updating" if os.path.isfile(path_to_pickle) else "Saving",
" - ".join([pickle_dir_parent, pickle_dir, pickle_filename])), end="")

print("{} \"{}\"".format("Updating" if os.path.isfile(path_to_pickle) else "Saving",
" - ".join([pickle_dir_parent, pickle_dir, pickle_filename])), end=" ... ")
try:
os.makedirs(os.path.dirname(os.path.abspath(path_to_pickle)), exist_ok=True)
pickle_out = open(path_to_pickle, 'wb')
pickle_out = open(path_to_pickle, mode=mode)
pickle.dump(pickle_data, pickle_out)
pickle_out.close()
print("Successfully.") if verbose else None
Expand All @@ -75,37 +81,39 @@ def save_pickle(pickle_data, path_to_pickle, verbose=True):


# Load Pickle file
def load_pickle(path_to_pickle, mode='rb', verbose=False):
    """
    Load the object stored in a pickle file.

    :param path_to_pickle: [str] local file path
    :param mode: [str] mode in which the file is opened (default: 'rb')
    :param verbose: [bool] whether to print note (default: False)
    :return: data retrieved from the specified path
    """
    if verbose:
        print("Loading \"{}\"".format(os.path.basename(path_to_pickle)), end=" ... ")
    # NOTE: unpickling can execute arbitrary code; only load files from trusted sources.
    # The 'with' block closes the file even if 'pickle.load()' raises.
    with open(path_to_pickle, mode=mode) as pickle_in:
        pickle_data = pickle.load(pickle_in)
    if verbose:
        print("Successfully.")
    return pickle_data


# Save JSON file
def save_json(json_data, path_to_json, verbose=True):
def save_json(json_data, path_to_json, mode='w', verbose=True):
"""
:param json_data: any object that could be dumped by the 'json' package
:param json_data: data that could be dumped by the 'json' package
:param path_to_json: [str] local file path
:param verbose: [bool]
:return: whether the data has been successfully saved
:param mode: [str] (default: 'w')
:param verbose: [bool] whether to print note (default: False)
:return: printing message showing whether or not the data has been successfully saved or updated
"""
json_filename = os.path.basename(path_to_json)
json_dir = os.path.basename(os.path.dirname(path_to_json))
json_dir_parent = os.path.basename(os.path.dirname(os.path.dirname(path_to_json)))

print("{} \"{}\" ... ".format("Updating" if os.path.isfile(path_to_json) else "Saving",
" - ".join([json_dir_parent, json_dir, json_filename])), end="") if verbose else None
print("{} \"{}\"".format("Updating" if os.path.isfile(path_to_json) else "Saving",
" - ".join([json_dir_parent, json_dir, json_filename])), end=" ... ") if verbose else None
try:
os.makedirs(os.path.dirname(os.path.abspath(path_to_json)), exist_ok=True)
json_out = open(path_to_json, 'w')
json_out = open(path_to_json, mode=mode)
rapidjson.dump(json_data, json_out)
json_out.close()
print("Successfully.") if verbose else None
Expand All @@ -114,37 +122,40 @@ def save_json(json_data, path_to_json, verbose=True):


# Load JSON file
def load_json(path_to_json, mode='r', verbose=False):
    """
    Load the data stored in a JSON file (parsed with 'rapidjson').

    :param path_to_json: [str] local file path
    :param mode: [str] mode in which the file is opened (default: 'r')
    :param verbose: [bool] whether to print note (default: False)
    :return: data retrieved from the specified path
    """
    if verbose:
        print("Loading \"{}\"".format(os.path.basename(path_to_json)), end=" ... ")
    # The 'with' block closes the file even if parsing raises
    with open(path_to_json, mode=mode) as json_in:
        json_data = rapidjson.load(json_in)
    if verbose:
        print("Successfully.")
    return json_data


# Save Excel workbook
def save_excel(excel_data, path_to_excel, sep, index, sheet_name, engine='xlsxwriter'):
def save_excel(excel_data, path_to_excel, sep, index, sheet_name, engine='xlsxwriter', verbose=False):
"""
:param excel_data: any [DataFrame] that could be dumped saved as a Excel workbook, e.g. '.csv', '.xlsx'
:param excel_data: [pd.DataFrame] data that could be saved as a Excel workbook, e.g. ".csv", ".xlsx"
:param path_to_excel: [str] local file path
:param sep: [str] separator for saving excel_data to a '.csv' file
:param index: [bool]
:param sheet_name: [str] name of worksheet for saving the excel_data to a e.g. '.xlsx' file
:param engine: [str] ExcelWriter engine; pandas writes Excel files using the 'xlwt' module for '.xls' files and the
'openpyxl' or 'xlsxWriter' modules for '.xlsx' files.
:return: whether the data has been successfully saved or updated
:param sep: [str] separator for saving 'excel_data' as a ".csv" file
:param index: [bool] whether to include the index as a column
:param sheet_name: [str] name of worksheet for saving the excel_data (for example, as a ".xlsx" file)
:param engine: [str] ExcelWriter engine. pandas writes Excel files using the 'xlwt' module for ".xls" files and the
'openpyxl' or 'xlsxwriter' (default) for ".xlsx" files.
:param verbose: [bool] whether to print note (default: False)
:return: printing message showing whether or not the data has been successfully saved or updated
"""
import pandas as pd

excel_filename = os.path.basename(path_to_excel)
_, save_as = os.path.splitext(excel_filename)
print("{} \"{}\" ... ".format("Updating" if os.path.isfile(path_to_excel) else "Saving", excel_filename), end="")
if verbose:
print("{} \"{}\"".format("Updating" if os.path.isfile(path_to_excel) else "Saving", excel_filename),
end=" ... ")
try:
os.makedirs(os.path.dirname(os.path.abspath(path_to_excel)), exist_ok=True)
if excel_filename.endswith(".csv"): # Save the data to a .csv file
Expand All @@ -160,91 +171,113 @@ def save_excel(excel_data, path_to_excel, sep, index, sheet_name, engine='xlsxwr
freeze_panes=None)
xlsx_writer.save()
xlsx_writer.close()
print("Successfully.")
print("Successfully.") if verbose else None
except Exception as e:
print("Failed. {}.".format(e))
print("Failed. {}.".format(e)) if verbose else None


# Save data locally (".csv", ".xlsx", ".xls", ".feather", ".json" or ".pickle")
def save(data, path_to_file, sep=',', index=False, sheet_name='Sheet1', engine='xlsxwriter', deep_copy=True,
         verbose=False):
    """
    Save data to a local file, choosing the writer from the file extension.

    ".csv"/".xlsx"/".xls" go to save_excel(), ".feather" to save_feather(), ".json" to save_json() and ".pickle"
    to save_pickle(). For any other extension the user is asked whether to save the data as a pickle file.

    :param data: data that could be dumped as .feather, .json, .pickle and .csv/.xlsx/.xls
    :param path_to_file: [str] local file path
    :param sep: [str] separator for ".csv" (default: ',')
    :param index: [bool] whether to include the index as a column (default: False)
    :param sheet_name: [str] name of worksheet (default: 'Sheet1')
    :param engine: [str] 'xlsxwriter' (default) or 'openpyxl' for .xlsx; 'xlwt' for .xls
    :param deep_copy: [bool] whether to make a deep (rather than shallow) copy of the data before saving it
        (default: True)
    :param verbose: [bool] whether to print note (default: False)
    :return: None; messages showing whether or not the data has been successfully saved or updated are printed
    """
    import pandas as pd

    # Work on a copy so that the caller's object is not the one modified below
    if deep_copy:
        payload = copy.deepcopy(data)
    else:
        payload = copy.copy(data)

    # Make sure the target directory exists
    target_dir = os.path.dirname(os.path.abspath(path_to_file))
    os.makedirs(target_dir, exist_ok=True)

    # Turn a multi-level index into ordinary columns before writing
    if isinstance(payload, pd.DataFrame) and payload.index.nlevels > 1:
        payload.reset_index(inplace=True)

    # Dispatch on the file extension
    if path_to_file.endswith((".csv", ".xlsx", ".xls")):
        save_excel(payload, path_to_file, sep, index, sheet_name, engine, verbose=verbose)
    elif path_to_file.endswith(".feather"):
        save_feather(payload, path_to_file, verbose=verbose)
    elif path_to_file.endswith(".json"):
        save_json(payload, path_to_file, verbose=verbose)
    elif path_to_file.endswith(".pickle"):
        save_pickle(payload, path_to_file, verbose=verbose)
    else:
        print("Note that the current file extension is not recognisable by this \"save()\" function.")
        if confirmed("To save \"{}\" as a .pickle file? ".format(os.path.basename(path_to_file))):
            save_pickle(payload, path_to_file, verbose=verbose)


# Save a figure using matplotlib.pyplot.savefig and Inkscape
def save_fig(path_to_fig_file, dpi=None, verbose=False):
    """
    Save a figure with "matplotlib.pyplot.savefig()"; a ".svg" figure is also converted to ".emf" by Inkscape.

    The conversion to ".emf" only happens if Inkscape is found at its default Windows installation path.
    Failures are not raised; they are only reported, and only when 'verbose' is True.

    :param path_to_fig_file: [str] local file path of the figure
    :param dpi: [int; None (default)] passed through to "matplotlib.pyplot.savefig()"
    :param verbose: [bool] whether to print note; False (default)
    :return: None; messages showing whether or not the figure has been successfully saved or updated are printed
    """
    fig_filename = os.path.basename(path_to_fig_file)
    if verbose:
        print("{} \"{}\"".format("Updating" if os.path.isfile(path_to_fig_file) else "Saving", fig_filename),
              end=" ... ")
    try:
        path_root, save_as = os.path.splitext(path_to_fig_file)
        # Imported here so that matplotlib is only required when this function is used;
        # a missing installation is reported like any other failure
        import matplotlib.pyplot as plt
        plt.savefig(path_to_fig_file, dpi=dpi)
        path_to_inkscape = "C:\\Program Files\\Inkscape\\inkscape.exe"
        if save_as == ".svg" and os.path.isfile(path_to_inkscape):
            # Swap only the final extension; str.replace() would also rewrite ".svg" inside directory names
            path_to_emf = path_root + ".emf"
            subprocess.call([path_to_inkscape, '-z', path_to_fig_file, '-M', path_to_emf])
        if verbose:
            print("Successfully.")
    except Exception as e:
        if verbose:
            print("Failed. {}.".format(e))


# Save a .svg file as a .emf file
def save_svg_as_emf(path_to_svg, path_to_emf, verbose=False):
    """
    Convert a ".svg" file to a ".emf" file by calling Inkscape.

    Inkscape is looked up at its default Windows installation path only; nothing is converted if it is not there.

    :param path_to_svg: [str] local file path of the ".svg" file
    :param path_to_emf: [str] local file path of the ".emf" file to be produced
    :param verbose: [bool] whether to print note; False (default)
    :return: None; messages showing whether or not the figure has been successfully converted are printed
    """
    path_to_inkscape = "C:\\Program Files\\Inkscape\\inkscape.exe"

    # Guard: without Inkscape there is nothing to do
    if not os.path.isfile(path_to_inkscape):
        if verbose:
            print("\"Inkscape\" (https://inkscape.org) is required to run this function. "
                  "It is not found on this device.")
        return

    if verbose:
        print("Converting \".svg\" to \".emf\"", end=" ... ")
    try:
        subprocess.call([path_to_inkscape, '-z', path_to_svg, '-M', path_to_emf])
        if verbose:
            print("Done. \nThe .emf file is saved to \"{}\".".format(path_to_emf))
    except Exception as e:
        if verbose:
            print("Failed. {}".format(e))


# Save a web page as a PDF file
def save_web_page_as_pdf(url_to_web_page, path_to_pdf, page_size='A4', zoom=1.0, encoding='UTF-8'):
def save_web_page_as_pdf(url_to_web_page, path_to_pdf, page_size='A4', zoom=1.0, encoding='UTF-8', verbose=False):
"""
:param url_to_web_page: [str] URL of a web page
:param path_to_pdf: [str] local file path
:param page_size: [str]
:param zoom: [float]
:param encoding: [str]
:param page_size: [str] 'A4' (default)
:param zoom: [float] 1.0 (default)
:param encoding: [str] 'UTF-8' (default)
:param verbose: [bool] whether to print note; False (default)
"""
import pdfkit

path_to_wkhtmltopdf = "C:\\Program Files\\wkhtmltopdf\\bin\\wkhtmltopdf.exe"
if os.path.isfile(path_to_wkhtmltopdf):
try:
print("Saving the web page \"{}\" as PDF ... ".format(url_to_web_page), end="")
print("Saving the web page \"{}\" as PDF".format(url_to_web_page), end=" ... ") if verbose else None
config = pdfkit.configuration(wkhtmltopdf=path_to_wkhtmltopdf)
pdf_options = {'page-size': page_size,
# 'margin-top': '0',
Expand All @@ -255,8 +288,9 @@ def save_web_page_as_pdf(url_to_web_page, path_to_pdf, page_size='A4', zoom=1.0,
'encoding': encoding}
status = pdfkit.from_url(url_to_web_page, path_to_pdf, configuration=config, options=pdf_options)
print("Done. \nThe web page is saved to \"{}\"".format(path_to_pdf)
if status else "Failed. Check if the URL is available.")
if status else "Failed. Check if the URL is available.") if verbose else None
except Exception as e:
print("Failed. {}".format(e))
print("Failed. {}".format(e)) if verbose else None
else:
print("\"wkhtmltopdf\" (https://wkhtmltopdf.org) not found. It is required to run this function.")
print("\"wkhtmltopdf\" (https://wkhtmltopdf.org) is required to run this function. "
"It is not found on this device.") if verbose else None

0 comments on commit 3680975

Please sign in to comment.