# Include-Setup-Functions
- Import libraries for handling data
- Set plotting styles
- Set how dataframes are rendered in Jupyter notebooks

In [None]:
# Python logging to monitor gensim models
#### TURN ON AS NEEDED ####
#import logging
#logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

## Packages for computation and data manipulation

In [None]:
import numpy as np # for number crunching
import pandas as pd # for data loading and manipulation
import time
import pickle
import re
import random
import re # regex
import itertools
from itertools import groupby
import collections
from collections import Counter
from collections import defaultdict
from datetime import date, datetime, timedelta
import string
import io

import scipy
from scipy import stats
import scipy.sparse as sp

# To quantify the distance between any two distributions
from scipy.spatial import distance

# To calculate the pairwise distance of items in a list using pre-defined or custom-defined distance metrics
from sklearn.metrics.pairwise import pairwise_distances

# For visualizing cluster boundaries
from scipy.spatial import Voronoi

# For symbol manipulation
import sympy

## Scikit Learn packages for ML models

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn import svm
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.decomposition import PCA

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, f1_score

## Useful programming tips
- Flattening a list of lists into a single list
 - https://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python
 - flat_list = [item for sublist in K for item in sublist] where K is the list of lists

## Set up plotting packages, modules, styles

In [1]:
# Import the pyplot module from the matplotlib library
#from matplotlib import pyplot as plt
#import matplotlib as mpl
import matplotlib as mpl, matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from mpl_toolkits.mplot3d import Axes3D
# Use Jupyter magics to plot inline without needing to call plt.show()
# From the documentation (https://stackoverflow.com/questions/43027980/)
# "With backend = 'inline', the output of plotting commands is displayed inline within frontends 
#   like the Jupyter notebook, directly below the code cell that produced it. 
#   The resulting plots will then also be stored in the notebook document."
#%matplotlib inline

In [None]:
# Import the Seaborn library (by Michael Waskom)
import seaborn as sns
# Apply Seaborn's global plot settings.
# Settings reference: https://seaborn.pydata.org/tutorial/aesthetics.html
sns.set(
    context='notebook',
    palette='bright',
    font='sans-serif',
    font_scale=1.3,
    color_codes=True,
    rc=None,
)
# The palette passed above is followed by an explicit switch to "husl"
# (alternative kept for quick toggling: "Set2").
sns.set_palette("husl")
#sns.set_palette("Set2")

In [None]:
# The current Seaborn styles
#sns.axes_style()

In [2]:
# Print the names of the matplotlib styles available in this installation.
# The style name chosen in the "SET the matplotlib style" cell should match one of these.
print(plt.style.available)

['bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark-palette', 'seaborn-dark', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'seaborn', 'Solarize_Light2', 'tableau-colorblind10', '_classic_test']


In [None]:
#### SET the matplotlib style HERE ####
style = 'seaborn-whitegrid'
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-<name>' and
# later releases dropped the old names, so fall back to the renamed style when
# the legacy name is not offered by this installation.
if style not in plt.style.available:
    _renamed_style = style.replace('seaborn', 'seaborn-v0_8', 1)
    if _renamed_style in plt.style.available:
        style = _renamed_style
# If neither name exists, plt.style.use raises exactly as it did before.
plt.style.use(style)
# See https://xkcd.com/color/rgb/ for xkcd named colors
xkcd_colors = ["blue", "hot pink", "violet", "olive", "lime green", "lemon yellow", "goldenrod", "dark orange", "periwinkle", "black"]

In [None]:
#### UNCOMMENT TO TEST ####
# Test out the style settings
#print("Here's what the {} style looks like...".format(style))
#fig, axes = plt.subplots(1, 4, figsize=(12, 4))
#axes[0].set_xlim(0, 0.5)

In [None]:
# Plotnine for ggplot
## NOTE: This will throw the following warning:
### FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. 
### Please use the pandas.tseries module instead. from pandas.core import datetools
### Not sure how to handle it.

#from plotnine import ggplot
#from plotnine import *

## Jupyter notebook settings

In [None]:
# Import display from IPython.display so objects (e.g., dataframes) can be rendered explicitly.
# The commented-out line below is an older way of showing all dataframe columns
# (https://stackoverflow.com/questions/47022070/); it is inactive here, and the
# pandas display options are configured in the following cell instead.
from IPython.display import display
#pd.options.display.max_columns = None

In [None]:
# Pandas display settings for this notebook (UPDATED May 19, 2021).
# References:
#   https://stackoverflow.com/questions/25351968
#   https://stackoverflow.com/questions/49188960/how-to-show-all-of-columns-name-on-pandas-dataframe

# Show every column of a dataframe.
pd.options.display.max_columns = None
# Do not truncate long column values (e.g., a large text field).
pd.options.display.max_colwidth = None
# Row limit for displayed dataframes.
pd.options.display.max_rows = 100

In [None]:
# Configure slide scrolling for RISE ('livereveal') slideshows
# from hfinger at https://github.com/damianavila/RISE/issues/185
#### NOTE: Have to restart notebook server after running it the first time ####
# The ConfigManager import only works where the classic notebook package provides
# notebook.services.config. Guard it so that running this setup notebook elsewhere
# skips the slide configuration instead of aborting every cell that follows.
try:
    from notebook.services.config import ConfigManager
except ImportError:
    ConfigManager = None

if ConfigManager is None:
    # Keep the name defined so later references to `cm` fail predictably.
    cm = None
    print("notebook.services.config is not available; skipping RISE 'livereveal' configuration.")
else:
    cm = ConfigManager()
    cm.update('livereveal', {'width': 1024, 'height': 768, 'scroll': True})

## Seaborn cmap options

In [None]:
# Reference list of names that can be given as the `cmap` value of a Seaborn heatmap.
# Nothing in this cell applies a colormap; the list is only defined for lookup.
# Most names appear in pairs, `name` and `name_r` (presumably the reversed colormap,
# following matplotlib's naming convention -- confirm against your installed version).
cmap_options = ['Accent', 'Accent_r', 'Blues', 'Blues_r', 'BrBG', 'BrBG_r', 'BuGn', 'BuGn_r', 'BuPu', 'BuPu_r', 
                'CMRmap', 'CMRmap_r', 'Dark2', 'Dark2_r', 'GnBu', 'GnBu_r', 'Greens', 'Greens_r', 'Greys', 'Greys_r', 
                'OrRd', 'OrRd_r', 'Oranges', 'Oranges_r', 'PRGn', 'PRGn_r', 'Paired', 'Paired_r', 'Pastel1', 'Pastel1_r', 
                'Pastel2', 'Pastel2_r', 'PiYG', 'PiYG_r', 'PuBu', 'PuBuGn', 'PuBuGn_r', 'PuBu_r', 'PuOr', 'PuOr_r', 
                'PuRd', 'PuRd_r', 'Purples', 'Purples_r', 'RdBu', 'RdBu_r', 'RdGy', 'RdGy_r', 'RdPu', 'RdPu_r', 'RdYlBu', 
                'RdYlBu_r', 'RdYlGn', 'RdYlGn_r', 'Reds', 'Reds_r', 'Set1', 'Set1_r', 'Set2', 'Set2_r', 'Set3', 'Set3_r', 
                'Spectral', 'Spectral_r', 'Wistia', 'Wistia_r', 'YlGn', 'YlGnBu', 'YlGnBu_r', 'YlGn_r', 'YlOrBr', 
                'YlOrBr_r', 'YlOrRd', 'YlOrRd_r', 'afmhot', 'afmhot_r', 'autumn', 'autumn_r', 'binary', 'binary_r', 
                'bone', 'bone_r', 'brg', 'brg_r', 'bwr', 'bwr_r', 'cividis', 'cividis_r', 'cool', 'cool_r', 'coolwarm', 
                'coolwarm_r', 'copper', 'copper_r', 'crest', 'crest_r', 'cubehelix', 'cubehelix_r', 'flag', 'flag_r', 
                'flare', 'flare_r', 'gist_earth', 'gist_earth_r', 'gist_gray', 'gist_gray_r', 'gist_heat', 'gist_heat_r', 
                'gist_ncar', 'gist_ncar_r', 'gist_rainbow', 'gist_rainbow_r', 'gist_stern', 'gist_stern_r', 'gist_yarg', 
                'gist_yarg_r', 'gnuplot', 'gnuplot2', 'gnuplot2_r', 'gnuplot_r', 'gray', 'gray_r', 'hot', 'hot_r', 'hsv', 
                'hsv_r', 'icefire', 'icefire_r', 'inferno', 'inferno_r', 'jet', 'jet_r', 'magma', 'magma_r', 'mako', 
                'mako_r', 'nipy_spectral', 'nipy_spectral_r', 'ocean', 'ocean_r', 'pink', 'pink_r', 'plasma', 'plasma_r', 
                'prism', 'prism_r', 'rainbow', 'rainbow_r', 'rocket', 'rocket_r', 'seismic', 'seismic_r', 'spring', 
                'spring_r', 'summer', 'summer_r', 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', 'tab20b_r', 'tab20c', 
                'tab20c_r', 'terrain', 'terrain_r', 'turbo', 'turbo_r', 'twilight', 'twilight_r', 'twilight_shifted', 
                'twilight_shifted_r', 'viridis', 'viridis_r', 'vlag', 'vlag_r', 'winter', 'winter_r']