In [1]:
from scipy.stats import mode, kurtosis, skew
from datetime import datetime,timedelta
import numpy as np
import pandas as pd
import seaborn as sns
from MongoDB.client import SyncDB
import matplotlib.pyplot as plt

# ---- Study window: half-open range [startDate, endDate) ----
startDate = datetime(2021, 10, 10)
endDate = datetime(2022, 1, 14)

# ---- Data source ----
# MongoDB collection to read, the field to study, and the label shown on the chart.
data_collection = 'TA_Daily'
data_column = 'RSI'
data_label = 'RSI'

# Extra match criteria merged into every query,
# e.g. {'Direction': 'Buy', 'symbol': 'SPY'}; use {} for no extra filter.
data_match = {'symbol': 'SPY'}

# Optional key/value filter; leave both as '' when no filter is needed.
# Example value: {'$gte': '00:00:00', '$lt': '00:05:00'}
data_key = ''
data_value = ''

# ---- Statistics / chart tuning ----
# Number of decimals values are rounded to before the mode is computed.
mode_level = 2
# Reference level the +/- SD bands are centred on; set it close to the data
# mean to get a better chart.
BLSHMean = 50

In [2]:
# Load the study column for [startDate, endDate) into `df` (date-indexed,
# sorted, NaN-free) and `data` (plain NumPy array of the column values).
dateRange = (endDate - startDate).days

# The day may be stored under 'date' or 'Date', and either as a datetime or as
# a 'YYYY-MM-DD' string, so each combination is tried in turn until one
# returns documents.
_start_str = startDate.strftime('%Y-%m-%d')
_end_str = endDate.strftime('%Y-%m-%d')
_date_filters = (
    ('date', startDate, endDate),
    ('date', _start_str, _end_str),
    ('Date', startDate, endDate),
    ('Date', _start_str, _end_str),
)

data_list_1 = []
for _field, _lower, _upper in _date_filters:
    data_query = {**data_match, _field: {'$gte': _lower, '$lt': _upper}}
    data_list_1 = list(SyncDB.find(data_collection, data_query))
    if data_list_1:
        break

if not data_list_1:
    # Last resort: records keyed by 'TradeTime'. Sum the column per calendar
    # day so the result has the same one-row-per-day shape as the queries above.
    data_query = {**data_match, 'TradeTime': {'$gte': startDate, '$lt': endDate}}
    _total_column = 'total%s' % data_column
    pipeline = [
        {'$match': data_query},
        {'$group': {
            '_id': {
                'year': {'$year': '$TradeTime'},
                'month': {'$month': '$TradeTime'},
                'day': {'$dayOfMonth': '$TradeTime'},
            },
            _total_column: {'$sum': '$%s' % data_column},
        }},
    ]
    data_list_1 = [
        {
            'date': datetime(rec['_id']['year'], rec['_id']['month'], rec['_id']['day']),
            data_column: rec[_total_column],
        }
        for rec in SyncDB.aggregate(data_collection, pipeline)
    ]

if not data_list_1:
    # Fail with an actionable message instead of an IndexError further down.
    raise ValueError(
        'No documents found in %r matching %r between %s and %s'
        % (data_collection, data_match, _start_str, _end_str)
    )

data_index = 'date' if 'date' in data_list_1[0] else 'Date'

cols = [data_index, data_column]
df = pd.DataFrame(data_list_1)[cols]
# Normalise the date column (string or datetime) to datetime64 before indexing.
df[data_index] = pd.to_datetime(df[data_index])
df = df.set_index(data_index, drop=True).sort_index().dropna(how='any')

data = np.array(df[data_column])


In [3]:
# Density plot
def distplot(data, data_label, data_match):
    """Draw a histogram + KDE of ``data`` annotated with summary statistics.

    The chart shows vertical markers for the mean (green), median (blue) and
    mode (red), plus grey bands at BLSHMean +/- 1, 2 and 3 "SD". The figure is
    saved as a PNG in the current working directory and a viewing URL is
    printed.

    Parameters
    ----------
    data : array-like of numbers
        Values whose distribution is plotted.
    data_label : str
        X-axis label; also used in the chart title and the output filename.
    data_match : dict
        Query filter used to select the data. Its ``str()`` form, with the
        outer braces and all single quotes removed, is the first annotation line.

    Raises
    ------
    ValueError
        If ``data`` is empty.

    Notes
    -----
    Reads these module-level settings: ``mode_level`` (decimals used when
    rounding for the mode), ``BLSHMean`` (centre of the SD bands),
    ``startDate`` and ``endDate`` (title and filename).

    "SD" here is the root-mean-square deviation of the data from ``BLSHMean``,
    not the standard deviation around the data's own mean.
    """
    values = np.asarray(data)
    if values.size == 0:
        raise ValueError('distplot() needs at least one data point')

    fig, ax = plt.subplots(figsize=(26, 15))
    fig.subplots_adjust(hspace=0)
    sns.histplot(ax=ax, data=values, kde=True, bins=50, color='blue',
                 edgecolor='darkblue', alpha=0.3,
                 line_kws={'linewidth': 3})
    ax.set_xlabel(data_label)

    # --- summary statistics ---
    data_mean = np.mean(values)
    data_median = np.median(values)
    # scipy.stats.mode yields a length-1 array or a scalar depending on the
    # SciPy version; ravel()[0] reduces both to one scalar that axvline and
    # '%f' formatting accept.
    data_mode = np.ravel(mode(np.round(values, mode_level))[0])[0]
    if BLSHMean >= 5 * data_mean:
        print('BLSHMean is more than 5 times larger than data mean, please reset BLSHMean value to obtain better chart.')
    data_sd = ((values - BLSHMean) ** 2).mean() ** 0.5
    skewness = skew(values)
    kurto = kurtosis(values, fisher=False)  # Pearson definition (normal == 3)

    # --- vertical markers ---
    marker_style = {'linestyle': '-.', 'alpha': 1, 'linewidth': 3}
    for position, colour in ((data_mean, 'green'), (data_median, 'blue'), (data_mode, 'red')):
        ax.axvline(x=position, color=colour, **marker_style)
    for sign in (1, -1):
        for multiple in (1, 2, 3):
            ax.axvline(x=BLSHMean + sign * multiple * data_sd, color='grey', **marker_style)

    # --- text annotations, stacked downwards from the top-left corner ---
    annotations = [
        ("%s" % (str(data_match)[1:-1].replace("'", "")), 'black'),
        ("mean: %f" % data_mean, 'green'),
        ("median: %f" % data_median, 'blue'),
        ("mode: %f" % data_mode, 'red'),
        ("skewness: %f" % skewness, 'black'),
        ("kurtosis: %f" % kurto, 'black'),
        ("BLSHMean: %f" % BLSHMean, 'black'),
    ]
    annotations.extend(
        ("+/- %dSD: %f / %f" % (multiple, BLSHMean + multiple * data_sd, BLSHMean - multiple * data_sd), 'black')
        for multiple in (1, 2, 3)
    )
    for row, (text, colour) in enumerate(annotations):
        # round() keeps y at the exact 0.96, 0.93, ... values despite float error
        ax.text(0.02, round(0.96 - 0.03 * row, 2), text, horizontalalignment='left',
                color=colour, fontsize=20, transform=ax.transAxes)

    pic_title = 'Megapro Chart %s Distribution Study\n%s-%s' % (data_label, startDate.strftime('%Y%m%d'), endDate.strftime('%Y%m%d'))
    fig.suptitle(pic_title, fontsize=30, y=0.95)
    fig.subplots_adjust(top=0.88)

    # Save this specific figure rather than relying on pyplot's current-figure state.
    filename = ('%s_Distribution_%s-%s.png' % (data_label, startDate.strftime('%y%m'), endDate.strftime('%y%m'))).replace(' ', '')
    fig.savefig(filename)
    url = 'https://jbook123456.megagurus.net/user/yourusername/view/%s' % filename
    print(url)

In [None]:
# Render and save the distribution chart for the loaded column.
distplot(data=data, data_label=data_label, data_match=data_match)