In [None]:
# Make the directory two levels above the current working directory importable
# (presumably the repository root that holds the `optiml` package -- confirm).
import sys, pathlib
_two_levels_up = pathlib.Path.cwd().parent.parent
sys.path.append(str(_two_levels_up))

In [None]:
# Suppress all warnings for the rest of the session: the 'ignore' action never
# prints them (the 'once' action would show each distinct warning a single time).
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Setting up displays
# Display / plotting imports used throughout the notebook
from IPython.display import display, HTML
from dash import Dash, html, dcc, Input, Output
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tabulate import tabulate

# Inject CSS that makes `.container` elements span 100% of the available width
display(HTML("<style>.container { width:100% !important; }</style>"))

# Dash application object
app = Dash(__name__)

# Let pandas render up to 500 rows and 500 columns before truncating
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, 500)

# Ordered list of named colors available to the plots
color_scheme = [
    "red", "blue", "green", "orange", "purple",
    "brown", "pink", "gray", "olive", "cyan",
    "darkviolet", "goldenrod", "darkgreen", "chocolate", "lawngreen",
]

In [None]:
from dash import Dash,html,dcc

In [None]:
# Center-align all the figure outputs (this cell is currently disabled).
# from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# from IPython.display import display, HTML
# from plotly.graph_objs import *
# import numpy as np
# init_notebook_mode(connected=True)

# display(HTML("""
# <style>
# .output {
#     display: flex;
#     align-items: center;
#     text-align: center;
# }
# </style>
# """))

In [None]:
# Setting up autoreload for libs
# Setting up autoreload for libs
# `%autoreload 2` reloads modules before each cell executes, so edits to the
# library source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Import optiml.queries and mark it for automatic reloading
%aimport optiml.queries

In [None]:
# Initialize connection to Snowflake and set analysis date
# Initialize connection to Snowflake and set analysis date
from optiml.connection import SnowflakeConnConfig
connection = SnowflakeConnConfig(
    accountname='jg84276.us-central1.gcp',
    warehousename="XSMALL_WH",
).create_connection()

# Initialize local environment: directory handed to the query library below
import os
cache_dir = os.path.expanduser('~/data/kiva')

# Initialize query library
from optiml.queries import SNFLKQuery
qlib = SNFLKQuery(connection, 'KIV', cache_dir)

# Analysis window passed to the queries in this notebook
sdate = '2022-09-12'
edate = '2022-10-12'
# Fixed typo in the message ("carried our" -> "carried out")
print(f"The analysis is carried out for date range {sdate} to {edate}")

# User Analysis


## Most idle users

In [None]:
## This would be my starting point for users that are a security risk
df = qlib.idle_users(sdate, edate)

# Export under the current user's home directory instead of a path hard-coded
# to one developer's machine. When the home directory is /home/manas this
# resolves to the original location (/home/manas/DS_data).
export_dir = pathlib.Path.home() / 'DS_data'
export_dir.mkdir(parents=True, exist_ok=True)  # create it on first use
df.to_csv(export_dir / 'idle_users.csv')

In [None]:
# Strip plot of idle users: one marker per row of `df`, positioned on the
# x-axis by last successful login; hovering shows the login time and the name.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df['last_success_login'],
    y=[0] * len(df),  # constant y keeps every marker on the zero line
    mode='markers',
    marker_size=20,
    customdata=df['name'],
    hovertemplate="<br>".join([
        "Last successful login: %{x}", "name: %{customdata}"]),
))
# Axis title previously read "last sucess login" (typo)
fig.update_xaxes(showgrid=False, title_text="Last successful login")
# Hide the y-axis ticks and draw only a thick zero line for the markers to sit on
fig.update_yaxes(showgrid=False,
                 zeroline=True, zerolinecolor='black', zerolinewidth=3,
                 showticklabels=False)
fig.update_layout(height=200, plot_bgcolor='white', title_text="Idle Users")
fig.show()

### Analysis
<div class="alert alert-info">
    
* 39 users have not logged into Snowflake since 12th September, 2022 (the start of the analysis window).
* 16 users have not logged in since 2020 or earlier:
    * 2 users since 2020.
    * 2 users since 2019.
    * 7 users since 2018.
    * 4 users since 2017.
    * 1 user since 2016. 

</div>

### Actions and Recommendations
<div class="alert alert-success">

* Users who have not logged in since 2020 can be disabled or formally onboarded.
    
</div>

## Users that have never logged in

In [None]:
## Accounts that still exist but have never logged in
df = qlib.users_never_logged_in(sdate, edate)
df.head()

In [None]:
# Strip plot: one marker per row of `df`, positioned by creation date;
# hovering shows the creation date and the name.
hover_lines = ["Created on: %{x}", "name: %{customdata}"]
fig = go.Figure(data=[
    go.Scatter(
        x=df['created_on'],
        y=[0] * len(df),  # constant y keeps every marker on the zero line
        mode='markers',
        marker_size=20,
        customdata=df['name'],
        hovertemplate="<br>".join(hover_lines),
    )
])
fig.update_xaxes(title_text="Created on", showgrid=False)
fig.update_yaxes(
    showgrid=False,
    showticklabels=False,
    zeroline=True,
    zerolinecolor='black',
    zerolinewidth=3,
)
fig.update_layout(title_text="Never logged in", height=200, plot_bgcolor='white')
fig.show()

### Analysis
<div class="alert alert-info">
    
* 4 users have not logged in since date of creation.
* The SNOWFLAKE user account was created in June 2016 and has never logged in.
 

</div>

### Actions and Recommendations
<div class="alert alert-success">

* SNOWFLAKE user can be disabled.
    
</div>

## Users and queries that scan full tables

#### Users

In [None]:
# Users whose queries scan (almost) entire tables during the analysis window
df = qlib.users_full_table_scans(sdate, edate)

# Export under the current user's home directory instead of a path hard-coded
# to one developer's machine. When the home directory is /home/manas this
# resolves to the original location (/home/manas/DS_data).
export_dir = pathlib.Path.home() / 'DS_data'
export_dir.mkdir(parents=True, exist_ok=True)  # create it on first use
df.to_csv(export_dir / 'users_scan_full_tables.csv')
df.head()

In [None]:
# Strip plot: one marker per row of `df`, positioned by its query count;
# hovering shows the count and the user name.
hover_lines = ["Count of queries: %{x}", "Username: %{customdata}"]
fig = go.Figure(data=[
    go.Scatter(
        x=df['count_of_queries'],
        y=[0] * len(df),  # constant y keeps every marker on the zero line
        mode='markers',
        marker_size=20,
        customdata=df['user_name'],
        hovertemplate="<br>".join(hover_lines),
    )
])
fig.update_xaxes(title_text="count of queries", showgrid=False)
fig.update_yaxes(
    showgrid=False,
    showticklabels=False,
    zeroline=True,
    zerolinecolor='black',
    zerolinewidth=3,
)
fig.update_layout(title_text="Full table scans", height=200, plot_bgcolor='white')
fig.show()

#### Queries

In [None]:
# Queries that scan (almost) entire tables during the analysis window
df = qlib.queries_full_table_scan(sdate, edate)

# Export under the current user's home directory instead of a path hard-coded
# to one developer's machine. When the home directory is /home/manas this
# resolves to the original location (/home/manas/DS_data).
export_dir = pathlib.Path.home() / 'DS_data'
export_dir.mkdir(parents=True, exist_ok=True)  # create it on first use
df.to_csv(export_dir / 'queries_scan_full_tables.csv')

### Analysis
<div class="alert alert-info">
    
* Users VERTEX_API_PROD, FIVETRAN_USER, FIVETRAN_USER_DEV, LOOKER_PROD and VERTEX_API_DEV run queries that scan almost all partitions of the table.


</div>

### Actions and Recommendations
<div class="alert alert-success">

* Users can be trained to run more optimized queries.
* Clustering keys can be defined on the tables these queries read so that fewer partitions are scanned.
    
</div>

## Users that scan the most data

In [None]:
# Use the notebook-wide analysis window (sdate..edate) like every other query.
# The previous literal window (2019-01-01..2022-02-02) did not overlap the
# date range this notebook reports on.
df = qlib.heavy_users(start_date=sdate, end_date=edate)
df.head()

In [None]:
# Strip plot: one marker per row of `df`, positioned by `avg_pct_scanned`;
# hovering shows that value and the warehouse name.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df['avg_pct_scanned'],
    y=[0] * len(df),  # constant y keeps every marker on the zero line
    mode='markers',
    marker_size=20,
    customdata=df['warehouse_name'],
    # Hover label previously said "Count of queries" (copied from another
    # cell) although x is avg_pct_scanned; it now matches the x-axis title.
    hovertemplate="<br>".join([
        "Average partitions scanned: %{x}", "warehouse name: %{customdata}"]),
))
fig.update_xaxes(showgrid=False, title_text="Average partitions scanned")
# Hide the y-axis ticks and draw only a thick zero line for the markers to sit on
fig.update_yaxes(showgrid=False,
                 zeroline=True, zerolinecolor='black', zerolinewidth=3,
                 showticklabels=False)
fig.update_layout(height=200, plot_bgcolor='white', title_text="Heavy users")
fig.show()

## Idle roles

In [None]:
# Fetch idle roles for the analysis window and preview the first rows
df = qlib.idle_roles(sdate, edate)
df.head()

In [None]:
# Strip plot: one marker per row of `df`, positioned by creation date;
# hovering shows the creation date and the name.
hover_lines = ["created on: %{x}", "name: %{customdata}"]
fig = go.Figure(data=[
    go.Scatter(
        x=df['created_on'],
        y=[0] * len(df),  # constant y keeps every marker on the zero line
        mode='markers',
        marker_size=20,
        customdata=df["name"],
        hovertemplate="<br>".join(hover_lines),
    )
])
fig.update_xaxes(title_text="Created on", showgrid=False)
fig.update_yaxes(
    showgrid=False,
    showticklabels=False,
    zeroline=True,
    zerolinecolor='black',
    zerolinewidth=3,
)
fig.update_layout(title_text="Idle roles", height=200, plot_bgcolor='white')
fig.show()

In [None]:
# Fetch table streams for the analysis window and preview the first rows
df = qlib.table_streams(sdate, edate)
df.head()

# Task Analysis

## Failed tasks

In [None]:
# Fetch failed tasks for the analysis window and preview the first rows
df = qlib.failed_tasks(sdate, edate)
df.head()

## Long running tasks

In [None]:
# Fetch long-running tasks for the analysis window and preview the first rows
df = qlib.long_running_tasks(sdate, edate)
df.head()

In [None]:
# Strip plot: one marker per row of `df`, positioned by `duration_seconds`;
# hovering shows the duration and the name.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df['duration_seconds'],
    y=[0] * len(df),  # constant y keeps every marker on the zero line
    mode='markers',
    marker_size=20,
    customdata=df["name"],
    hovertemplate="<br>".join([
        "duration seconds: %{x}", "name: %{customdata}"]),
))
# The axis and figure titles were copied from the idle-roles cell
# ("Created on" / "Idle roles"); they now describe what is plotted here.
fig.update_xaxes(showgrid=False, title_text="Duration (seconds)")
# Hide the y-axis ticks and draw only a thick zero line for the markers to sit on
fig.update_yaxes(showgrid=False,
                 zeroline=True, zerolinecolor='black', zerolinewidth=3,
                 showticklabels=False)
fig.update_layout(height=200, plot_bgcolor='white', title_text="Long running tasks")
fig.show()