## Install required packages

In [None]:
# !pip install psycopg2-binary
!pip --version

!pip install -r requirements.txt

## Do general imports

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import math as math
from sklearn import preprocessing
from feature_engine.discretisation import ArbitraryDiscretiser

from preprocessing.issues_clustering import TotalTimeClusteringPreProcess
import preprocessing.projects as ppp
import exploration.analytics_plots as vs

# Notebook-wide display defaults: no limit on the number of DataFrame columns
# shown, and a base font size of 14 for matplotlib figures.
pd.options.display.max_columns = None
plt.rcParams['font.size'] = 14


## Load Datasets

In [None]:
# Load the issues export, using its `id` column as the index.
issues_df = pd.read_csv('./temp_data/issues.csv', index_col=["id"])
# Convert the creation column to datetimes so the date-range filters compare
# timestamps rather than raw strings.
issues_df['issue_created'] = pd.to_datetime(issues_df['issue_created'])
# A plain f-string replaces the former mix of an F-prefix and %-formatting;
# the printed text is unchanged.
print(f'Total records in dataset {len(issues_df)}')

### Plot issues per year

In [None]:
# Yearly overview of every issue created up to and including the end of 2022.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)

# `issue_created` holds datetimes, so the string '2022-12-31' is interpreted as
# midnight at the start of that day and `<=` would drop issues created later
# on Dec 31. An exclusive bound on the following day keeps the whole year.
df = issues_df[issues_df['issue_created'] < '2023-01-01']
vs.plot_issues_by_year(df, ax)

### Keep the issues reported in 2022 for the study

In [None]:


# Issue types included in the study. A broader selection is kept for reference:
# types = ['Ticket','Service','Deployment','HD Service','Project']
types = ['Ticket', 'Deployment', 'HD Service']

# Project keys must start with two word characters, two digits and at least
# one more word character. A raw string keeps `\w` / `\d` from being treated
# as (invalid) string escape sequences.
project_key_pattern = r'\w{2}\d{2}\w{1,}'

issues_df = issues_df[
    # na=False: rows with a missing project key are excluded explicitly.
    issues_df['issue_proj'].str.match(project_key_pattern, na=False)
    & issues_df['issue_type'].isin(types)
    & (issues_df['issue_created'] >= '2022-01-01')
    # Exclusive upper bound: '2022-12-31' would be read as midnight at the
    # start of Dec 31 and drop issues created later that day.
    & (issues_df['issue_created'] < '2023-01-01')
    # Only issues that have a resolution date.
    & issues_df['issue_resolution_date'].notna()
]
# Optional extra condition, currently disabled:
# & (issues_df['issue_priority'] == 'High')
print(f'Total records after filter {len(issues_df)}')

issues_df.head(1)

### Find number of tickets per type

In [None]:
# Number of issues for each issue type.
type_counts = issues_df['issue_type'].value_counts()
print(type_counts)

# Number of distinct values in the project column.
distinct_projects = issues_df['issue_proj'].unique()
print(len(distinct_projects))

In [None]:
# Preview the first two rows of the filtered issues.
issues_df.head(n=2)

### Explore reported issues by month and by priority

In [None]:
# plt.autoscale(enable=False)

# One figure per plot, each with a single axes handed to the plotting helper.
fig, ax = plt.subplots(figsize=(10, 6))
vs.plot_issues_by_month(issues_df, ax)

fig, ax = plt.subplots(figsize=(10, 5))
vs.plot_issues_by_priority(issues_df, ax)

# The two plots below are disabled; both take a `result` argument that is not
# defined in the cells visible here.

# fig = plt.figure(figsize=(10, 5))
# ax = fig.add_subplot(111)
# vs.plot_total_time_bin(issues_df, result, ax)

# fig = plt.figure(figsize=(10, 5))
# ax = fig.add_subplot(111)
# vs.plot_box_by_issue_category(issues_df, result, ax)

## Inspect comments

### Explore issue time spent vs. comments

In [None]:
# (figure size, plotting helper) pairs; each helper is drawn, in order, on its
# own single-axes figure.
comment_plots = [
    ((10, 5), vs.plot_comments_count_frequency),
    ((10, 2), vs.plot_comments_count_summary),
    ((10, 5), vs.plot_comments_count_time_spent),
]

for size, draw in comment_plots:
    fig = plt.figure(figsize=size)
    ax = fig.add_subplot(111)
    # ax.autoscale(enable=False)
    draw(issues_df, ax)

## Total spent time analysis

### Plot summary for each workflow step

In [None]:
# Summary table for the workflow steps, rounded to two decimals for display.
# NOTE(review): divider=60 presumably rescales the time values (e.g. seconds
# to minutes) - confirm in exploration.analytics_plots.
steps_summary = vs.wf_steps_summary(issues_df, divider=60)
steps_summary.round(2)

In [None]:
# Square canvas for the workflow spent-time summary plot.
# NOTE(review): divider is 60 * 60 here but 60 in the summary table cell;
# presumably hours vs. minutes - confirm this is intentional.
fig, ax = plt.subplots(figsize=(10, 10))
vs.plot_wf_spent_summary(issues_df, ax, divider=60 * 60)

### Plot summary for total workflow time

In [None]:
# Two wide, short figures: total workflow time, then processing steps.
fig, ax = plt.subplots(figsize=(15, 2))
vs.plot_wf_total_time_summary(issues_df, ax)

fig, ax = plt.subplots(figsize=(15, 2))
vs.plot_issues_processing_steps(issues_df, ax)

# Descriptive statistics of the processing-step counts, shown as a single row.
steps_stats = issues_df[['processing_steps']].describe()
steps_stats.T

In [None]:
# Rescale the total workflow time by 60 * 60 * 24 and describe it.
# NOTE(review): this assumes `wf_total_time` is stored in seconds, making the
# result a number of days - confirm against the data export.
t = issues_df[['wf_total_time']] / (60 * 60 * 24)
t.describe().transpose()

### Explore the relation between steps count and total processing time

In [None]:
# Disabled variant: one subplot per project category, sharing axis limits.
# fig = plt.figure(figsize=(20, 6))

# max_ps = issues_df['processing_steps'].max()
# max_wf_time = issues_df['wf_total_time'].max()

# proj_cat = issues_df['proj_category'].drop_duplicates().sort_values()
# for i,c in enumerate(proj_cat):    
#     ax = fig.add_subplot(2,3,i+1)
#     df = issues_df[issues_df['proj_category'] == c]
#     vs.plot_relation_between_processing_steps_and_time(df, ax, max_x = math.ceil(max_wf_time), max_y=math.ceil(max_ps))

# Active plots: both use the full filtered dataset, one figure each.
fig, ax = plt.subplots(figsize=(10, 5))
vs.plot_relation_between_processing_steps_and_time(issues_df, ax)

fig, ax = plt.subplots(figsize=(10, 5))
vs.plot_processing_steps_frequency(issues_df, ax)

### Explore number of contributors for each ticket

In [None]:
# Single-column frame with the contributor count of each issue; the first two
# plots only receive this column.
contr = issues_df[['issue_contr_count']]

fig, ax = plt.subplots(figsize=(10, 2))
vs.plot_issue_contributors_summary(contr, ax)

fig, ax = plt.subplots(figsize=(5, 5))
vs.plot_issue_contributors_frequency(contr, ax)

# The remaining two plots receive the full issues frame.
fig, ax = plt.subplots(figsize=(5, 5))
vs.plot_issue_contributors_to_total_spent_time(issues_df, ax)

fig, ax = plt.subplots(figsize=(5, 5))
vs.plot_issue_contributors_to_total_comments(issues_df, ax)