# Planning report
## Create new tickets with fitting predictions easily

In [72]:
import ipywidgets as widgets
import pandas as pd
import plotly.express as px
from plotly.offline import iplot, init_notebook_mode
# Switch plotly to offline notebook rendering before any figure is drawn.
# NOTE(review): connected=True presumably loads plotly.js from the CDN rather
# than embedding it in the notebook — confirm against the plotly docs.
init_notebook_mode(connected=True)

import xgboost as xgb
import shap
import numpy as np
import cufflinks as cf
# Same offline setup for cufflinks, plus its global plotting defaults.
cf.go_offline(connected=True)
cf.set_config_file(colorscale='plotly', world_readable=True)

# Extra options: limits on how many rows/columns pandas displays.
pd.options.display.max_rows = 30
pd.options.display.max_columns = 25

from datetime import date
from ipywidgets import interact, interact_manual
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from src.db.utils import SnowflakeWrapper
from src.config import data_root
from src.compute.utils import Interval
from src.compute.tickets import get_tickets, get_ticket_counts, get_unresolved_ticket_counts
from src.compute.developer import get_developers, get_developer_ids, tickets_assigned_per_day
# Snowflake connection is disabled; only the commented-out count plots below
# reference `sw`.
# conn = SnowflakeWrapper.create_snowflake_connection()
# sw = SnowflakeWrapper(conn)


# Dropdown caption -> breakdown field name, used by the (commented-out)
# plot_ticket_counts cell below.
breakdown_labels = {
    "Issue Type": "issueType",
    "Issue Priority": "issuePriority"
}

In [73]:
# def plot_ticket_counts(breakdown):
#     plot_df = get_ticket_counts(sw, [breakdown_labels[breakdown], "resolved"])
#     sum_all = plot_df["COUNT"].sum()
#     fig = px.histogram(
#         plot_df,
#         x=breakdown_labels[breakdown].upper(),
#         y="COUNT",
#         color="RESOLVED",
#         barmode="group",
#         title=f"All tickets [{sum_all}] broken down by {breakdown}",
#         hover_data=plot_df.columns
#     )
#     fig.update_layout(
#         bargap=0.01,
#         yaxis_title="Number of tickets",
#         xaxis_title=breakdown,
#     )
#     return fig
# 
# _ = interact(
#     plot_ticket_counts,
#     breakdown=widgets.Dropdown(options=breakdown_labels.keys(), description="Breakdown by"),
# )

In [74]:
# unresolved_breakdown_labels = {
#     "Issue Type": "issueType",
#     "Issue Priority": "issuePriority",
#     "Status": "status"
# }
# def plot_unresolved_ticket_counts(date_from, date_to, all_unresolved, breakdown):
#     current_interval = Interval(date_from, date_to)
#     plot_df = get_unresolved_ticket_counts(sw, current_interval, all_unresolved_until=all_unresolved, breakdowns=[unresolved_breakdown_labels[breakdown]])
#     fig = px.histogram(
#         plot_df,
#         x="DAYSUNRESOLVED",
#         color=unresolved_breakdown_labels[breakdown].upper(),
#         barmode="group",
#         marginal="rug",
#         title=f"All unresolved tickets broken down by {breakdown}",
#         hover_data=plot_df.columns
#     )
#     fig.update_layout(
#         bargap=0.01,
#         yaxis_title="Number of tickets",
#         xaxis_title="Days unresolved",
#     )
#     return fig
# 
# _ = interact(
#     plot_unresolved_ticket_counts,
#     date_from = widgets.DatePicker(value=date(2019,10,1)),
#     date_to = widgets.DatePicker(value=date(2020,1,1)),
#     all_unresolved = widgets.Checkbox(value=False, description='All unresolved'),
#     breakdown=widgets.Dropdown(options=unresolved_breakdown_labels.keys(), description="Breakdown by"),
# )

In [75]:
# Descriptor of the dataset variant in use.
# NOTE(review): presumably (run name, CSV file, target column) — confirm.
# ('hours-1-month-real', 'encoded_model_data_development_filtered_hours_1-month_real-data.csv', "HOURSINDEVELOPMENT")
base_fname = f'{data_root}/prediction_data/ticket_model'
# Encoded ticket data; build_model() treats the last column as the target and
# every other column as a feature.
prediction_data = pd.read_csv(f"{base_fname}/encoded_model_data_development_filtered_hours_1-month_real-data.csv")

def build_model(data, test_size=0.2, random_state=42):
    """Fit an XGBoost regressor on ``data`` and measure its hold-out error.

    Every column except the last is used as a feature; the last column is the
    regression target. The rows are split into a train and a test part, the
    model is fitted on the train part and scored on the test part.

    Args:
        data: DataFrame whose last column is the target.
        test_size: Share of rows held out for evaluation.
        random_state: Seed for the train/test split, so the reported error is
            reproducible between runs.

    Returns:
        A tuple ``(model, mae, explainer)``: the fitted ``XGBRegressor``, its
        mean absolute error on the held-out rows, and a ``shap.TreeExplainer``
        built from the fitted model.
    """
    features = data.iloc[:, :-1].values
    target = data.iloc[:, -1].values
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )
    pred_method = xgb.XGBRegressor(
        objective='reg:squarederror',
        colsample_bytree=0.3,
        learning_rate=0.25,
        max_depth=40,
        # reg_alpha is the scikit-learn wrapper's own keyword for the L1
        # penalty; the native alias `alpha` only reaches the booster through
        # **kwargs and can clash with the wrapper's reg_alpha default.
        reg_alpha=50,
        n_estimators=100,
        reg_lambda=30,
    )
    pred_method.fit(X_train, y_train)
    mae = mean_absolute_error(y_test, pred_method.predict(X_test))
    tree_explainer = shap.TreeExplainer(pred_method)
    return pred_method, mae, tree_explainer

# Train once when the cell runs; the interactive form below reuses the fitted
# model (pm), its hold-out MAE (mae) and the SHAP explainer (te).
pm, mae, te = build_model(prediction_data)

# Ticket labels that have their own 0/1 feature column.
model_labels = [
    "video",
    "update_ami",
    "ui_changes",
    "tag_change",
    "staging_issue",
    "qa",
    "pmm_check",
    "maintainers",
    "devtest",
    "devops_label",
    "config_changes",
    "client",
    "autotested",
]
# Ticket components that have their own 0/1 feature column.
model_components = [
    "testilda",
    "services: compiler",
    "review",
    "releng",
    "production",
    "precisionMarketing",
    "pnp",
    "other: other",
    "distribution",
    "devops",
    "baking",
    "analytics",
]
# Priority name -> numeric code fed to the model.
model_priorities = {
    "Blocker": 4,
    "Critical": 3,
    "Major": 2,
    "Minor": 1,
    "Trivial": 0,
}
# Issue type name -> numeric code fed to the model.
model_types = {
    "Bug": 0,
    "Bug (Sub-task)": 1,
    "Epic": 2,
    "Improvement (Sub-task)": 3,
    "Internal Improvement": 4,
    "New Feature or Improvement": 5,
    "Prototype": 6,
    "Sub-task": 7,
}
# Column names for the single-row feature frame built in print_prediction.
# NOTE(review): presumably mirrors the feature-column order of the training
# CSV — confirm against its header.
model_indices = [
    *model_labels,
    *model_components,
    "ISSUETYPE",
    "ISSUEPRIORITY",
    "NUMBEROFCOMPONENTS",
    "NUMBEROFLABELS",
    "NUMBEROFLINKEDISSUES",
]

Setting feature_perturbation = "tree_path_dependent" because no background data was given.


In [76]:
def map_vals(orig, data):
    """Return a 0/1 list aligned with ``orig`` marking the entries in ``data``.

    Position ``i`` of the result is 1 when ``orig[i]`` is the first occurrence
    of a value that appears in ``data``, otherwise 0.

    Raises:
        ValueError: If ``data`` contains a value that is not in ``orig``.
    """
    flags = [0 for _ in orig]
    # orig.index gives the first matching position and raises on unknown values.
    for position in map(orig.index, data):
        flags[position] = 1
    return flags


def print_prediction(labels, components, t_priority, t_type, num_linked):
    """Print the predicted development time for a hypothetical ticket.

    Encodes the selections into one feature row, runs it through the fitted
    model ``pm``, prints the estimate in hours and days with the hold-out
    error ``mae`` as the margin, and draws a SHAP force plot of the
    per-feature contributions.

    Args:
        labels: Selected label names; each must appear in ``model_labels``.
        components: Selected component names; each must appear in
            ``model_components``.
        t_priority: A key of ``model_priorities``.
        t_type: A key of ``model_types``.
        num_linked: Number of linked tickets.

    Raises:
        ValueError: If a label or component is not a known column.
        KeyError: If the priority or type name is unknown.
    """
    priority_code = model_priorities[t_priority]
    type_code = model_types[t_type]

    print(f"Selected labels: {labels}")
    print(f"Selected components: {components}")
    print(f"Selected priority: {priority_code}")
    print(f"Selected type: {type_code}")
    print(f"Number of linked tickets: {num_linked}")

    # The scalar tail must follow the same order as the tail of model_indices.
    # Previously it was [n_components, n_labels, priority, type, linked], so
    # each value landed under a different column name than the one it was
    # given in the frame below.
    # NOTE(review): this assumes model_indices mirrors the feature-column
    # order of the training CSV — confirm against its header.
    test_case = map_vals(model_labels, labels) + map_vals(model_components, components) + [
        type_code,        # ISSUETYPE
        priority_code,    # ISSUEPRIORITY
        len(components),  # NUMBEROFCOMPONENTS
        len(labels),      # NUMBEROFLABELS
        num_linked,       # NUMBEROFLINKEDISSUES
    ]
    # The model expects a 2-D array: one row, one column per feature.
    test_case = np.asarray(test_case).reshape(1, -1)
    prediction = pm.predict(test_case)[0]
    print(f"Predicted development time:\n in hours: {prediction:.3f} ± {mae:.3f} \n in  days: {prediction / 24:.3f} ± {mae / 24:.3f}")

    # Named columns let the force plot label each contribution.
    df = pd.DataFrame(test_case)
    df.columns = model_indices
    tree_shap_values = te.shap_values(df, y=prediction)
    shap.force_plot(te.expected_value, tree_shap_values[0,:], df.iloc[0,:], matplotlib=True)

# Build the input form; interact() passes each widget's value to
# print_prediction under the matching keyword.
_ = interact(
    print_prediction,
    labels=widgets.SelectMultiple(
        options=model_labels,
        description='Labels',
        disabled=False,
    ),
    components=widgets.SelectMultiple(
        options=model_components,
        description='Components',
        disabled=False,
    ),
    t_priority=widgets.Dropdown(
        options=model_priorities.keys(),
        description='Priority',
        disabled=False,
    ),
    t_type=widgets.Dropdown(
        options=model_types.keys(),
        description='Type',
        disabled=False,
    ),
    num_linked=widgets.BoundedIntText(
        value=0,
        min=0,
        max=100,
        step=1,
        description='Number of linked tickets:',
        disabled=False,
    ),
)

interactive(children=(SelectMultiple(description='Labels', options=('video', 'update_ami', 'ui_changes', 'tag_…