# Exploratory Data Analysis

## Loading Dataset

In [57]:
import plotly.figure_factory as ff
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
import numpy as np


# Location of the transformed pipeline data set; the path is relative, so it
# is resolved against the current working directory.
DATA_PATH = "../../data/transformed/pipeline_data.csv"

# Read the CSV into the DataFrame that every later cell works on.
df = pd.read_csv(DATA_PATH)

In [50]:
# Row filtering, done as a single chain:
#   1. drop rows that have no resolution date;
#   2. renumber the rows -- reset_index() is called without drop=True, so the
#      previous row labels are kept as a new "index" column;
#   3. keep only rows whose hours_needed is not exactly 0.
df = (
    df.dropna(subset=['resolutiondate'])
    .reset_index()
    .loc[lambda frame: frame['hours_needed'] != 0]
)

# Basic Graphs

In [51]:


# For every distinct days_needed value, count the rows with a non-null status.
start = df.groupby('days_needed')[['status']].count()

# Histogram of those per-value counts, with a rug plot in the margin.
fig = px.histogram(
    start,
    x="status",
    marginal="rug",
    hover_data=start.columns,
)
fig.show()

In [10]:
# Parse the creation timestamps and reduce them to calendar-day strings
# ("YYYY-MM-DD") so that the group-bys on created_date aggregate per day.
# The vectorised .dt accessor replaces a Python-level loop over every row and
# yields a missing value for a missing timestamp (NaT) instead of raising.
df['created_date'] = pd.to_datetime(df['created_date']).dt.strftime('%Y-%m-%d')

## Number of cases created per day

In [11]:
# Cases opened per creation date: group by created_date and count the
# non-null status values in each group. The result is indexed by created_date.
created = df.groupby('created_date')[['status']].count()

# Line chart over the dates, with a range slider on the x axis.
fig = px.line(created, x=created.index, y='status')
fig.update_xaxes(rangeslider_visible=True).show()

## Average days needed to resolve a case, by creation date

In [12]:
# Mean days_needed of the cases sharing a creation date; the result is
# indexed by created_date.
created = df.groupby('created_date')[['days_needed']].mean()

# Line chart over the dates, with a range slider on the x axis.
fig = px.line(created, x=created.index, y='days_needed')
fig.update_xaxes(rangeslider_visible=True).show()

## Steps taken

In [13]:
# Number of rows (non-null status) for each distinct steps_taken value.
test = df.groupby('steps_taken')[['status']].count()

# Horizontal bar chart: one bar per steps_taken value, bar length = count.
fig = px.bar(
    test,
    x=test["status"],
    y=test.index,
    orientation='h',
)
fig.show()

In [32]:


# The two steps_taken values whose resolution times are compared.
DIRECT_STEPS = 'Non-existent-Open-Resolved'
PATCH_STEPS = 'Non-existent-Open-Patch Available-Resolved'

# Rows belonging to each of the two values.
a = df[df['steps_taken'] == DIRECT_STEPS]
b = df[df['steps_taken'] == PATCH_STEPS]

# days_needed of each subset as plain NumPy arrays.
x0 = a['days_needed'].to_numpy()
x1 = b['days_needed'].to_numpy()

# One box trace per value; the data goes in x (not y) so the boxes are drawn
# horizontally.
fig = go.Figure(
    data=[
        go.Box(x=x0, name="Non-existent-Open-Resolved"),
        go.Box(x=x1, name="Non-existent-Open-Patch Available-Resolved"),
    ]
)

fig.show()

## Time needed

### Per resolution

In [33]:
# Horizontal box plot of days_needed, one box per resolution value.
test = df.loc[:, ['days_needed', 'resolution']]
fig = px.box(
    test,
    x='days_needed',
    y="resolution",
    orientation='h',
)
fig.show()

In [34]:
# Horizontal box plot of days_needed, one box per issue_type value.
test = df.loc[:, ['days_needed', 'issue_type']]
fig = px.box(
    test,
    x='days_needed',
    y="issue_type",
    orientation='h',
)
fig.show()

In [35]:
# Horizontal box plot of days_needed, one box per priority value.
test = df.loc[:, ['days_needed', 'priority']]
fig = px.box(
    test,
    x='days_needed',
    y="priority",
    orientation='h',
)
fig.show()