# SpecMetrics - Processing from AWS

In [7]:
import json, os, sys
from tqdm import tqdm
import pandas as pd

# Project checkout location, resolved from the current user's HOME directory.
ROOT_DIR = f"{os.environ['HOME']}/Dev/spec_metrics_dashboard"
# Make the project's src/lib directory importable (needed for the local
# modules imported right after this).
sys.path.append(f"{ROOT_DIR}/src/lib")

import connector_s3, process

In [2]:
# Fetch the run keys through the S3 connector and build the runs frame from
# them; the frame is indexed by its `date` column, which is also kept as a
# regular column.
run_keys = process.fetch_run_keys(connector_s3)
runs_df = process.build_runs_df(run_keys).set_index("date", drop=False)

# Branch names derived from the runs frame, with a quick size check.
branch_names = process.branch_names(runs_df=runs_df)
print(f"{len(branch_names)} branches")

4it [00:03,  1.05s/it]

241 branches





## Identifying recurring failing examples solved by rerun

Count of unique failed examples: 
- 56 over last 50 runs
- 75 over last 100 runs
- 147 over last 200 runs

In [34]:
# Gather every failed example from the first N_LAST_RUNS entries of the
# reversed list of `develop` run keys.
# NOTE(review): "reversed == most recent first" assumes runs_df is ordered
# oldest-first — confirm against process.build_runs_df.
N_LAST_RUNS = 200

develop_run_keys = list(runs_df[runs_df.branch == "develop"].run_key)

# Collect the per-run frames in a list and concatenate once at the end:
# growing a DataFrame inside the loop is quadratic, and DataFrame.append no
# longer exists in pandas >= 2.0.
failed_examples_frames = []
for key in tqdm(list(reversed(develop_run_keys))[0:N_LAST_RUNS]):
    data = process.fetch_run_data(connector_s3, key)
    examples_df = process.build_run_examples_df(data)
    if len(examples_df) == 0:
        # we may have runs with no examples, in this case the df is empty
        continue
    failed_examples_df = examples_df[examples_df.status == "failed"]
    failed_examples_frames.append(failed_examples_df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# run produced any examples.
all_failed_examples_df = (
    pd.concat(failed_examples_frames, ignore_index=True)
    if failed_examples_frames
    else pd.DataFrame()
)

# An example is identified by its description: duplicates across runs are
# collapsed to count distinct failing examples.
print("Count of found unique failed examples over N last develop runs:", len(all_failed_examples_df.drop_duplicates("description")))

100%|██████████| 200/200 [01:40<00:00,  4.67it/s]

Count of found unique failed examples over N last develop runs: 147





In [56]:
# Description of the example that failed most often across the collected runs.
# Rows are counted with size() rather than count() on an arbitrary column
# (previously `dir_0`), so the ranking does not depend on that column being
# non-null for every failed example.
most_failed_example_description = (
    all_failed_examples_df
    .groupby("description")
    .size()
    .idxmax()
)
most_failed_example_description

'StudentMailer#job_offers_recap As an intranet student job_offers_recap when student profile has a french locale sends all the offers'

Goal: find the examples that ran before the most-failed example. We first isolate them for the runs in which the most-failed example actually failed. We may also isolate the examples that ran before it in the runs where it did not fail, in order to build both a whitelist and a blacklist.

In [84]:
# For each of the first 10 entries of the reversed `develop` run keys in which
# the most-failed example failed, collect every example that finished before
# it did.
develop_run_keys = list(runs_df[runs_df.branch == "develop"].run_key)

# Per-run frames are accumulated in a list and concatenated once at the end
# (DataFrame.append is quadratic in a loop and was removed in pandas 2.0).
before_most_failed_frames = []

for key in list(reversed(develop_run_keys))[0:10]:
    data = process.fetch_run_data(connector_s3, key)
    examples_df = process.build_run_examples_df(data)

    if len(examples_df) == 0:
        # we may have runs with no examples, in this case the df is empty
        continue

    most_failed_example = examples_df[examples_df.description == most_failed_example_description]

    if len(most_failed_example) == 0:
        # most failed example not present in this run
        continue

    if most_failed_example.status.iloc[0] != "failed":
        # the most failed example did not fail in this run (passed or any
        # other non-failed status), ignoring it
        continue

    # Keep only the examples that finished strictly before the failing one.
    # If several rows share the description, the first one is the reference.
    most_failed_example_finished_at = most_failed_example.finished_at.iloc[0]
    before_most_failed_examples_df = examples_df[examples_df.finished_at < most_failed_example_finished_at]
    before_most_failed_frames.append(before_most_failed_examples_df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# run qualified.
all_before_most_failed_examples_df = (
    pd.concat(before_most_failed_frames, ignore_index=True)
    if before_most_failed_frames
    else pd.DataFrame()
)

In [91]:
# Per-description non-null counts of every column, ranked by the `dir_0`
# count in descending order: the examples seen most often before the
# most-failed example come first.
(
    all_before_most_failed_examples_df
    .groupby("description")
    .count()
    .sort_values(by="dir_0", ascending=False)
)

Unnamed: 0_level_0,dir_0,dir_1,dir_2,dir_3,file_name,finished_at,line_number,queries_count,queries_duration,requests_count,requests_duration,run_time,status
description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Event should equal true,18,18,0,0,18,18,18,18,18,18,18,18,18
Authorizations::CvthequeAcl access_allowed? Jobteaser cvtheque as any other role should not be accessible,15,15,0,0,15,15,15,15,15,15,15,15,15
Users::OmniauthCallbacksController should not work for ENSIACET with an ENSEEIHT student,6,6,0,0,6,6,6,6,6,6,6,6,6
BackendLayoutPresenter when user is an admin should eq nil,6,0,0,0,6,6,6,6,6,6,6,6,6
Wallet#credits_balance should be qual to the credits sum,6,0,0,0,6,6,6,6,6,6,6,6,6
Resume doyoubuzz showcase specific methods behaves like Resume should validate a student is linked,6,0,0,0,6,6,6,6,6,6,6,6,6
Resume file upload resume behaves like Resume should validate a student is linked,6,0,0,0,6,6,6,6,6,6,6,6,6
Authorizations::CompanyAuthorizations#authorized_to_manage_company_positions? user with role company_recruiter for company with {:full_profile?=>false} should == false,6,6,0,0,6,6,6,6,6,6,6,6,6
Kpi::DailyReport#compute_values_for_class when the absolute value are increasing should return the correct absolute value,6,6,0,0,6,6,6,6,6,6,6,6,6
Backend::CompanyPolicy#administrate_payments? when the user is jt_admin should be truthy,4,0,0,0,4,4,4,4,4,4,4,4,4


## Intersection of examples present before a given failing example

## Highcharts

In [None]:
# Execute the local setup_highcharts.py script from this notebook, then call
# load_highcharts().
# NOTE(review): load_highcharts is not defined in the visible cells; presumably
# it is provided by setup_highcharts.py — confirm.
%run setup_highcharts.py
load_highcharts()

In [None]:
# NOTE(review): load_highcharts_modules is not defined in the visible cells;
# presumably it is provided by setup_highcharts.py and must run after
# load_highcharts() — confirm.
load_highcharts_modules()

In [None]:
# Pick one `develop` run (the 7th from the end of the runs frame) and build
# its examples frame for the charts below.
branch_runs_df = runs_df[runs_df.branch == "develop"]
# Positional lookup on the column instead of to_dict(orient="rows"): "rows" is
# not a valid orient (it only worked through legacy prefix matching and raises
# in recent pandas), and converting the whole frame to dicts was unnecessary.
run_key = branch_runs_df.run_key.iloc[-7]
run_data = process.fetch_run_datas(connector_s3, [run_key])[0]
run_examples_df = process.build_run_examples_df(run_data)

In [None]:
import highcharts

# Total run time per (dir_0, dir_1) pair, kept as a one-column frame.
run_time_columns_df = run_examples_df[["dir_0", "dir_1", "run_time"]]
run_time_by_dir_df = run_time_columns_df.groupby(['dir_0', 'dir_1']).sum()[['run_time']]

# Render the drilldown pie chart built from the aggregated run times.
# NOTE(review): HTML is not imported in the visible cells; presumably it comes
# from the setup_highcharts.py run earlier — confirm.
run_time_chart_html = highcharts.pie_drilldown(
    run_time_by_dir_df,
    serie_name='Run time',
    title="Run time",
)
display(HTML(run_time_chart_html))