The goal of this notebook is to select the branches that are interesting enough to be examined individually.

In [5]:
%load_ext autoreload
%autoreload 3

import modules.data_preparation as data_prep
import modules.constants as const
import pandas as pd

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Load the master table from the working directory; the first CSV column is
# used as the row index.
masterdata = pd.read_csv("masterdata.csv", index_col=0)

In [7]:
# Keep only the rows whose objective name contains "rentalcalc".
# regex=False matches the text literally, and na=False treats a missing
# objective as "no match" instead of producing a NaN entry in the mask, which
# boolean indexing would reject.
rentalcalc_branches = masterdata[
    masterdata[const.OBJECTIVE].str.contains("rentalcalc", regex=False, na=False)
]

In [8]:
# Inspect the rentalcalc rows selected from masterdata.
rentalcalc_branches

Unnamed: 0,Fitness function,Objective,Success,Autocorrelation for k = 1,Information content,Neutrality distance,Neutrality volume
846,Branch distance,protect.rentalcalc0,0.35,0.292442,0.708395,2.60,295.80
847,Branch distance,protect.rentalcalc1,0.30,0.319453,0.658174,29.10,260.15
848,Branch distance,protect.rentalcalc10,0.40,0.318886,0.678781,21.05,278.10
849,Branch distance,protect.rentalcalc100,0.50,0.393341,0.693270,2.85,276.45
850,Branch distance,protect.rentalcalc101,0.45,0.378385,0.689344,9.05,277.55
...,...,...,...,...,...,...,...
2595,Code-based fitness,protect.rentalcalc95,1.00,,0.000000,751.00,1.00
2596,Code-based fitness,protect.rentalcalc96,1.00,0.177757,0.145236,42.70,30.60
2597,Code-based fitness,protect.rentalcalc97,1.00,,0.000000,751.00,1.00
2598,Code-based fitness,protect.rentalcalc98,1.00,0.045287,0.073826,108.75,15.15


In [9]:
# Read one branch name per line of branches.txt (surrounding whitespace removed).
with open("rentalcalc/branches.txt") as branches_file:
    lines = [raw_line.strip() for raw_line in branches_file]

# The objective for the line at zero-based position k is "protect.rentalcalc<k>".
# NOTE(review): this assumes the line order of branches.txt matches the
# numbering used in the objective names of masterdata -- confirm.
objectives = [f"protect.rentalcalc{position}" for position in range(len(lines))]

# Lookup table mapping each objective to its branch name.
branches_names = pd.DataFrame({const.OBJECTIVE: objectives, "Name": lines})

branches_names

Unnamed: 0,Objective,Name
0,protect.rentalcalc0,Lprotect/rentalcalc/PropertiesListActivity;->o...
1,protect.rentalcalc1,Lprotect/rentalcalc/PropertyWorksheetActivity;...
2,protect.rentalcalc2,Lprotect/rentalcalc/PropertyViewActivity;->onC...
3,protect.rentalcalc3,Lprotect/rentalcalc/CalcUtil;->calculateForYea...
4,protect.rentalcalc4,Lprotect/rentalcalc/PropertyWorksheetActivity;...
...,...,...
449,protect.rentalcalc449,Lprotect/rentalcalc/CalcUtil;->calculateForYea...
450,protect.rentalcalc450,Lprotect/rentalcalc/PropertySummaryActivity;->...
451,protect.rentalcalc451,Lprotect/rentalcalc/PropertySummaryActivity;->...
452,protect.rentalcalc452,Lprotect/rentalcalc/PropertyProjectionsActivit...


In [10]:
# Attach the branch name to every rentalcalc row by matching on the objective
# column. No `how` is given, so pandas' default inner join applies and rows
# without a match on either side are dropped.
rentalcalc_branches_with_names = rentalcalc_branches.merge(
    branches_names, on=const.OBJECTIVE
)

In [11]:
# Split off the two extremes of the success column: rows where it is exactly
# 1.0 and rows where it is exactly 0.0. Rows with any value in between end up
# in neither frame.
successful = rentalcalc_branches_with_names.loc[
    lambda rows: rows[const.SUCCESS] == 1.0
]
unsuccessful = rentalcalc_branches_with_names.loc[
    lambda rows: rows[const.SUCCESS] == 0.0
]

In [12]:
# Inspect the rows whose success value is exactly 1.0.
successful

Unnamed: 0,Fitness function,Objective,Success,Autocorrelation for k = 1,Information content,Neutrality distance,Neutrality volume,Name
3,Code-based fitness,protect.rentalcalc1,1.0,0.208740,0.194051,35.90,45.90,Lprotect/rentalcalc/PropertyWorksheetActivity;...
5,Code-based fitness,protect.rentalcalc10,1.0,0.126690,0.818205,2.95,371.85,Lprotect/rentalcalc/PropertyViewActivity;->onC...
13,Code-based fitness,protect.rentalcalc103,1.0,0.186247,0.195530,35.90,45.50,Lprotect/rentalcalc/PropertyPicturesActivity;-...
23,Code-based fitness,protect.rentalcalc108,1.0,0.177757,0.145236,42.70,30.60,Lprotect/rentalcalc/PictureViewActivity;->onCr...
31,Code-based fitness,protect.rentalcalc111,1.0,0.243653,0.731271,3.10,307.30,Lprotect/rentalcalc/PropertyCursorAdapter;->bi...
...,...,...,...,...,...,...,...,...
897,Code-based fitness,protect.rentalcalc94,1.0,0.177757,0.145236,42.70,30.60,Lprotect/rentalcalc/PropertyWorksheetActivity;...
899,Code-based fitness,protect.rentalcalc95,1.0,,0.000000,751.00,1.00,Lprotect/rentalcalc/PropertySummaryActivity;->...
901,Code-based fitness,protect.rentalcalc96,1.0,0.177757,0.145236,42.70,30.60,Lprotect/rentalcalc/PropertyPicturesActivity;-...
903,Code-based fitness,protect.rentalcalc97,1.0,,0.000000,751.00,1.00,Lprotect/rentalcalc/PropertiesListActivity;->d...


In [13]:
# Inspect the rows whose success value is exactly 0.0.
unsuccessful

Unnamed: 0,Fitness function,Objective,Success,Autocorrelation for k = 1,Information content,Neutrality distance,Neutrality volume,Name
1,Code-based fitness,protect.rentalcalc0,0.0,,0.000000,751.00,1.00,Lprotect/rentalcalc/PropertiesListActivity;->o...
7,Code-based fitness,protect.rentalcalc100,0.0,0.253891,0.301621,18.90,76.40,Lprotect/rentalcalc/PropertyNotesActivity;->on...
9,Code-based fitness,protect.rentalcalc101,0.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/PropertyOverviewActivity;-...
11,Code-based fitness,protect.rentalcalc102,0.0,,0.000000,751.00,1.00,Lprotect/rentalcalc/PropertyWorksheetActivity;...
15,Code-based fitness,protect.rentalcalc104,0.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/PropertyNotesActivity;->on...
...,...,...,...,...,...,...,...,...
883,Code-based fitness,protect.rentalcalc88,0.0,0.251890,0.730051,3.55,291.05,Lprotect/rentalcalc/PropertyViewActivity;->doS...
889,Code-based fitness,protect.rentalcalc90,0.0,,0.000000,751.00,1.00,Lprotect/rentalcalc/PropertyWorksheetActivity;...
891,Code-based fitness,protect.rentalcalc91,0.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/Property;->toProperty(Land...
893,Code-based fitness,protect.rentalcalc92,0.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/PropertyPicturesActivity;-...


In [15]:
# Drop the unsuccessful rows whose neutrality distance is 751. In the table
# displayed above, those rows also show information content 0 and neutrality
# volume 1, i.e. they look like completely flat landscapes.
# NOTE(review): 751 is presumably the largest value the metric can report
# here -- confirm where it comes from.
unsuccessful_not_flat = unsuccessful.loc[
    lambda rows: rows[const.NEUTRALITY_DISTANCE] != 751
]

In [16]:
# Inspect the unsuccessful rows left after removing neutrality distance 751.
unsuccessful_not_flat

Unnamed: 0,Fitness function,Objective,Success,Autocorrelation for k = 1,Information content,Neutrality distance,Neutrality volume,Name
7,Code-based fitness,protect.rentalcalc100,0.0,0.253891,0.301621,18.90,76.40,Lprotect/rentalcalc/PropertyNotesActivity;->on...
9,Code-based fitness,protect.rentalcalc101,0.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/PropertyOverviewActivity;-...
15,Code-based fitness,protect.rentalcalc104,0.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/PropertyNotesActivity;->on...
17,Code-based fitness,protect.rentalcalc105,0.0,0.232669,0.731740,3.10,306.60,Lprotect/rentalcalc/Property;->toBlankIfNull(L...
21,Code-based fitness,protect.rentalcalc107,0.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/PropertyViewActivity;->onO...
...,...,...,...,...,...,...,...,...
879,Code-based fitness,protect.rentalcalc86,0.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/PropertyPicturesActivity;-...
883,Code-based fitness,protect.rentalcalc88,0.0,0.251890,0.730051,3.55,291.05,Lprotect/rentalcalc/PropertyViewActivity;->doS...
891,Code-based fitness,protect.rentalcalc91,0.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/Property;->toProperty(Land...
893,Code-based fitness,protect.rentalcalc92,0.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/PropertyPicturesActivity;-...


In [17]:
# Drop the successful rows whose neutrality distance is 751. In the table
# displayed above, those rows also show information content 0 and neutrality
# volume 1, i.e. they look like completely flat landscapes.
# NOTE(review): 751 is presumably the largest value the metric can report
# here -- confirm where it comes from.
successful_not_flat = successful.loc[
    lambda rows: rows[const.NEUTRALITY_DISTANCE] != 751
]

In [18]:
# Inspect the successful rows left after removing neutrality distance 751.
successful_not_flat

Unnamed: 0,Fitness function,Objective,Success,Autocorrelation for k = 1,Information content,Neutrality distance,Neutrality volume,Name
3,Code-based fitness,protect.rentalcalc1,1.0,0.208740,0.194051,35.90,45.90,Lprotect/rentalcalc/PropertyWorksheetActivity;...
5,Code-based fitness,protect.rentalcalc10,1.0,0.126690,0.818205,2.95,371.85,Lprotect/rentalcalc/PropertyViewActivity;->onC...
13,Code-based fitness,protect.rentalcalc103,1.0,0.186247,0.195530,35.90,45.50,Lprotect/rentalcalc/PropertyPicturesActivity;-...
23,Code-based fitness,protect.rentalcalc108,1.0,0.177757,0.145236,42.70,30.60,Lprotect/rentalcalc/PictureViewActivity;->onCr...
31,Code-based fitness,protect.rentalcalc111,1.0,0.243653,0.731271,3.10,307.30,Lprotect/rentalcalc/PropertyCursorAdapter;->bi...
...,...,...,...,...,...,...,...,...
885,Code-based fitness,protect.rentalcalc89,1.0,0.235826,0.143605,55.20,29.40,Lprotect/rentalcalc/CalcUtil;->calculateForYea...
887,Code-based fitness,protect.rentalcalc9,1.0,0.203092,0.172313,42.45,37.40,Lprotect/rentalcalc/PropertyWorksheetActivity;...
897,Code-based fitness,protect.rentalcalc94,1.0,0.177757,0.145236,42.70,30.60,Lprotect/rentalcalc/PropertyWorksheetActivity;...
901,Code-based fitness,protect.rentalcalc96,1.0,0.177757,0.145236,42.70,30.60,Lprotect/rentalcalc/PropertyPicturesActivity;-...


In [19]:
# Draw one random row from each non-flat group (unsuccessful first, then
# successful) and stack the two rows into a single frame. No random_state is
# passed, so every run may pick different branches.
sampled_branches = pd.concat(
    [group.sample() for group in (unsuccessful_not_flat, successful_not_flat)]
)

The following cell hardcodes the sampled branches to the ones used in the thesis. Remove this cell if you want to use newly sampled branches.

In [26]:
# Pin the selection to two fixed objectives, restricted to rows of the
# code-based fitness function. This replaces any previously sampled value of
# sampled_branches.
# isin() states "either of these objectives" directly. The earlier version
# joined the two equality tests with XOR (^), which only behaved like OR
# because a row can never match both names at once.
sampled_branches = rentalcalc_branches_with_names[
    rentalcalc_branches_with_names[const.OBJECTIVE].isin(
        ["protect.rentalcalc202", "protect.rentalcalc59"]
    )
    & (rentalcalc_branches_with_names[const.FITNESS_FUNCTION] == const.CODE_BASED)
]

In [27]:
# Inspect the rows currently selected as samples.
sampled_branches

Unnamed: 0,Fitness function,Objective,Success,Autocorrelation for k = 1,Information content,Neutrality distance,Neutrality volume,Name
233,Code-based fitness,protect.rentalcalc202,0.0,0.253891,0.301621,18.9,76.4,Lprotect/rentalcalc/PropertyPicturesActivity;-...
819,Code-based fitness,protect.rentalcalc59,1.0,0.175427,0.143192,42.7,31.4,Lprotect/rentalcalc/PropertyNotesActivity;->on...


In [28]:
# Write the selected rows to disk in CSV format. The target path has no file
# extension, and with to_csv's defaults the row index is written as the first
# column.
sampled_branches.to_csv("rentalcalc/samples_for_case_study")