In [1]:
import pandas as pd, os

In [2]:
%matplotlib notebook
import seaborn as sns, matplotlib.pyplot as plt, numpy as np
sns.set_style('darkgrid')

In [3]:
# Settings used when saving figures: file names are prefixed with today's
# date in ISO format and written below `basepath` (see save_plot).
import datetime
today = datetime.date.today().isoformat()
basepath = '../../graphics/codedtruth'

In [4]:
# Directory containing the input CSV files; list it to see what is available
base = '../../data/codedtruth'
os.listdir(base)

['.DS_Store',
 'paper_metadata.csv',
 'papertags_how_withwhom_final.csv',
 'papertags_what_final.csv',
 'truth_metadata.csv',
 'truth_overflow.csv',
 'truth_ratings.csv',
 'v_11_coded_final.csv',
 'v_16_coded_final.csv',
 'v_19_coded_final.csv',
 'v_6_coded_final.csv',
 'v_8345etseq_final.csv',
 'v_8780etseq_final.csv']

In [6]:
# Read every CSV in `base` whose file name does not contain 'coded' into a
# dict keyed by the file name without its '.csv' suffix.
dfdict = {
    name[:-4]: pd.read_csv(f'{base}/{name}')
    for name in os.listdir(base)
    if 'coded' not in name and name.endswith('.csv')
}
dfdict.keys()

dict_keys(['paper_metadata', 'papertags_how_withwhom_final', 'papertags_what_final', 'truth_metadata', 'truth_overflow', 'truth_ratings', 'v_8345etseq_final', 'v_8780etseq_final'])

In [28]:
def save_plot(filename):
    """Save the current matplotlib figure as a date-stamped PDF.

    The figure is written to ``{basepath}/{today}_{filename}.pdf``, where
    ``basepath`` and ``today`` are the notebook-level settings for saving
    figures.

    Parameters
    ----------
    filename : str
        Base name of the output file, without date prefix or extension.
    """
    # savefig does not create missing directories, so make sure the target exists
    os.makedirs(basepath, exist_ok=True)
    plt.savefig(f'{basepath}/{today}_{filename}.pdf')

In [30]:
def split_tags(old_df, levs=2):
    """Return a copy of ``old_df`` with its colon-separated ``Tag`` column split into levels.

    A tag such as ``'reason:relevance'`` yields ``level_1 == 'reason'`` and
    ``level_2 == 'relevance'``. Tags with fewer than ``levs`` parts get an
    empty string for the missing levels, and so do missing tags (NaN/None).

    Parameters
    ----------
    old_df : pandas.DataFrame
        Frame with a ``Tag`` column; it is not modified.
    levs : int, default 2
        Number of ``level_<n>`` columns to add.

    Returns
    -------
    pandas.DataFrame
        Copy of ``old_df`` with the columns ``level_1`` ... ``level_<levs>`` added.
    """
    df = pd.DataFrame(old_df, copy=True)
    # Missing tags are not strings and have no .split(); treat them as having no parts
    levels = [tag.split(':') if isinstance(tag, str) else [] for tag in df['Tag']]
    for level in range(levs):
        df[f'level_{level+1}'] = [parts[level] if len(parts) > level else '' for parts in levels]
    return df

### Positive and Negative Ratings

In [51]:
# Paper metadata table (loaded from paper_metadata.csv)
papers = dfdict['paper_metadata']

#### Section 3.3.1 - Positive Ratings

In [43]:
# Explanations for positive ratings with their coded tags (from v_8345etseq_final.csv)
pos = dfdict['v_8345etseq_final']

In [44]:
# how many distinct respondents (lfdn) gave a positive rating explanation
pos['lfdn'].nunique()

122

In [45]:
# how many distinct paper summaries (PaperID) received a positive rating explanation
pos['PaperID'].nunique()

103

In [59]:
# Tag frequencies for the positive explanations, by first and second tag level
(
    split_tags(pos)
    .groupby(['level_1', 'level_2'])[['Tag']]
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Tag
level_1,level_2,Unnamed: 2_level_1
NotAnswered,,1
reason,originality,5
reason,plausibility,57
reason,relevance,75
source,experience,10
source,opinion,12


In [147]:
# Papers with more than one distinct (respondent, explanation) pair among the positive ratings
(
    pos[['PaperID', 'lfdn', 'reasoning']]
    .drop_duplicates()
    .groupby(['PaperID'])
    .filter(lambda paper_rows: len(paper_rows) > 1)
)

Unnamed: 0,PaperID,lfdn,reasoning
5,10,32,A learning-by-example method is one of the mos...
6,10,148,"The more we know about a product, the more we ..."
18,24,31,There is a constant need to capture both the a...
19,24,55,"in my opinion, testing ist very important and ..."
22,31,24,Focus and particular attention to critical reqs
23,31,117,This problem is not adressed in industry. Howe...
31,82,66,I'm working in agile development environments....
33,82,81,Because of the conflict between complete requi...
41,98,86,relevant in daily project-life
42,98,126,It's important to discuss and setup requiremen...


In [223]:
# Positive explanations for paper 18, joined with that paper's metadata
pos.loc[pos['PaperID'] == 18].merge(papers)

Unnamed: 0,PaperID,lfdn,reasoning,Tag,Title,Authors,Venue,Year,NumberOfPages,AcadVsInd,IndTrack,Summary
0,18,92,the title sounds like advanced RE and furthe...,reason:originality,A Case Study on Tool-Supported Multi-level Req...,"Bittner, M.; Reiser, M.-O.; Weber, M.",REFSQ,2010,14,Academic,No,A case study for validating a multi-level appr...


In [156]:
# Positive explanations whose text contains "opinion" (case-sensitive).
# na=False counts a missing explanation as a non-match; without it a NaN in
# the mask would make the boolean indexing fail.
pos[pos.reasoning.str.contains('opinion', na=False)]

Unnamed: 0,PaperID,lfdn,reasoning,Tag
19,24,55,"in my opinion, testing ist very important and ...",reason:relevance
20,24,55,"in my opinion, testing ist very important and ...",source:opinion
29,63,111,(Formal) Verification trough models (thus in m...,reason:plausibility
85,235,59,"In my opinion, despite the efforts for alignin...",reason:originality
86,235,59,"In my opinion, despite the efforts for alignin...",source:opinion
105,272,128,In my opinion we need to share and exchange mo...,reason:plausibility
106,272,128,In my opinion we need to share and exchange mo...,source:opinion


#### Section 3.3.2 - Negative Ratings

In [55]:
# Explanations for negative ratings with their coded tags (from v_8780etseq_final.csv)
neg = dfdict['v_8780etseq_final']

In [56]:
# how many distinct respondents (lfdn) gave a negative rating explanation
neg['lfdn'].nunique()

117

In [57]:
# how many distinct paper summaries (PaperID) received a negative rating explanation
neg['PaperID'].nunique()

103

In [58]:
# Tag frequencies for the negative explanations, by first and second tag level
(
    split_tags(neg)
    .groupby(['level_1', 'level_2'])[['Tag']]
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Tag
level_1,level_2,Unnamed: 2_level_1
NotAnswered,,1
reason,notconvincing,37
reason,notefficient,10
reason,notimportant,20
reason,notinteresting,6
reason,notoriginal,4
reason,notrealistic,24
reason,toocomplicated,5
reason,toospecialized,12
reason,toosubjective,1


In [240]:
# Negative explanations for paper 58, joined with that paper's metadata
neg.loc[neg['PaperID'] == 58].merge(papers)

Unnamed: 0,PaperID,lfdn,reasoning,Tag,Title,Authors,Venue,Year,NumberOfPages,AcadVsInd,IndTrack,Summary
0,58,74,I'm not sure how I would apply this research a...,reason:toocomplicated,On the Effectiveness of Abstraction Identifica...,"Gacitua, R.; Sawyer, P.; Gervasi, V.",RE,2010,10,Academic,No,A method for identifying single- and multi-wor...


In [148]:
# Papers with more than one distinct (respondent, explanation) pair among the negative ratings
(
    neg[['PaperID', 'lfdn', 'reasoning']]
    .drop_duplicates()
    .groupby(['PaperID'])
    .filter(lambda paper_rows: len(paper_rows) > 1)
)

Unnamed: 0,PaperID,lfdn,reasoning
29,127,23,"creativity ist nicht der entscheidende Punkt, ..."
30,127,91,i am not clear not this
31,127,120,It does not make sense for me.
34,143,116,To me it is unclear how the self-understanding...
36,143,139,Better to have involving than from paper work
40,169,13,Didn't really understand what was going on here.
41,169,26,It is not clear to me how simple data (whate...
42,169,80,Sounds more like a specialised piece of analyt...
48,197,43,I consider that domain professionals are bette...
49,197,50,This is a very narrow topic (on its own in is ...


#### Section 3.3.3 - Research Wishes

In [129]:
# Free-text research wishes (v_18) per respondent (lfdn), from truth_overflow.csv
wishes = dfdict['truth_overflow'][['lfdn', 'v_18']]

In [188]:
# Attach respondent metadata to each wish and keep only the columns listed.
# merge() is called without `on`, so pandas joins on whatever columns the two
# frames have in common (presumably just lfdn — TODO confirm).
wishes_with_meta = wishes.merge(dfdict['truth_metadata'])[['lfdn', 'v_18', 
                                                          'v_5_6_integrated', # role
                                                          'v_11_coded', # experience
                                                          'v_19_coded', # sector
                                                          'v_124', # country 
                                                          'v_14', # team size
                                                          'v_15_16_integrated', # system 
                                                         ]]

In [198]:
# Keep only rows whose wish is not the 'NotAnswered' placeholder
wishes_filtered = wishes_with_meta.loc[wishes_with_meta['v_18'].ne('NotAnswered')]
wishes_filtered.shape

(103, 8)

In [199]:
# Show the answered wishes together with the respondents' metadata
wishes_filtered

Unnamed: 0,lfdn,v_18,v_5_6_integrated,v_11_coded,v_19_coded,v_124,v_14,v_15_16_integrated
2,2,Building of economic models for comparing and ...,Coach,10.0,Multiple Sectors,Germany,Medium (5-10),Consumer Software
4,4,I think that the community is to focused in pr...,Business Analyst,15.0,ICT,Ecuador,Larger (10-49),Hybrid / mix of embedded systems and informati...
5,5,Find better ways for healthcare practitioners/...,Requirements Engineer,10.0,Healthcare,Canada,Medium (5-10),(Business) information systems
6,6,Reducing ambiguity that arises from unexplicat...,Other,25.0,Education,United States,Larger (10-49),Hybrid / mix of embedded systems and informati...
7,7,Study real-life projects and identify issues t...,Requirements Engineer,50.0,Multiple Sectors,Denmark,Medium (5-10),(Business) information systems
8,8,1) Derive Software Requirements from System Re...,Consultant,15.0,Multiple Sectors,Germany,Medium (5-10),Software-intensive embedded systems
9,9,I would say: Please analyze the documentation ...,Requirements Engineer,20.0,Multiple Sectors,Germany,Larger (10-49),Software-intensive embedded systems
10,10,(Semi)-Automated support for standard RE activ...,Requirements Engineer,10.0,Multiple Sectors,Germany,Larger (10-49),Hybrid / mix of embedded systems and informati...
11,11,Develop methods for extracting familiar ideas ...,Requirements Engineer,30.0,Financial Services,Switzerland,Very large (50+),(Business) information systems
12,12,Understanding and staying within the scope of ...,Project Manager,2.0,Robotics,Greece,Larger (10-49),Hybrid / mix of embedded systems and informati...


In [192]:
# Print every wish with its metadata, one respondent per paragraph
for row in wishes_filtered.iterrows():
    # row is an (index, Series) pair; skip the first field (lfdn)
    fields = row[1].iloc[1:]
    print(fields.values, '\n')

['Building of economic models for comparing and prioritizing requirements.   Predicting the business impact of future software capabilities.   Observing user behaviour and connecting it to previous and future requirements.   Visualising requirements and business goals and their interconnections.   Deriving requirements from business goals.   Formally proving the adherence of a system to non-functional requirements. '
 'Coach' 10.0 'Multiple Sectors' 'Germany' 'Medium (5-10)'
 'Consumer Software'] 

['I think that the community is to focused in producing short term results for scientific publications. In general, experiments are fictitious and irrelevant, and conducted in very controlled environments. I think that research shall move towards conducting large experiments in the industry and present the results of the application of methods and notations in more real non-controlled environments. I currently see the RE community as a very closed family in which novel ideas are not welcomed

In [204]:
# Wishes mentioning "agile"/"Agile"; na=False treats a missing wish as a
# non-match directly instead of patching the mask afterwards with fillna
wishes_filtered[wishes_filtered.v_18.str.contains('[Aa]gile', na=False)].v_18.values

array(['How to best apply professional requirement engineering together with agile development methods.',
       'Design processes and their complexity. We are constantly trying to push software engineering into a linear process (even with agile) but it is and will Always will be a circular and reflective process. ',
       'How to set up requirements so that   1) At the most global level everything is described  2) The requirements engineers and the requirements users can track easily what is described up to which detail level   3) The requirements engineers and the requirement users can easily  zoom-in  and  zoom-out  between different detail levels.   In our agile environment that would help us much.',
       '- Human-centric (natural language supported, AI supported) assistance for formal specification and disambiguation of requirements with multifaceted inter-role linkage, that is, supporting that multi-disciplinary stakeholders collaborate on agile, computed processed, formalizat

In [217]:
# Wishes mentioning stakeholders, users, humans or people; na=False treats a
# missing wish as a non-match directly instead of patching the mask with fillna
wishes_filtered[wishes_filtered.v_18.str.contains(
    '[Ss]takeholder|[Uu]ser|[Hh]uman|[Pp]eople', na=False)].v_18.values

array(['Building of economic models for comparing and prioritizing requirements.   Predicting the business impact of future software capabilities.   Observing user behaviour and connecting it to previous and future requirements.   Visualising requirements and business goals and their interconnections.   Deriving requirements from business goals.   Formally proving the adherence of a system to non-functional requirements. ',
       '1) Derive Software Requirements from System Requirements. I do not know any proper, research based method which guide people in the industry here!  2) A research based guidance how to treat the different categories of requirements (not the levels!!!). E.g. a process requirement will have not traceability into the technical implementation etc.  3) A research based argumentation where and when natural language requirements are useful and where not',
       'I would say: Please analyze the documentation of our past projects to find subsystems and conditions whe