# Collect Data - Concepts, Triples, Discourse

In [1]:
# Import Dependencies
import pandas as pd
import textwrap # ensures more readable text rendering in ipynb, html, as well as pdf
import time

In [2]:
# Load each dataware table from "<table>.csv" into a dict keyed by table name
dw_arch = ['searches', 'papers', 'authors', 'concepts', 'triples', 'discourse']
dw = dict((table, pd.read_csv(f"{table}.csv")) for table in dw_arch)
dw.keys()

dict_keys(['searches', 'papers', 'authors', 'concepts', 'triples', 'discourse'])

In [3]:
# Current dataware status: today's date followed by the row count of every table
{'date': time.strftime('%Y-%m-%d'), **{table: len(dw[table]) for table in dw_arch}}

{'date': '2022-08-31',
 'searches': 1,
 'papers': 5,
 'authors': 9,
 'concepts': 28,
 'triples': 5,
 'discourse': 3}

In [4]:
# Hard-coded history of dataware status snapshots (one record per date),
# displayed as a table
status = [
    dict(date='2022-08-30', searches=1, papers=5, authors=9, concepts=3, triples=5, discourse=0),
    dict(date='2022-08-31', searches=1, papers=5, authors=9, concepts=28, triples=5, discourse=3),
]
pd.DataFrame(status)

Unnamed: 0,date,searches,papers,authors,concepts,triples,discourse
0,2022-08-30,1,5,9,3,5,0
1,2022-08-31,1,5,9,28,5,3


In [5]:
# Check concepts: preview the first rows of the loaded concepts table
dw['concepts'].head()

Unnamed: 0,name,definition,source,note
0,systematic literature review (SLR),A review of an existing body of literature tha...,"Kraus, Sascha et al. (2020)|p.1026",note
1,SL-Reviewer,The system (model) for producing systematic li...,This study,note
2,SL-Reviewer's States,Ask questions (SAQ)|Collect data (SCD)|Explore...,This study,note
3,SAQ-|Agree on detailed procedure,WHILE asking questions and WHERE there are sev...,"Oosterwyk, Grant et al (2019)",To ensure shared understanding and agreement.
4,SAQ-|Specify primary goal,"WHILE asking questions, the SL-Reviewer shall ...","Oosterwyk, Grant et al (2019)",To justify the need for a standalone review. T...


In [6]:
def wrap_lines(rec, column, inline=False, width=140):
    '''Print one field of a record, wrapped to a maximum line width.

    Parameters
    ----------
    rec : mapping or pandas.Series
        Record that supports ``rec[column]`` lookup.
    column : str
        Key of the field to print; it is also used as the printed label.
    inline : bool, default False
        If True, label and text share the first line ("label: text").
        Otherwise the label is printed on its own line, followed by the text.
    width : int, default 140
        Maximum length of a printed line, passed to ``textwrap.wrap``.

    Notes
    -----
    A field is reported as "<column>: no data" when its value equals the
    column name (the placeholder this notebook uses for empty cells) or when
    it is missing (None / NaN). Any other non-string value is converted with
    ``str`` before wrapping.
    '''
    value = rec[column]

    if not isinstance(value, str):
        # textwrap and the label concatenation only work on str; a missing
        # cell (e.g. NaN) would otherwise raise instead of printing "no data"
        if pd.api.types.is_scalar(value) and pd.isna(value):
            value = None
        else:
            value = str(value)

    if value is None or value == column:
        print(column+': no data')
        return

    if inline:
        text = column+': '+value
    else:
        print(column+':')
        text = value

    for line in textwrap.wrap(text, width=width):
        print(line)


def print_concepts(concepts):
    '''Print the records of the concepts' dataframe as wrapped text.

    For every row the name, source and note are printed inline
    ("label: text") and the definition is printed below its label.
    A blank line follows each record. Rendering of the individual
    fields is delegated to ``wrap_lines``.

    Parameters
    ----------
    concepts : pandas.DataFrame
        Frame with the columns 'name', 'definition', 'source' and 'note'.
    '''
    # (column, inline) pairs in the order they are printed
    layout = (('name', True), ('definition', False), ('source', True), ('note', True))

    for _, rec in concepts.iterrows():
        for column, inline in layout:
            wrap_lines(rec, column, inline=inline)
        print()

def print_discourse(df):
    '''Print every record of the discourse dataframe as wrapped text.

    The idea is printed below its label, the source and the note are
    printed inline; each record is followed by a blank line.
    '''
    # column -> whether label and text share the first line
    inline_by_column = {'idea': False, 'source': True, 'note': True}

    for _, record in df.iterrows():
        for field, same_line in inline_by_column.items():
            wrap_lines(record, field, inline=same_line)
        print()


In [7]:
# Print the first four concept records as wrapped text
concepts = dw['concepts'].head(4)
print_concepts(concepts)

name: systematic literature review (SLR)
definition:
A review of an existing body of literature that follows a transparent and reproducible methodology in searching, assessing its quality and
synthesising it, with a high level of objectivity.
source: Kraus, Sascha et al. (2020)|p.1026
note: no data

name: SL-Reviewer
definition:
The system (model) for producing systematic literature reviews.
source: This study
note: no data

name: SL-Reviewer's States
definition:
Ask questions (SAQ)|Collect data (SCD)|Explore data (SED)|Synthesise knowledge (SSK)|Write the review (SWR)
source: This study
note: no data

name: SAQ-|Agree on detailed procedure
definition:
WHILE asking questions and WHERE there are several researchers involved, the SL-Reviewer shall agree on detailed procedure.
source: Oosterwyk, Grant et al (2019)
note: To ensure shared understanding and agreement.



In [8]:
# Template for state-driven requirements: look up the definition of the
# "SL-Reviewer's States" concept (a pipe-separated list of states).
# The 'name' column is selected with brackets because attribute access
# (.name) can collide with pandas attributes.
dw['concepts'].loc[dw['concepts']['name'] == "SL-Reviewer's States",'definition'].values[0]


'Ask questions (SAQ)|Collect data (SCD)|Explore data (SED)|Synthesise knowledge (SSK)|Write the review (SWR)'

In [9]:
# Print current state of requirements: show the record at position 3 of the concepts table
dw['concepts'].iloc[3]

name                           SAQ-|Agree on detailed procedure
definition    WHILE asking questions and WHERE there are sev...
source                            Oosterwyk, Grant et al (2019)
note              To ensure shared understanding and agreement.
Name: 3, dtype: object

In [10]:
# print_concepts(dw['concepts'].iloc[3:])

In [None]:
def export_concepts_to_md_table(df):
    """Print the concepts as Markdown tables.

    Three tables are written to stdout:

    1. the main table, one row per concept: name, definition, source
       number and note number (empty when the concept has no note);
    2. the numbered list of distinct sources, in order of first appearance;
    3. the numbered list of notes, in row order.

    Pipe characters inside the values are replaced (by a space, or by
    ' ; ' in sources) because '|' separates the table columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the string columns 'name', 'definition', 'source' and
        'note'. A note equal to the placeholder 'note', or a missing note
        (None / NaN), means the concept has no note.
    """
    source_numbers = {}  # source text -> 1-based number, in order of first appearance
    notes = []

    print('# List of Concepts\n\nname|definition|src|note\n-|-|-|-')

    for _, row in df.iterrows():
        # Reuse the number of a known source, otherwise assign the next one
        src_idx = source_numbers.setdefault(row['source'], len(source_numbers)+1)

        # Register the note, if any; its number is its position in the notes list.
        # A missing value is skipped so the notes table never calls .replace on a non-string.
        note = row['note']
        if pd.isna(note) or note == 'note':
            note_ref = ''
        else:
            notes.append(note)
            note_ref = str(len(notes))

        # print the row of the main table
        name = row['name'].replace('|',' ')
        definition = row['definition'].replace('|',' ')
        print(f"{name}|{definition}|{src_idx}|{note_ref}")

    print('\n\nN|source\n-|-')
    for idx, name in enumerate(source_numbers, start=1):
        print(f"{idx}|{name.replace('|',' ; ')}")

    print('\n\nN|note\n-|-')
    for idx, note in enumerate(notes, start=1):
        print(f"{idx}|{note.replace('|',' ')}")

# Export the concepts table; .iloc[0:] selects every row
export_concepts_to_md_table(dw['concepts'].iloc[0:])


# List of Concepts

name|definition|src|note
-|-|-|-
systematic literature review (SLR)|A review of an existing body of literature that follows a transparent and reproducible methodology in searching, assessing its quality and synthesising it, with a high level of objectivity.|1|
SL-Reviewer|The system (model) for producing systematic literature reviews.|2|
SL-Reviewer's States|Ask questions (SAQ) Collect data (SCD) Explore data (SED) Synthesise knowledge (SSK) Write the review (SWR)|2|
SAQ- Agree on detailed procedure|WHILE asking questions and WHERE there are several researchers involved, the SL-Reviewer shall agree on detailed procedure.|3|1
SAQ- Specify primary goal|WHILE asking questions, the SL-Reviewer shall specify primary goal.|3|2
SAQ- Define key concepts|WHILE asking questions, the SL-Reviewer shall define key concept(s) of relevance.|3|3
SAQ- Establish boundaries|WHILE asking questions, the SL-Reviewer shall establish the scope and boundaries.|3|4
SAQ- Draft the research questions|WHILE asking questions, the SL-Reviewer shall draft the research questions.|3|5
SAQ- Specify type of review|WHILE asking questions, the SL-Reviewer shall specify type of review.|3|
SAQ- Specify disposition|WHILE asking questions, the SL-Reviewer shall specify sequential or iterative disposition.|3|
SAQ- Conduct reviewer training|WHILE asking questions, the SL-Reviewer shall conduct reviewer training in note-taking and reviewing methods.|3|6
SCD- Specify where to search|WHILE collecting data, the SL-Reviewer shall specify where to search.|3|7
SCD- Identify the main sources|WHILE collecting data, the SL-Reviewer shall identify the main peer refereed journal and conference outlets.|3|8
SCD- Specify terms to use|WHILE collecting data, the SL-Reviewer shall specify terms to use (search string).|3|9
SCD- Specify dimensions to search|WHILE collecting data, the SL-Reviewer shall specify dimensions (topic, title, keywords, abstract, results etc.) to search.|3|
SCD- Specify timespan|WHILE collecting data, the SL-Reviewer shall specify timespan.|3|10
SCD- Restrict search|WHILE collecting data, the SL-Reviewer shall restrict search within bounds of research question.|3|
SCD- Minimize publication bias|WHILE collecting data, the SL-Reviewer shall minimize publication bias (grey and unpublished data.|3|11
SCD- Perform backward and forward search|WHILE collecting data, the SL-Reviewer shall perform backward and forward search.|3|12
SCD- Specify inclusion criteria|WHILE collecting data, the SL-Reviewer shall specify inclusion criteria.|3|13
SCD- Specify exclusion criteria|WHILE collecting data, the SL-Reviewer shall specify exclusion criteria.|3|14
SCD- Review title, abstract, keywords|WHILE collecting data, the SL-Reviewer shall review title, abstract, keywords and apply screen.|3|
SCD- Review introduction and conclusion|WHILE collecting data, the SL-Reviewer shall review introduction and conclusion and apply screen.|3|
SCD- Review full papers|WHILE collecting data, the SL-Reviewer shall review full papers for relevance, rigour and credibility.|3|
SED- Select and apply appropriate method|WHILE exploring data, the SL-Reviewer shall select and apply appropriate method.|3|15
SSK- Select and apply appropriate method|WHILE synthesising knowledge, the SL-Reviewer shall select and apply appropriate method.|3|16
SWR- Specify structural elements|WHILE writing the review, the SL-Reviewer shall specify structural elements, such as introduction, definitions, methodology, analysis, synthesis and interpretation, outcome, conclusion.|3|
SWR- Consider presentation|WHILE writing the review, the SL-Reviewer shall consider presentation, such are diagrams, visuals, illustrations and tables.|3|


N|source
-|-
1|Kraus, Sascha et al. (2020) ; p.1026
2|This study
3|Oosterwyk, Grant et al (2019)


N|note
-|-
1|To ensure shared understanding and agreement.
2|To justify the need for a standalone review. To inform selection of the structure and type of review required.
3|IF the review has a strong inductive, theory-building intent, THEN this requirment is less relevant.
4|To inform subsequent stages of search and selection.
5|To inform selection of type of literature review to be conducted. The research question should be a one-to-two sentence statement that explains the aim and outcome (Okoli, 2015), and perhaps review’s audience (e.g., scholars, practitioners, policy makers, etc.). For inductive studies the research question may be more openended (Bandara et al., 2015).
6|To produce a review with a reliable standard.
7|In standalone reviews, sources such as the AIS Electronic Library (AISeL), JSTOR, ABI/Inform, IEEE Xplore and ISI Web of Science are frequently used in IS (Bandara et al., 2015, Okoli 2015).
8|Bandara et al., (2015) and Levy & Ellis (2006) recommend the use of top ranked peer-refereed journals and conference outlets. In IS, these would include the AIS “basket of eight” (MISQ, EJIS, ISJ, JMIS, ISR, JAIS, JIT, JSIS) and AIS top 4 conferences (ICIS, EICS, PACIS, AMCIS) (Schryen, 2015). Additional top IS journals can be identified through bibliometric studies such as that of Lowry et al., (2013). In addition sources such as the Clarivate Analytics journal citation report (https://jcr.incites.thomsonreuters.com) and Scimago (https://www.scimagojr.com) provide details of high impact journals in a domain of study. Conference ranking systems in IS are rare, but some exist, e.g. the Australian Computing Research and Education site (http://www.core.edu.au/conference-portal), and the IFIP TC8 rankings (http://ifiptc8.dsi.uminho.pt/index.php/events/ranking-of-is-conferences). Some studies limit the search to journals only, or even the AIS top 8 journals. Whatever the decision, it must be documented and justified.
9|It is recommended to select keywords from identified papers. Taxonomies suitable for the literature review can be utilised. For example, many of the taxonomies of keywords can be found on the ACM website (http://www.acm.org/about/class/class/2012) provides taxonomies of keywords. These references are only keywords and hence, should be combined to create search strings. Keywords that are linked with logical operators are frequently used in literature databases as search strings (e.g., AND, OR, NOT) (Schryen, 2015).
10|Mapping the literature based on a specific timeframe e.g. looking at past studies going back 10 years should be sufficient (Templier & Paré, 2015; vom Brocke et al., 2015).
11|Publication bias refers to the “problem that significant (and supporting) results are more likely to be published than non-significant (and non-supporting) results” (Templier & Paré, 2015). Topic-specific dissertations and unpublished research reports could be included as well, where the research protocol suggests inclusion of such sources.
12|A backward search refers to the process of identifying articles by searching the reference lists of important papers (Webster & Watson, 2002; vom Brocke et al., 2015). The forward search refers to identifying articles that have cited some important papers. Google Scholar and Web of Science provide the function to forward search (Schryen, 2015; Webster & Watson, 2002).
13|Inclusion criteria refer to the criteria used to select papers, based on, for example the research question and scope of study etc. Should several researchers be working on the literature review paper, a standardized inclusion criterion needs to be discussed and agreed upon (Bandara et al., 2015).
14|To increase validity of any paper, it is important to state the criteria used to exclude research. This increases the credibility of the paper and ensures other authors can reference the published work confidently (Bandara et al., 2015). An example of excluding research is to not use duplicate studies in different sources or forms such as a conference papers subsequently published as journal articles (Bandara et al., 2015; Schryen 2015).
15|The choice of method is determined by its appropriateness to the research question, the type of literature review being conducted, and the corpus of literature.
16|The choice of method is determined by its appropriateness to the research question, the type of literature review being conducted, and the corpus of literature.


In [27]:
def export_discourse_to_md_table(df):
    """Print the discourse records as Markdown tables.

    Three tables are written to stdout:

    1. the main table, one row per idea: idea, source number and note
       number (empty when the idea has no note);
    2. the numbered list of distinct sources, in order of first appearance;
    3. the numbered list of notes, in row order.

    Pipe characters inside the values are replaced (by a space, or by
    ' ; ' in sources) because '|' separates the table columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the string columns 'idea', 'source' and 'note'.
        A note equal to the placeholder 'note', or a missing note
        (None / NaN), means the idea has no note.
    """
    source_numbers = {}  # source text -> 1-based number, in order of first appearance
    notes = []

    print('# Discourse\n\nidea|src|note\n-|-|-')

    for _, row in df.iterrows():
        # Reuse the number of a known source, otherwise assign the next one
        src_idx = source_numbers.setdefault(row['source'], len(source_numbers)+1)

        # Register the note, if any; its number is its position in the notes list.
        # A missing value is skipped so the notes table never calls .replace on a non-string.
        note = row['note']
        if pd.isna(note) or note == 'note':
            note_ref = ''
        else:
            notes.append(note)
            note_ref = str(len(notes))

        # print the row of the main table
        idea = row['idea'].replace('|',' ')
        print(f"{idea}|{src_idx}|{note_ref}")

    print('\n\nN|source\n-|-')
    for idx, name in enumerate(source_numbers, start=1):
        print(f"{idx}|{name.replace('|',' ; ')}")

    print('\n\nN|note\n-|-')
    for idx, note in enumerate(notes, start=1):
        print(f"{idx}|{note.replace('|',' ')}")

In [None]:
# Print the discourse table as Markdown
export_discourse_to_md_table(dw['discourse'])

# Discourse

idea|src|note
-|-|-
The protocol is subject to change, however protocol changes must be documented, to affirm that the research work is comprehensive, clear, reproducible and is of high quality.|1|1
Restrict search within bounds of research question - Templier & Paré (2015) and Okoli (2015) agree that the research question must be the focus of the investigation and that the search strategy must be aligned to the research question(s). On the other hand, Bandara et al., (2015) argue that the research questions should be allowed to emerge as data (literature) is collected and analysed. The later argument holds where a strongly inductive approach is being followed, often within an iterative hermeneutic stance (Boell & Cecez-Kecmanovic, 2014).|2|
Select and apply appropriate method - A variety of methods can be used to analyse, synthesise and interpret literature, including soft systems methodology for heterogeneous literature (Sylvester et al., 2013), grounded theory techniques for theory-building literature reviews (Wolfswinkel et al., 2013), critical discourse analysis (Wall et al., 2015), meta-analysis techniques (Templier & Paré, 2015; vom Brocke et al., 2015) and so on. Geeling et al., (2017) show that thematic analysis could be integrated with grounded theory techniques, while mixed studies combine qualitative and quantitative techniques (Bandara et al., 2015). A hermeneutic review emphasizes the importance of integrating literature searches with the analysis and interpretation of text (Boell and Cecez-Kecmanovic, 2014).|2|


N|source
-|-
1|Okoli (2015) ; Oosterwyk, Grant et al (2019)
2|Oosterwyk, Grant et al (2019)


N|note
-|-
1|FixMe Update reference to Okoli