In [1]:
import pandas as pd
import json
import SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper, JSON
import ast
from tqdm import tqdm


# import docx
from docx import Document
from tabulate import tabulate
from IPython.display import display, HTML
import matplotlib.backends.backend_pdf
import matplotlib.pyplot as plt

In [None]:
pd.set_option('max_rows', 600)
pd.set_option('max_colwidth', 300)

In [2]:
%store -r rankings_H
# %store -r triples_arr

rankings = rankings_H
# triples = triples_arr
rankings

Unnamed: 0,statement,rank,score,probas_pos_neg
0,https://data.cooperationdatabank.org/vocab/pro...,1,7.381446,0.981578
1,https://data.cooperationdatabank.org/vocab/pro...,2,7.320162,0.980730
2,https://data.cooperationdatabank.org/vocab/pro...,2,7.317283,0.980689
3,https://data.cooperationdatabank.org/vocab/pro...,1,7.157279,0.978288
4,https://data.cooperationdatabank.org/vocab/pro...,1,7.092446,0.977233
...,...,...,...,...
555,https://data.cooperationdatabank.org/vocab/pro...,771,0.376581,0.220347
556,https://data.cooperationdatabank.org/vocab/pro...,1596,0.235451,0.202749
557,https://data.cooperationdatabank.org/vocab/pro...,1162,0.230638,0.202168
558,https://data.cooperationdatabank.org/vocab/pro...,3816,0.099160,0.186769


In [5]:
url = 'https://api.cooperationdatabank.org/datasets/coda-dev/databank/services/databank/sparql'

def get_sparql_dataframe(service, query):
    sparql = SPARQLWrapper(service)
    out = []
   
    sparql.setQuery(query)

    sparql.setReturnFormat(JSON)
    result = sparql.query()
    processed_results = json.load(result.response)
    cols = processed_results['head']['vars']

    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))
        out.append(item)
        
    return pd.DataFrame(out, columns=cols)

In [3]:
observations = pd.read_csv('data/observations_with_intervals_final.csv', index_col = 0)
observations = observations.reset_index(drop=True)

In [None]:
data_table = pd.DataFrame(columns = ['obs'])
data_table = observations[['obs', 't1', 't2', 'independentProperties']]
data_table

In [None]:
def construct_category_data_table(observations): 
    loop = tqdm(total = len(observations), position=0, leave=False)

    categories = pd.DataFrame()
    variables = pd.Series()
    for index, row in observations.iterrows(): 
        obs = row['obs']
        t1 = row['t1']
        t2 = row['t2']
        inds = row['independentProperties']
        print(inds)
        if len(inds) < 3: 
            
            for variable in inds:
                variables = variables.append(pd.Series(variable))
                prefix = """ PREFIX owl: <http://www.w3.org/2002/07/owl#>
                        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                        PREFIX class: <https://data.cooperationdatabank.org/vocab/class/>
                        PREFIX property: <https://data.cooperationdatabank.org/vocab/prop/>
                        PREFIX id: <https://data.cooperationdatabank.org/id/> 
                        PREFIX dct: <http://purl.org/dc/terms/>
                        """

                query = prefix + """
                             SELECT * WHERE {
                             """ + '<' + obs + '>' + """ property:treatment ?treatment . 
                             ?treatment """ + '<' + variable + '>' + """ ?categoryOrValue. 
                             """ + '<' + variable + '>' + """  rdfs:range ?range . 

                            }"""
                
                dfje = get_sparql_dataframe(url, query)
                dfje['IV'] = variable
                categories = categories.append(dfje).reset_index(drop=True)
            
        loop.set_description("Loading...".format(index))
        loop.update(1)
    
    loop.close()
    return categories, variables

categories, variables = construct_category_data_table(data_table)
categories

In [None]:
pd.set_option('max_colwidth', 300)
categories

In [None]:
categories.to_csv('data/categories.csv')

In [None]:
variables.nunique()

In [None]:
counts = categories.groupby(['IV', 'categoryOrValue']).size().reset_index(name='Count')
pd.set_option('max_rows', 30)
counts = counts.sort_values('IV')
counts

In [None]:
%store -r categories
%store -r counts

In [None]:
counts = pd.read_csv('data/counts.csv', index_col = 0)
categories = pd.read_csv('data/categories.csv', index_col = 0)

In [None]:
categories

In [6]:
def create_category_output(catT1, catT2, var):
    categories = pd.Series([catT1, catT2])
    categories_placeholder = pd.Series()
    descriptions = pd.Series()
    range_type = query_range(var)
    treatment = pd.Series(['T1', 'T2'])
    
    if range_type != 'http://www.w3.org/2001/XMLSchema#string':
        for index, category in categories.items(): 
            try: 
                description = query_description(category)
            except: 
                continue
            if description != None: 
                slash = category.rfind('/')
                category_str = category[slash+1:]
                category_ser = pd.Series([category_str])
                category_ser.index = [index]
                categories_placeholder = categories_placeholder.append(category_ser)

                description = pd.Series([description])
                description.index = [index]
                descriptions = descriptions.append(description)

            else: 
                description = pd.Series([description])
                descriptions = descriptions.append(description)

    if descriptions.isna().any() == False and lenzi(descriptions) == False: 
        categories = categories_placeholder
        output = pd.DataFrame({'treatment' : treatment,
                               'IV value': categories, 
                               'description': descriptions}).reset_index(drop=True)
    else: 
        output = pd.DataFrame({'treatment' : treatment,
                               'IV value': categories}).reset_index(drop=True)
    return output

def disassemble_statement(statement): 
    result = statement.find(' ')
    item2_3 = statement[result+1:]
    result2 = item2_3.find(' ')

    item1 = statement[:result]
    item1a = item1[:item1.find('_H')]
    item2 = item2_3[:result2].strip()
    item3 = item2_3[result2:].strip()
    item2 = item2.replace('https://data.cooperationdatabank.org/vocab/prop/', '')
    return item1, item1a, item2, item3


def query_label(iri): 
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT * 
        WHERE {
         """ + '<' + iri + '>' + """ rdfs:label ?label . 
        }"""
    dfje = get_sparql_dataframe(url, query)
    try: 
        label = dfje['label'][0]
        return label
    except: 
        print("Except: No label found?")

def lenzi(df):
    return len(df.index) == 0

def query_description(entity): 
    prefix = """ PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX class: <https://data.cooperationdatabank.org/vocab/class/>
        PREFIX property: <https://data.cooperationdatabank.org/vocab/prop/>
        PREFIX id: <https://data.cooperationdatabank.org/id/> 
        PREFIX dct: <http://purl.org/dc/terms/>
"""
    query = prefix + """
            SELECT * 
            WHERE {
             """ + '<' + entity + '>' + """ dct:description ?description . 
            }"""
    
    dfje = get_sparql_dataframe(url, query)
   
    if lenzi(dfje) == False: 
        description = dfje['description']
        return description.item()



def effect_string_maker(effect):
    if effect == 'hasPositiveEffectOn': 
        effect_string = 'has positive effect on'
        return effect_string
    elif effect == 'hasNegativeEffectOn': 
        effect_string = 'has negative effect on'
        return effect_string

def query_range(IV): 
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT * 
        WHERE {
         """ + '<' + IV + '>' + """ rdfs:range ?range . 
        }"""
    dfje = get_sparql_dataframe(url, query)

    if dfje.empty == False:
        IVrange = dfje['range'][0]
        return IVrange

    

    
def control_function( rankings, obs):        
    for index, row in rankings.iterrows(): 
        IV_H, IV, effect, DV = disassemble_statement(row['statement'])
        effect_string = effect_string_maker(effect)
        
        dois = obs[obs['IV_new'].str.contains(str(IV_H))]['doi'].unique().tolist()
        catT1 = obs[obs['IV_new'].str.contains(str(IV_H))]['categoryT1'].unique().tolist()[0]
        catT2 = obs[obs['IV_new'].str.contains(str(IV_H))]['categoryT2'].unique().tolist()[0]
        
        output = create_category_output(catT1, catT2, IV)
        print('\n\nHypothesis ' + str(index+1) + ':', query_label(IV), effect_string, query_label(DV).lower(),
              '\n\nDependent Variable (DV): ', DV, 
              '\n\nIndependent Variable (IV): ', IV, IV_H, 
              '\nDescription IV: ', query_description(IV), 
              '\n\nThe IV categories that were researched and their descriptions are shown below: ')

        with pd.option_context('display.width', 10):  # more options can be specified also
            display(output.style.hide_index())
            print(output.to_latex())
    
        print('\nThe paper(s) that researched this topic is/are the following: ')
        for doi in dois: 
            if doi == 'nan':
                print('Papers are not available in the data') 
            else: 
                print(doi)

# Most likely examples 
print('POSSIBLE NEW HYPOTHESES WITH HIGHEST PROBABILITIES')
control_function(rankings[:10], observations)
# least likely examples 
print('\n\nHYPOTHESES WITH VERY LOW PROBABILITIES')
control_function(rankings[len(rankings)-10:], observations)

POSSIBLE NEW HYPOTHESES WITH HIGHEST PROBABILITIES


  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.




Hypothesis 1: iterated strategy has positive effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/iteratedStrategy https://data.cooperationdatabank.org/vocab/prop/iteratedStrategy_H6 
Description IV:  The specific strategy played by the participant's partner in an iterated game. 

The IV categories that were researched and their descriptions are shown below: 


treatment,IV value,description
T1,preprogrammed_cooperation_rate,The partner cooperates randomly with a given cooperation rate across the duration of the game
T2,other,Other strategies


\begin{tabular}{llll}
\toprule
{} & treatment &                        IV value &                                        description \\
\midrule
0 &        T1 &  preprogrammed\_cooperation\_rate &  The partner cooperates randomly with a given c... \\
1 &        T2 &                           other &                                   Other strategies \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1080/00224545.1970.9922453
http://dx.doi.org/10.1177/0022002783027003007
http://dx.doi.org/10.1177/0022002794038004006
http://dx.doi.org/10.1177/002200276901300108


  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.




Hypothesis 2: iterated strategy has positive effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/iteratedStrategy https://data.cooperationdatabank.org/vocab/prop/iteratedStrategy_H9 
Description IV:  The specific strategy played by the participant's partner in an iterated game. 

The IV categories that were researched and their descriptions are shown below: 


treatment,IV value,description
T1,predominantly_cooperative,"Partner cooperates over most trials, but the study doesn't specify the exact rate"
T2,other,Other strategies


\begin{tabular}{llll}
\toprule
{} & treatment &                   IV value &                                        description \\
\midrule
0 &        T1 &  predominantly\_cooperative &  Partner cooperates over most trials, but the s... \\
1 &        T2 &                      other &                                   Other strategies \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.3758/bf03328771


Hypothesis 3: ethnicity (us) has positive effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/ethnicityUS https://data.cooperationdatabank.org/vocab/prop/ethnicityUS_H1 
Description IV:  Participant's ethnic group (values adopted from US Census). 

The IV categories that were researched and their descriptions are shown below: 


treatment,IV value,description
T1,white,"Europe, Middle East, North Africa"
T2,black_or_african_american,Africa


\begin{tabular}{llll}
\toprule
{} & treatment &                   IV value &                        description \\
\midrule
0 &        T1 &                      white &  Europe, Middle East, North Africa \\
1 &        T2 &  black\_or\_african\_american &                             Africa \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1016/j.jebo.2015.02.018
http://dx.doi.org/10.1080/00224545.1970.9922453
http://dx.doi.org/10.2466/pr0.1966.18.3.818
http://dx.doi.org/10.1162/rest.2009.10174
http://dx.doi.org/NA
http://dx.doi.org/10.1080/00224545.1988.9713767


Hypothesis 4: punishment treatment has negative effect on contributions 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/contributions 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/punishmentTreatment https://data.cooperationdatabank.org/vocab/prop/punishmentTreatment_H3 
Description IV:  Punishm

treatment,IV value,description
T1,-1,"The baseline treatment. This is the treatment compared against any treatment coded as punishment treatment (= 1 identifies the punishment effect). This means that the treatment is equal to the punishment treatment, except for the availability of punishment (which includes other concepts that are part of punishment, such as punishment effectiveness, etc.)"
T2,0,"Is not a punishment treatment. This is any treatment that is neither a punishment treatment nor a baseline relative to the punishment treatment (In practice, these are ignored in the platform)"


\begin{tabular}{llll}
\toprule
{} & treatment & IV value &                                        description \\
\midrule
0 &        T1 &       -1 &  The baseline treatment. This is the treatment ... \\
1 &        T2 &        0 &  Is not a punishment treatment. This is any tre... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1371/journal.pone.0069871
http://dx.doi.org/10.1038/ncomms12288
http://dx.doi.org/10.1016/j.euroecorev.2012.05.003
http://dx.doi.org/10.1016/j.jpubeco.2014.02.003


Hypothesis 5: partner's group membership has negative effect on contributions 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/contributions 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/targetMembership https://data.cooperationdatabank.org/vocab/prop/targetMembership_H5 
Description IV:  Whether the participant is interacting with a partner identified as ingroup, out

treatment,IV value,description
T1,ingroup,Partner(s) is a member of the participant's group
T2,ingroup_and_outgroup,When an experimental treatment explicitly provides information that a partner or group belongs to both an ingroup and an outgroup


\begin{tabular}{llll}
\toprule
{} & treatment &              IV value &                                        description \\
\midrule
0 &        T1 &               ingroup &  Partner(s) is a member of the participant's group \\
1 &        T2 &  ingroup\_and\_outgroup &  When an experimental treatment explicitly prov... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1016/j.joep.2013.06.005
http://dx.doi.org/10.1177/0146167205282149
http://dx.doi.org/10.1016/j.ijintrel.2011.02.017


Hypothesis 6: svo type has negative effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/sVOType https://data.cooperationdatabank.org/vocab/prop/sVOType_H4 
Description IV:  Participants were classified according to the specific type of social value orientation that resulted in the SVO measure.  

T

treatment,IV value,description
T1,individualist,"Individualists aim to maximize their own outcomes, regardless of the othersâ outcomes"
T2,competitor,Competitors aim to maximize the relative difference between their own and the othersâ outcome


\begin{tabular}{llll}
\toprule
{} & treatment &       IV value &                                        description \\
\midrule
0 &        T1 &  individualist &  Individualists aim to maximize their own outco... \\
1 &        T2 &     competitor &  Competitors aim to maximize the relative diffe... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
nan
http://dx.doi.org/10.1016/s0749-5978(02)00511-3
http://dx.doi.org/10.1037//0022-3514.72.5.1093
http://dx.doi.org/10.1037//0022-3514.32.5.922
http://dx.doi.org/10.1037//0022-3514.34.1.69
http://dx.doi.org/10.1111/jopy.12139
http://dx.doi.org/10.1177/1948550617699256
http://dx.doi.org/NA
http://dx.doi.org/10.1037/0022-3514.77.6.1245
http://dx.doi.org/10.1037//0022-3514.77.2.337
http://dx.doi.org/10.1002/per.2410060505
http://dx.doi.org/10.1037//0022-3514.77.4.762


Hypothesis 7: time pressure has negative effect on contributions 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/de

treatment,IV value,description
T1,time_pressure,Participants are given a limited amount of time and asked to make their decision as quickly as possible
T2,time_delay,Participants are given relatively more time to think carefully about their decision before making it


\begin{tabular}{llll}
\toprule
{} & treatment &       IV value &                                        description \\
\midrule
0 &        T1 &  time\_pressure &  Participants are given a limited amount of tim... \\
1 &        T2 &     time\_delay &  Participants are given relatively more time to... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
nan
http://dx.doi.org/10.1177/1745691617693624
http://dx.doi.org/10.1016/j.jesp.2017.06.014
http://dx.doi.org/10.1016/j.cognition.2016.02.014
http://dx.doi.org/10.1038/nature11467
http://dx.doi.org/10.1371/journal.pone.0096654


Hypothesis 8: intergroup competition has positive effect on contributions 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/contributions 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/intergroupComp https://data.cooperationdatabank.org/vocab/prop/intergroupComp_H1 
Description IV:  A setting whereby multipl

treatment,IV value,description
T1,individual_group,Group of individuals making decisions that only affect that group
T2,intergroup_competition,"Two groups and the higher earning group wins, or earns more"


\begin{tabular}{llll}
\toprule
{} & treatment &                IV value &                                        description \\
\midrule
0 &        T1 &        individual\_group &  Group of individuals making decisions that onl... \\
1 &        T2 &  intergroup\_competition &  Two groups and the higher earning group wins, ... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
nan
http://dx.doi.org/10.1371/journal.pone.0157840
http://dx.doi.org/10.1037/0022-3514.62.2.238
http://dx.doi.org/NA
http://dx.doi.org/10.1016/j.obhdp.2005.08.005
http://dx.doi.org/10.1177/002200276901300305


Hypothesis 9: anonymity manipulation has positive effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/anonymityManipul https://data.cooperationdatabank.org/vocab/prop/anonymityManipul_H1 
Description IV:  Degree to which p

treatment,IV value,description
T1,high,The treatment was classified as having a relative high level of anonymity
T2,low,The treatment was classified as having a relative low level of anonymity


\begin{tabular}{llll}
\toprule
{} & treatment & IV value &                                        description \\
\midrule
0 &        T1 &     high &  The treatment was classified as having a relat... \\
1 &        T2 &      low &  The treatment was classified as having a relat... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1080/17565529.2016.1174664
http://dx.doi.org/10.1016/j.jpubeco.2007.11.004
nan
http://dx.doi.org/10.1098/rspb.2017.0689
http://dx.doi.org/10.1016/0167-2681(93)90001-6
http://dx.doi.org/10.1080/00224540209603917
http://dx.doi.org/10.1006/jmps.1997.1203
http://dx.doi.org/10.1093/scan/nst040
http://dx.doi.org/10.1016/j.jebo.2017.07.007
http://dx.doi.org/10.1016/j.geb.2017.08.002
http://dx.doi.org/10.1007/s10683-013-9377-5
http://dx.doi.org/10.1556/jep.11.2013.3.3
http://dx.doi.org/10.1037/h0037450
http://dx.doi.org/10.2139/ssrn.2556325
http://dx.doi.org/10.1016/0167-2681(94)00073-n
http://dx.doi.org/

treatment,IV value
T1,"(-0.401, 0.3]"
T2,"(0.3, 0.5]"


\begin{tabular}{lll}
\toprule
{} & treatment &       IV value \\
\midrule
0 &        T1 &  (-0.401, 0.3] \\
1 &        T2 &     (0.3, 0.5] \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1007/s10726-006-9067-1
http://dx.doi.org/10.1016/0167-2681(93)90001-6
http://dx.doi.org/10.1016/j.euroecorev.2017.07.013
http://dx.doi.org/10.1371/journal.pone.0120379


HYPOTHESES WITH VERY LOW PROBABILITIES


Hypothesis 551: emotion has positive effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/emotion https://data.cooperationdatabank.org/vocab/prop/emotion_H10 
Description IV:  Emotions are discrete, automatic responses to universally shared, culture-specific and individual-specific events (Ekman & Cordaro, 2011).  

The IV categories that were researched and their descriptions are shown 

treatment,IV value,description
T1,neutral,Often used as a baseline in studies that elicit emotions. Participants are exposed either to stimuli with neutral emotional valence or engage in a non-emotional control task
T2,disappointment,"A response to unfulfilled positive expectations (Van Dijk et al., 1999)"


\begin{tabular}{llll}
\toprule
{} & treatment &        IV value &                                        description \\
\midrule
0 &        T1 &         neutral &  Often used as a baseline in studies that elici... \\
1 &        T2 &  disappointment &  A response to unfulfilled positive expectation... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1080/02699931.2010.485889


Hypothesis 552: exit option has positive effect on contributions 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/contributions 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/exitOption https://data.cooperationdatabank.org/vocab/prop/exitOption_H1 
Description IV:  Whether the game provided the option to walk away and not interact in the game. TRUE = Exit option is provided, FALSE = Exit option is not provided 

The IV categories that were researched and their descriptions are shown

treatment,IV value
T1,0
T2,1


\begin{tabular}{lll}
\toprule
{} & treatment & IV value \\
\midrule
0 &        T1 &        0 \\
1 &        T2 &        1 \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1371/journal.pone.0069871
http://dx.doi.org/10.1037/h0024598


Hypothesis 553: nationality or region has negative effect on contributions 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/contributions 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/nationality https://data.cooperationdatabank.org/vocab/prop/nationality_H23 
Description IV:  Participant's nationality according to ISO-3 Country Code. 

The IV categories that were researched and their descriptions are shown below: 


treatment,IV value
T1,JPN
T2,AUS


\begin{tabular}{lll}
\toprule
{} & treatment & IV value \\
\midrule
0 &        T1 &      JPN \\
1 &        T2 &      AUS \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1111/j.1467-839x.2005.00165.x


Hypothesis 554: emotion has negative effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/emotion https://data.cooperationdatabank.org/vocab/prop/emotion_H10 
Description IV:  Emotions are discrete, automatic responses to universally shared, culture-specific and individual-specific events (Ekman & Cordaro, 2011).  

The IV categories that were researched and their descriptions are shown below: 


treatment,IV value,description
T1,neutral,Often used as a baseline in studies that elicit emotions. Participants are exposed either to stimuli with neutral emotional valence or engage in a non-emotional control task
T2,disappointment,"A response to unfulfilled positive expectations (Van Dijk et al., 1999)"


\begin{tabular}{llll}
\toprule
{} & treatment &        IV value &                                        description \\
\midrule
0 &        T1 &         neutral &  Often used as a baseline in studies that elici... \\
1 &        T2 &  disappointment &  A response to unfulfilled positive expectation... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1080/02699931.2010.485889


Hypothesis 555: preference for conditional cooperation has positive effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/preferenceConditionalCooperation https://data.cooperationdatabank.org/vocab/prop/preferenceConditionalCooperation_H4 
Description IV:  Participants are classified as having a specific dispositional strategy of cooperation using a strategy method (see Fischbacher et al. 2001).  

The IV ca

treatment,IV value,description
T1,freeriders,"Using the strategy method of contributions to public goods by Fischbacher et al. (2001), participants who always contributed nothing to the public good"
T2,hump-shaped_contributors,"Using the strategy method of contributions to public goods by Fischbacher et al. (2001), participants who initially provide increasing amounts to the public good according to increasing average group contributions, but then at some point give a significantly decreasing amount to the public good"


\begin{tabular}{llll}
\toprule
{} & treatment &                  IV value &                                        description \\
\midrule
0 &        T1 &                freeriders &  Using the strategy method of contributions to ... \\
1 &        T2 &  hump-shaped\_contributors &  Using the strategy method of contributions to ... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.2139/ssrn.203288


Hypothesis 556: preference for conditional cooperation has negative effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/preferenceConditionalCooperation https://data.cooperationdatabank.org/vocab/prop/preferenceConditionalCooperation_H4 
Description IV:  Participants are classified as having a specific dispositional strategy of cooperation using a strategy method (see Fischbacher et a

treatment,IV value,description
T1,freeriders,"Using the strategy method of contributions to public goods by Fischbacher et al. (2001), participants who always contributed nothing to the public good"
T2,hump-shaped_contributors,"Using the strategy method of contributions to public goods by Fischbacher et al. (2001), participants who initially provide increasing amounts to the public good according to increasing average group contributions, but then at some point give a significantly decreasing amount to the public good"


\begin{tabular}{llll}
\toprule
{} & treatment &                  IV value &                                        description \\
\midrule
0 &        T1 &                freeriders &  Using the strategy method of contributions to ... \\
1 &        T2 &  hump-shaped\_contributors &  Using the strategy method of contributions to ... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.2139/ssrn.203288


Hypothesis 557: uncertainty target has negative effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/uncertaintyTarget https://data.cooperationdatabank.org/vocab/prop/uncertaintyTarget_H1 
Description IV:  Whether participants receive uncertain information about a certain aspect of the game. This also includes receiving detailed vs less specific information, as well as events happening

treatment,IV value,description
T1,loss,"Negative payoff that resulted in a loss of resources due to endogenous (e.g., not reaching a threshold) or exogenous (e.g., shocks) factors"
T2,threshold,"The minimum threshold of total contribution by all group members for the public good to be provided or the occurrence of a loss (e.g., in case of public bad games)"


\begin{tabular}{llll}
\toprule
{} & treatment &   IV value &                                        description \\
\midrule
0 &        T1 &       loss &  Negative payoff that resulted in a loss of res... \\
1 &        T2 &  threshold &  The minimum threshold of total contribution by... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1073/pnas.1208417109


Hypothesis 558: iterated strategy has positive effect on contributions 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/contributions 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/iteratedStrategy https://data.cooperationdatabank.org/vocab/prop/iteratedStrategy_H14 
Description IV:  The specific strategy played by the participant's partner in an iterated game. 

The IV categories that were researched and their descriptions are shown below: 


treatment,IV value,description
T1,tit-for-tat,"The partner cooperates on the first trial, then copies the previous choices of the focal participant on each subsequent trial"
T2,tit-for-tat+1,"The partner cooperates on the first trial, and then copies the choices of the focal participant with a delay (i.e., cooperates when the focal agent cooperates, but defects only after two consecutive defections from the focal participant)"


\begin{tabular}{llll}
\toprule
{} & treatment &       IV value &                                        description \\
\midrule
0 &        T1 &    tit-for-tat &  The partner cooperates on the first trial, the... \\
1 &        T2 &  tit-for-tat+1 &  The partner cooperates on the first trial, and... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1037/0022-3514.87.6.845
http://dx.doi.org/10.1037//0022-3514.82.5.768


Hypothesis 559: uncertainty target has positive effect on cooperation 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/cooperation 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/uncertaintyTarget https://data.cooperationdatabank.org/vocab/prop/uncertaintyTarget_H1 
Description IV:  Whether participants receive uncertain information about a certain aspect of the game. This also includes receiving detailed vs less specific information, as well 

treatment,IV value,description
T1,loss,"Negative payoff that resulted in a loss of resources due to endogenous (e.g., not reaching a threshold) or exogenous (e.g., shocks) factors"
T2,threshold,"The minimum threshold of total contribution by all group members for the public good to be provided or the occurrence of a loss (e.g., in case of public bad games)"


\begin{tabular}{llll}
\toprule
{} & treatment &   IV value &                                        description \\
\midrule
0 &        T1 &       loss &  Negative payoff that resulted in a loss of res... \\
1 &        T2 &  threshold &  The minimum threshold of total contribution by... \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1073/pnas.1208417109


Hypothesis 560: exit option has negative effect on contributions 

Dependent Variable (DV):  https://data.cooperationdatabank.org/id/dependentvariable/contributions 

Independent Variable (IV):  https://data.cooperationdatabank.org/vocab/prop/exitOption https://data.cooperationdatabank.org/vocab/prop/exitOption_H1 
Description IV:  Whether the game provided the option to walk away and not interact in the game. TRUE = Exit option is provided, FALSE = Exit option is not provided 

The IV categories that were researched and their descriptions are shown below: 


treatment,IV value
T1,0
T2,1


\begin{tabular}{lll}
\toprule
{} & treatment & IV value \\
\midrule
0 &        T1 &        0 \\
1 &        T2 &        1 \\
\bottomrule
\end{tabular}


The paper(s) that researched this topic is/are the following: 
http://dx.doi.org/10.1371/journal.pone.0069871
http://dx.doi.org/10.1037/h0024598


In [None]:
observations

In [None]:
from docx.enum.dml import MSO_THEME_COLOR_INDEX
import docx
def add_hyperlink(paragraph, text, url):
    # This gets access to the document.xml.rels file and gets a new relation id value
    part = paragraph.part
    r_id = part.relate_to(url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True)

    # Create the w:hyperlink tag and add needed values
    hyperlink = docx.oxml.shared.OxmlElement('w:hyperlink')
    hyperlink.set(docx.oxml.shared.qn('r:id'), r_id, )

    # Create a w:r element and a new w:rPr element
    new_run = docx.oxml.shared.OxmlElement('w:r')
    rPr = docx.oxml.shared.OxmlElement('w:rPr')

    # Join all the xml elements together add add the required text to the w:r element
    new_run.append(rPr)
    new_run.text = text
    hyperlink.append(new_run)

    # Create a new Run object and add the hyperlink into it
    r = paragraph.add_run ()
    r._r.append (hyperlink)

    # A workaround for the lack of a hyperlink style (doesn't go purple after using the link)
    # Delete this if using a template that has the hyperlink style in it
    r.font.color.theme_color = MSO_THEME_COLOR_INDEX.HYPERLINK
    r.font.underline = True

    return hyperlink

In [None]:
def control_function2(document, hnumber, rankings, obs):        
    for index, row in rankings.iterrows(): 
        IV_H, IV, effect, DV = disassemble_statement(row['statement'])
        effect_string = effect_string_maker(effect)

        dois = obs[obs['IV_new'].str.contains(str(IV_H))]['doi'].unique().tolist()
        catT1 = obs[obs['IV_new'].str.contains(str(IV_H))]['categoryT1'].unique().tolist()[0]
        catT2 = obs[obs['IV_new'].str.contains(str(IV_H))]['categoryT2'].unique().tolist()[0]
        
        output = create_category_output(catT1, catT2, IV)
        document.add_heading(('Hypothesis ' + str(hnumber) + ': ' + query_label(IV) + ' ' + effect_string + ' ' + query_label(DV).lower()), level=1)
        p = document.add_paragraph('Dependent Variable (DV): ')
        add_hyperlink(p, str(DV), DV)

        p = document.add_paragraph('\nInependent Variable (IV): ')
        add_hyperlink(p, str(IV), IV)
            
        document.add_paragraph('Description IV: ' + query_description(IV) + 
                               '\n\nThe IV values that were researched and (if available) their descriptions are shown below: ')
        
        t = document.add_table(output.shape[0]+1, output.shape[1])
        
        # add the header rows.
        for j in range(output.shape[-1]):
            t.cell(0,j).text = output.columns[j]

        # add the rest of the data frame
        for i in range(output.shape[0]):
            for j in range(output.shape[-1]):
                t.cell(i+1,j).text = str(output.values[i,j])
                
        document.add_paragraph('\nThe paper(s) that researched this topic is/are the following: ')
        for doi in dois: 
            if type(doi) == float:
                continue
            else:
                p = document.add_paragraph('')
                add_hyperlink(p, str(doi), doi)
                
    document.save('hypotheses.docx')


# # Most likely examples 
# print('POSSIBLE NEW HYPOTHESES WITH HIGHEST PROBABILITIES')
# control_function(counts, rankings[:10], observations)
# # least likely examples 
# print('\n\nHYPOTHESES WITH VERY LOW PROBABILITIES')
# control_function(counts, rankings[len(rankings)-10:], observations)

In [None]:
import random

indexes = [0,1,2,3,4,5,6,7,8,9,550,551,552,553,554,555,556,557,558,559]

def random_index(data):
    choice = random.choice(data) 
    data.remove(choice) 
    return choice, data 

# choice, indexjes = simple(indexes)

document = Document()
document.add_heading('Hypotheses', 0)
hnumber = 0
for r in range(len(indexes)): 
    hnumber += 1
    choice, indexes = random_index(indexes)
    control_function2(document, hnumber, rankings[choice:choice+1], observations)
    print(hnumber, choice)