In [7]:
import nltk
import numpy as np
import collections
from tqdm import tqdm
import math
from nltk.stem import WordNetLemmatizer,PorterStemmer
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from itertools import product
from itertools import combinations

In [8]:
# Read in the text corpus from a file
with open("tasaSentDocs.txt", "r") as file:
    text_corpus = file.read()

In [10]:

typefr=collections.Counter()
def clean(s1):
	stat=[]
	stop_words=set(stopwords.words("english"))

	#Tokenizing the sentence
	tokenizer= RegexpTokenizer(r'\w+')
	words=tokenizer.tokenize(s1)

	#Stemmer and Lemmatizer instance created
	ps=PorterStemmer()
	lemmatizer= WordNetLemmatizer()
	pronouns = ['he', 'him', 'his', 'she','her','hers']
	#Lemmatizing words and adding to the final array if they are not stopwords
	for w in words:
		if w not in stop_words or w in pronouns:
			w=lemmatizer.lemmatize(w)
			stat.append(w)
			typefr[w] += 1

	return stat

In [11]:
tokens = clean(text_corpus)

In [12]:
lr= WordNetLemmatizer()

In [13]:
m=len(tokens)
unique = typefr.keys()
m_prime = sum([typefr[t]**0.75 for t in unique])

In [23]:


def ppmi(word_pairs, tokens, context_window_size = 5):
    
    # Initialize a dictionary to store co-occurrence counts for your word pairs
    co_occurrence_counts = collections.Counter()

    # Iterate through the tokens and count co-occurrences of your word pairs within the context window
    for i in tqdm(range(m)):
        for words in word_pairs:
            if words[0] == tokens[i]:
                for j in range(max(0, i - context_window_size), min(len(tokens), i + context_window_size + 1)):
                    if words[1] == tokens[j]:
                        co_occurrence_counts[(words[0], words[1])] += 1
        

    # Calculate the PMI for each word pair
    ppmi_scores = {}
    for words in word_pairs:
        word1 = words[0]
        word2 = words[1]

        # if p_word1 == 0:
        #     return word1
        # if p_word2 == 0:
        #     return word2
        if co_occurrence_counts[(word1, word2)] == 0:
            ppmi = 0
            #print(word1, word2)
        else:
            ppmi = max(0, math.log(co_occurrence_counts[(word1, word2)]* m/(typefr[word1]*typefr[word2]),2))
        ppmi_scores[(word1, word2)] = ppmi


    return ppmi_scores

def socpmi(word_pairs, tokens, delta, gamma, context_window_size = 5):
    # second order co-occurrence PMI
    socpmi_scores = {}
    
    a = context_window_size
    

    for words in tqdm(word_pairs):
        w1 = words[0]
        w2 = words[1]
        neighboursw1=collections.Counter()
        n2w1=[]
        neighboursw2=collections.Counter()
        n2w2=[]
        for i in range(len(tokens)):
            if w1==tokens[i]:
                curr_window = tokens[max(0, i - a):min(len(tokens), i + a + 1)]
                if w2 in curr_window:
                    continue # skip if w2 is in the context window of w1

                #neighboursw1[tokens[i]]+=1
                for j in range(1,a+1):
                    neighboursw1[tokens[i+j]]+=1
                    neighboursw1[tokens[i-j]]+=1

            elif w2==tokens[i]:
                curr_window = tokens[max(0, i - a):min(len(tokens), i + a + 1)]
                if w1 in curr_window:
                    continue
                #neighboursw2[tokens[i]]+=1
                for j in range(1,a+1):
                    neighboursw2[tokens[i+j]]+=1
                    neighboursw2[tokens[i-j]]+=1   

        #print(neighboursw1)
        pmiw1={}
        #pmi1_old = {}
        for t in neighboursw1.keys():
            pmiw1[t]= max(0, math.log(neighboursw1[t]* m_prime/((typefr[t]**0.75)*typefr[w1]),2))
            #pmi1_old[t] = max(0, math.log(neighboursw1[t]* m/(typefr[t]*typefr[w1]),2))

        #print(neighboursw2)
        pmiw2={}
        #pmi2_old = {}
        for t in neighboursw2.keys():
            pmiw2[t]= max(0,math.log((neighboursw2[t]*m_prime/((typefr[t]**0.75)*typefr[w2])),2))
            #pmi2_old[t] = max(0, math.log(neighboursw2[t]* m/(typefr[t]*typefr[w2]),2))


        pmiw1_sorted = sorted(pmiw1, key=pmiw1.get, reverse=True)
        pmiw2_sorted = sorted(pmiw2, key=pmiw2.get, reverse=True)
        
        # for i in range(20):
        #     print(w1, pmiw1_sorted[i], pmiw1[pmiw1_sorted[i]], pmi1_old[pmiw1_sorted[i]])
        
        # for i in range(20):
        #     print(w2, pmiw2_sorted[i], pmiw2[pmiw2_sorted[i]], pmi2_old[pmiw2_sorted[i]])
        
        b1= math.floor((math.pow(math.log10(typefr[w1]),2)* math.log(len(unique),2))/delta)
        b2= math.floor((math.pow(math.log10(typefr[w2]),2) * math.log(len(unique),2))/delta)

        # print("b1:", b1)
        # print("b2:", b2)        
        # print(pmiw1_sorted)
        if b1>len(pmiw1_sorted):
            b1=len(pmiw1_sorted)
        
        if b2>len(pmiw2_sorted):
            b2=len(pmiw2_sorted)

        # print("b1:", b1)
        # print("b2:", b2)

        betasumw1=0
        betasumw2=0

        # print("pmiw1_sorted:", pmiw1_sorted[:b1])
        # print("pmiw2_sorted:", pmiw2_sorted[:b2])
        
        for i in range(0,b1):
            for j in range(0,b2):
                if pmiw1_sorted[i]==pmiw2_sorted[j]:
                    #print(pmiw1_sorted[i])
                    betasumw1+=math.pow(pmiw2[pmiw1_sorted[i]],gamma)
                    betasumw2+=math.pow(pmiw1[pmiw1_sorted[i]],gamma)

        #print("betasumw1:", betasumw1)
        #print("betasumw2:", betasumw2)
        if b1==0:
          b1 = 1
        if b2==0:
          b2 = 1
        similarity= betasumw1/b1 + betasumw2/b2
        
        socpmi_scores[(w1, w2)] = similarity

    return socpmi_scores

SOC-PMI settings: delta = 0.4, gamma = 1.75

In [8]:
def ratio(pairs, first, second):
    ratio = {}
    for pair in pairs:
        ppmi_val = first[pair]
        socpmi_val = second[pair]
        if ppmi_val == 0:
            ppmi_val = 0.01
        ratio[pair] = socpmi_val/ppmi_val 
    return ratio

In [9]:
def effect_size(X, Y, A, B, sim_scores):
    # for each x in set X, s= average similarity of x to each word a in set A - average similarity of x to each word b in set B
    # for each y in set Y, s= average similarity of y to each word a in set A - average similarity of y to each word b in set B
    # effect size = average of s for each x in X - average of s for each y in Y divided by the standard deviation of s for each x in X and y in Y
    
    s_x = []
    s_y = []
    for x in X:
        s = np.mean([sim_scores[(x, a)] for a in A]) - np.mean([sim_scores[(x, b)] for b in B])
        s_x.append(s)

    for y in Y:
        s = np.mean([sim_scores[(y, a)] for a in A]) - np.mean([sim_scores[(y, b)] for b in B])
        s_y.append(s)

    diff = np.mean(s_x) - np.mean(s_y)
    pooled_sd = np.std(s_x+s_y)
    return diff / pooled_sd
    

In [21]:
# if needed, do non-exhaustive test with 1000 permutation
def get_parts(X, Y):
  if len(X)!=len(Y):
    return "uneven target set lengths"
  half_length = len(X)
  all_combinations = combinations(X+Y, half_length)
    
  equal_splits = []
  for combo in tqdm(all_combinations):
      remaining = list(X+Y)
      for item in combo:
          remaining.remove(item)
      equal_splits.append((list(combo), remaining))
  return equal_splits

def p_test(X, Y, A, B, sim_scores, equal_splits):
  s_w = {}
  for i in X+Y:
    s_w[i] = np.mean([sim_scores[(i, a)] for a in A]) - np.mean([sim_scores[(i, b)] for b in B])

  obs = sum([s_w[x] for x in X]) - sum([s_w[y] for y in Y])

  stats = np.array([sum([s_w[x] for x in part[0]]) - sum([s_w[y] for y in part[1]]) for part in equal_splits])
  p = np.sum(stats > obs) / len(stats)
  return p

In [10]:
def check_words(X,Y,A,B):
    for word in X + Y + A + B:
        if lr.lemmatize(word) not in tokens:
            print(word)
    return

In [11]:
df = {'test': ['WEAT1','WEAT2','WEAT3','WEAT4','WEAT5','WEAT6','WEAT7','WEAT8','WEAT9','WEAT10','WEFAT1', 'WEFAT2'], 'first_order_es': [], 'second_order_es': [], 'ratio_es':[]}

For each test set below, the two targets listed are represented by X and Y, respectively, and the two attributes listed by A and B, respectively. For the WEFATs, there is only one target set W.

## WEAT 1


In [16]:
# targets: flowers and insects
X = ['aster', 'clover', 'hyacinth', 'marigold', 'poppy', 'azalea', 'crocus', 'iris', 'orchid', 'rose', 'bluebell', 'daffodil', 'lilac', 'pansy', 'tulip', 'buttercup', 'daisy', 'lily', 'peony', 'violet', 'carnation', 'gladiola','magnolia', 'petunia', 'zinnia']
Y = ['ant', 'caterpillar', 'flea', 'locust', 'spider', 'bedbug', 'centipede', 'fly', 'maggot', 'tarantula', 'bee', 'cockroach', 'gnat', 'mosquito', 'termite', 'beetle', 'cricket', 'hornet', 'moth', 'wasp', 'blackfly', 'dragonfly', 'horsefly', 'roach', 'weevil']
# attributes: pleasant and unpleasant
A = ['caress', 'freedom', 'health', 'love', 'peace', 'cheer', 'friend', 'heaven', 'loyal', 'pleasure', 'diamond', 'gentle', 'honest', 'lucky', 'rainbow', 'diploma', 'gift', 'honor', 'miracle', 'sunrise', 'family', 'happy', 'laughter', 'paradise', 'vacation']
B = ['abuse', 'crash', 'filth', 'murder', 'sickness', 'accident', 'death', 'grief', 'poison', 'stink', 'assault', 'disaster', 'hatred', 'pollute', 'tragedy', 'divorce', 'jail', 'poverty', 'ugly', 'cancer', 'kill', 'rotten', 'vomit', 'agony', 'prison']

In [13]:
check_words(X,Y,A,B)

gladiola
bedbug


In [14]:
# remove words not found in corpus
X.remove('gladiola')
Y.remove('bedbug')

In [17]:
pairs = list(product(X+Y, A+B))
first_order = ppmi(pairs, tokens)
second_order = socpmi(pairs, tokens, 0.4, 1.75)
sf_ratio = ratio(pairs, first_order, second_order)

100%|██████████| 2400/2400 [00:00<00:00, 26912.08it/s]


aster caress
aster freedom
aster health
aster love
aster peace
aster cheer
aster friend
aster heaven
aster loyal
aster pleasure
aster diamond
aster gentle
aster honest
aster lucky
aster rainbow
aster diploma
aster gift
aster honor
aster miracle
aster sunrise
aster family
aster happy
aster laughter
aster vacation
aster abuse
aster crash
aster filth
aster murder
aster sickness
aster accident
aster death
aster grief
aster poison
aster stink
aster assault
aster disaster
aster hatred
aster pollute
aster tragedy
aster divorce
aster jail
aster poverty
aster ugly
aster cancer
aster rotten
aster vomit
aster agony
aster prison
clover caress
clover freedom
clover health
clover cheer
clover friend
clover heaven
clover loyal
clover pleasure
clover diamond
clover gentle
clover honest
clover lucky
clover rainbow
clover diploma
clover gift
clover honor
clover miracle
clover sunrise
clover family
clover happy
clover laughter
clover paradise
clover vacation
clover abuse
clover crash
clover filth
clover 

100%|██████████| 2400/2400 [44:37<00:00,  1.12s/it]  


In [18]:
df['first_order_es'].append(effect_size(X, Y, A, B, first_order))
df['second_order_es'].append(effect_size(X, Y, A, B, second_order))
df['ratio_es'].append(effect_size(X, Y, A, B, sf_ratio))

## WEAT 2


In [35]:

# targets: instruments and weapons
X = ['bagpipe', 'cello', 'guitar', 'lute', 'trombone', 'banjo', 'clarinet', 'harmonica', 'mandolin', 'trumpet', 'bassoon', 'drum', 'harp', 'oboe', 'tuba', 'bell', 'fiddle', 'harpsichord', 'piano', 'viola', 'bongo', 'flute', 'horn', 'saxophone', 'violin']
Y = ['arrow', 'club', 'gun', 'missile', 'spear', 'axe', 'dagger', 'harpoon', 'pistol', 'sword', 'blade', 'dynamite', 'hatchet', 'rifle', 'tank', 'bomb', 'firearm', 'knife', 'shotgun', 'teargas', 'cannon', 'grenade', 'mace', 'slingshot', 'whip']
# attributes: pleasant and unpleasant (see above)

In [37]:
Y1 = [lr.lemmatize(w) for w in Y]
print(Y1 == Y)

True


In [20]:
check_words(X,Y,A,B)

teargas


In [26]:
X.remove('bassoon')
Y.remove('teargas')

In [27]:
pairs = list(product(X+Y, A+B))
first_order = ppmi(pairs, tokens)
second_order = socpmi(pairs, tokens, 0.4, 1.75)
sf_ratio = ratio(pairs, first_order, second_order)

100%|██████████| 2400/2400 [00:00<00:00, 78125.01it/s]


bagpipe caress
bagpipe freedom
bagpipe health
bagpipe love
bagpipe peace
bagpipe cheer
bagpipe friend
bagpipe heaven
bagpipe loyal
bagpipe pleasure
bagpipe diamond
bagpipe gentle
bagpipe honest
bagpipe lucky
bagpipe rainbow
bagpipe diploma
bagpipe gift
bagpipe honor
bagpipe miracle
bagpipe sunrise
bagpipe happy
bagpipe laughter
bagpipe paradise
bagpipe vacation
bagpipe abuse
bagpipe crash
bagpipe filth
bagpipe murder
bagpipe sickness
bagpipe accident
bagpipe death
bagpipe grief
bagpipe poison
bagpipe stink
bagpipe assault
bagpipe disaster
bagpipe hatred
bagpipe pollute
bagpipe tragedy
bagpipe divorce
bagpipe jail
bagpipe poverty
bagpipe ugly
bagpipe cancer
bagpipe kill
bagpipe rotten
bagpipe vomit
bagpipe agony
bagpipe prison
cello caress
cello freedom
cello health
cello love
cello peace
cello cheer
cello friend
cello heaven
cello loyal
cello pleasure
cello diamond
cello gentle
cello honest
cello lucky
cello rainbow
cello diploma
cello gift
cello honor
cello miracle
cello sunrise
cello

100%|██████████| 2400/2400 [57:49<00:00,  1.45s/it] 


In [29]:
df['first_order_es'].append(effect_size(X, Y, A, B, first_order))
df['second_order_es'].append(effect_size(X, Y, A, B, second_order))
df['ratio_es'].append(effect_size(X, Y, A, B, sf_ratio))

In [30]:
print(df)

{'test': ['WEAT1', 'WEAT2', 'WEAT3', 'WEAT4', 'WEAT5', 'WEAT6', 'WEAT7', 'WEAT8', 'WEAT9', 'WEAT10', 'WEFAT1', 'WEFAT2'], 'first_order_es': [0.39178046908432596, 0.6731976251858595], 'second_order_es': [1.4008827166212436, 0.8216800522465267], 'ratio_es': [1.3482204602215433, 0.5973968730911328]}


## WEAT 3


In [17]:

# targets: European American and African American names
X = ['Adam', 'Harry', 'Josh', 'Roger', 'Alan', 'Frank', 'Justin', 'Ryan', 'Andrew', 'Jack', 'Matthew', 'Stephen', 'Brad', 'Greg', 'Paul', 'Jonathan', 'Peter', 'Amanda', 'Courtney', 'Heather', 'Melanie', 'Katie', 'Betsy', 'Kristin', 'Nancy', 'Stephanie', 'Ellen', 'Lauren', 'Colleen', 'Emily', 'Megan', 'Rachel']
Y = ['Alonzo', 'Jamel', 'Theo', 'Alphonse', 'Jerome', 'Leroy', 'Torrance', 'Darnell', 'Lamar', 'Lionel', 'Tyree', 'Deion', 'Lamont', 'Malik', 'Terrence', 'Tyrone', 'Lavon', 'Marcellus', 'Wardell', 'Nichelle', 'Shereen', 'Ebony', 'Latisha', 'Shaniqua', 'Jasmine', 'Tanisha', 'Tia', 'Lakisha', 'Latoya', 'Yolanda', 'Malika', 'Yvette']
# attributes: pleasant (see above) and unpleasant
B = ['abuse', 'crash', 'filth', 'murder', 'sickness', 'accident', 'death', 'grief', 'poison', 'stink', 'assault', 'disaster', 'hatred', 'pollute', 'tragedy','bomb', 'divorce', 'jail', 'poverty', 'ugly', 'cancer','evil', 'kill', 'rotten', 'vomit']


In [18]:
X = [x.lower() for x in X]
Y = [y.lower() for y in Y]

In [19]:
r = ["jamel","deion","malik","terrence", "wardell","nichelle","shereen","latisha","shaniqua","tanisha","lakisha","latoya","malika","yvette"]
for name in r:
  Y.remove(name)

In [20]:
r = ["lauren", "colleen", "kristin","justin", "stephanie", "josh", "courtney", "megan","heather","ryan","brad","melanie","greg", "amanda"]
for name in r:
  X.remove(name)

In [24]:
pairs = list(product(X+Y, A+B))
first_order = ppmi(pairs, tokens)
second_order = socpmi(pairs, tokens, 0.4, 1.75)
parts = get_parts(X, Y)
print("\n first p:", p_test(X,Y,A,B,first_order, parts))
print("second p:", p_test(X,Y,A,B,second_order, parts))

100%|██████████| 5910028/5910028 [19:24<00:00, 5074.88it/s]
100%|██████████| 1800/1800 [27:57<00:00,  1.07it/s]
51149158it [1:28:22, 94567.72it/s] 

## WEAT 4


In [42]:
# targets: European American and African American names
X = ['Brad', 'Brendan', 'Geoffrey', 'Greg', 'Brett', 'Matthew', 'Neil', 'Todd', 'Allison', 'Anne', 'Carrie', 'Emily', 'Jill', 'Laurie', 'Meredith', 'Sarah']
Y = ['Darnell', 'Hakim', 'Jermaine', 'Kareem', 'Jamal', 'Leroy', 'Rasheed', 'Tyrone', 'Aisha', 'Ebony', 'Keisha', 'Kenya', 'Lakisha', 'Latoya', 'Tamika', 'Tanisha']
# attributes: pleasant and unpleasant (see above)


## WEAT 5


In [None]:
# targets: European American and African American names (see above)
# attributes: pleasant and unpleasant
A = ['joy', 'love', 'peace', 'wonderful', 'pleasure', 'friend', 'laughter', 'happy']
B = ['agony', 'terrible', 'horrible', 'nasty', 'evil', 'war', 'awful', 'failure']

## WEAT 6


In [None]:
# targets: male and female names
X = ['John', 'Paul', 'Mike', 'Kevin', 'Steve', 'Greg', 'Jeff', 'Bill']
Y = ['Amy', 'Joan', 'Lisa', 'Sarah', 'Diana', 'Kate', 'Ann', 'Donna']
# attributes: career and family
A = ['executive', 'management', 'professional', 'corporation', 'salary', 'office', 'business', 'career']
B = ['home', 'parents', 'children', 'family', 'cousins', 'marriage', 'wedding', 'relatives']


## WEAT 7


In [None]:
# targets: math and arts
X = ['math', 'algebra', 'geometry', 'calculus', 'equations', 'computation', 'numbers', 'addition']
Y = ['poetry', 'art', 'dance', 'literature', 'novel', 'symphony', 'drama', 'sculpture']
# attributes: male and female
A = ['male', 'man', 'boy', 'brother', 'he', 'him', 'his', 'son']
B = ['female', 'woman', 'girl', 'sister', 'she', 'her', 'hers', 'daughter']


## WEAT 8


In [None]:
# targets: science and arts
X = ['science', 'technology', 'physics', 'chemistry', 'Einstein', 'NASA', 'experiment', 'astronomy']
Y = ['poetry', 'art', 'Shakespeare', 'dance', 'literature', 'novel', 'symphony', 'drama']
# attributes: male and female
A = ['brother', 'father', 'uncle', 'grandfather', 'son', 'he', 'his', 'him']
B = ['sister', 'mother', 'aunt', 'grandmother', 'daughter', 'she', 'hers', 'her']


## WEAT 9


In [None]:
# targets: mental and physical disease
X = ['sad', 'hopeless', 'gloomy', 'tearful', 'miserable', 'depressed']
Y = ['sick', 'illness', 'influenza', 'disease', 'virus', 'cancer']
# attributes: temporary and permanent
A = ['impermanent', 'unstable', 'variable', 'fleeting', 'short-term', 'brief', 'occasional']
B = ['stable', 'always', 'constant', 'persistent', 'chronic', 'prolonged', 'forever']


## WEAT 10


In [None]:
# targets: young and old people's names
X = ['Tiffany', 'Michelle', 'Cindy', 'Kristy', 'Brad', 'Eric', 'Joey', 'Billy']
Y = ['Ethel', 'Bernice', 'Gertrude', 'Agnes', 'Cecil', 'Wilbert', 'Mortimer', 'Edgar']
# attributes: pleasant and unpleasant
A = ['joy', 'love', 'peace', 'wonderful', 'pleasure', 'friend', 'laughter', 'happy']
B = ['agony', 'terrible', 'horrible', 'nasty', 'evil', 'war', 'awful', 'failure']


## WEFAT 1


In [None]:
# targets: careers
W = ['technician', 'accountant', 'supervisor', 'engineer', 'worker', 'educator', 'clerk', 'counselor', 'inspector', 'mechanic', 'manager', 'therapist', 'administrator', 'salesperson', 'receptionist', 'librarian', 'advisor', 'pharmacist', 'janitor', 'psychologist', 'physician', 'carpenter', 'nurse', 'investigator', 'bartender', 'specialist', 'electrician', 'officer', 'pathologist', 'teacher', 'lawyer', 'planner', 'practitioner', 'plumber', 'instructor', 'surgeon', 'veterinarian', 'paramedic', 'examiner', 'chemist', 'machinist', 'appraiser', 'nutritionist', 'architect', 'hairdresser', 'baker', 'programmer', 'paralegal', 'hygienist', 'scientist']
# attributes: female and male
A = ['female', 'woman', 'girl', 'sister', 'she', 'her', 'hers', 'daughter']
B = ['male', 'man', 'boy', 'brother', 'he', 'him', 'his', 'son']


## WEFAT 2


In [None]:
# targets: androgynous names
W = ['Kelly', 'Tracy', 'Jamie', 'Jackie', 'Jesse', 'Courtney', 'Lynn', 'Taylor', 'Leslie', 'Shannon', 'Stacey', 'Jessie', 'Shawn', 'Stacy', 'Casey', 'Bobby', 'Terry', 'Lee', 'Ashley', 'Eddie', 'Chris', 'Jody', 'Pat', 'Carey', 'Willie', 'Morgan', 'Robbie', 'Joan', 'Alexis', 'Kris', 'Frankie', 'Bobbie', 'Dale', 'Robin', 'Billie', 'Adrian', 'Kim', 'Jaime', 'Jean', 'Francis', 'Marion', 'Dana', 'Rene', 'Johnnie', 'Jordan', 'Carmen', 'Ollie', 'Dominique', 'Jimmie', 'Shelby']
# attributes: female and male (see above)

## Statistics

In [1]:
from scipy import stats
import pandas as pd
import statsmodels.api as sm

In [2]:
# Load your dataset (replace 'data.csv' with your file name)
data = pd.read_csv('syp_data.csv')

In [50]:




# Define your predictor variables and outcome variable
predictors_block1 = ['first_order_es', 'second_order_es']

outcome_variable = 'WEAT_d'

# Model with Block 1 (X1 and X2)
X_block1 = data[predictors_block1]
X_block1 = sm.add_constant(X_block1)  # Add a constant for the intercept
y = data[outcome_variable]

model_block1 = sm.OLS(y, X_block1).fit()
print(model_block1.summary())


                            OLS Regression Results                            
Dep. Variable:                 WEAT_d   R-squared:                       0.236
Model:                            OLS   Adj. R-squared:                  0.017
Method:                 Least Squares   F-statistic:                     1.079
Date:                Tue, 12 Dec 2023   Prob (F-statistic):              0.390
Time:                        21:34:27   Log-Likelihood:                 3.2919
No. Observations:                  10   AIC:                           -0.5837
Df Residuals:                       7   BIC:                            0.3240
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               1.2593      0.128     



In [51]:
# Calculate residuals for model_block1
residuals_block1 = model_block1.resid

# Semi-partial correlation for X1 controlling for X2
semi_partial_X1_X2 = residuals_block1.corr(data['first_order_es'] - data['first_order_es'].mean() - 
                                           (data['second_order_es'] - data['second_order_es'].mean()) * model_block1.params['second_order_es'])
print("Semi-partial Correlation for X1 controlling for X2:", semi_partial_X1_X2)

# Semi-partial correlation for X2 controlling for X1
semi_partial_X2_X1 = residuals_block1.corr(data['second_order_es'] - data['second_order_es'].mean() - 
                                           (data['first_order_es'] - data['first_order_es'].mean()) * model_block1.params['first_order_es'])
print("Semi-partial Correlation for X2 controlling for X1:", semi_partial_X2_X1)


Semi-partial Correlation for X1 controlling for X2: -3.707069899712588e-16
Semi-partial Correlation for X2 controlling for X1: 2.5261506401938434e-15


In [52]:

res = stats.pearsonr(data['first_order_es'], data['second_order_es'])
res

(-0.13101198011111612, 0.7182798901709236)

In [54]:
firstWEAT = stats.pearsonr(data['first_order_es'], data['WEAT_d'])

In [55]:
firstWEAT

(0.26170576394835104, 0.46514285086112794)

In [56]:
secondWEAT = stats.pearsonr(data['second_order_es'], data['WEAT_d'])

In [57]:
secondWEAT

(0.37104866303754214, 0.2911507787943368)

In [3]:
first = data['first_order_es'].drop([3,4])
second = data['second_order_es'].drop([3,4])
weat = data['WEAT_d'].drop([3,4])
iat = data['IAT_d'].drop([3,4])


In [63]:
firstIAT = stats.pearsonr(first, iat)
firstIAT

(-0.00018728112579270272, 0.9996488478973502)

In [64]:
secondIAT = stats.pearsonr(second, iat)
secondIAT

(0.22445474545027622, 0.5930687346207789)

In [4]:
stats.pearsonr(weat, iat)

(-0.13873796869963667, 0.7431851074536994)