#### Importing Packages

In [1]:
import numpy as np
import pandas as pd
import networkx as nx
from nltk.corpus import wordnet

#### Importing Data

In [3]:
df = pd.read_csv('all_records_cleaned.csv')

In [4]:
# Check the columns
df.columns

Index(['CUE', 'TARGET', 'NORMED?', '#G', '#P', 'FSG', 'BSG', 'MSG', 'OSG',
       '#M', 'MMIAS', '#O', 'OMIAS', 'QSS', 'QFR', 'QCON', 'QH', 'QPS', 'QMC',
       'QPR', 'QRSG', 'QUC', 'TSS', 'TFR', 'TCON', 'TH', 'TPS', 'TMC', 'TPR',
       'TRSG', 'TUC', 'TRF'],
      dtype='object')

In [5]:
# Check the data
df.head()

Unnamed: 0,CUE,TARGET,NORMED?,#G,#P,FSG,BSG,MSG,OSG,#M,...,TSS,TFR,TCON,TH,TPS,TMC,TPR,TRSG,TUC,TRF
0,A,B,NO,152,69,0.454,¥,¥,¥,¥,...,¥,¥,¥,,,¥,¥,¥,0.0,
1,A,ALPHABET,YES,152,10,0.066,0.046,0.002,0,2,...,11,2,¥,,N,0.5,0.25,0.062,1.0,
2,A,THE,NO,152,10,0.066,¥,¥,¥,¥,...,¥,¥,¥,,,¥,¥,¥,0.0,
3,A,GRADE,YES,152,9,0.059,0.277,0,0.0013,0,...,14,35,3.7,N,N,1,0.54,0.025,1.0,
4,A,LETTER,YES,152,6,0.039,0,0.003,0.0022,2,...,15,145,5.16,N,N,1.27,0.6,0.142,1.0,


#### Cleaning the Data

In [6]:
# Taking the required columns
df = df[['CUE','TARGET','NORMED?','#G','#P','FSG','BSG','QPS','TPS']]

- CUE Normed Word
- TARGET Response to Normed Word
- NORMED? Is Response Normed?
- #G Group size
- #P Number of Participants Producing Response
- FSG Forward Cue-to-Target Strength
- BSG Backward Target-to-Cue Strength
- QPS Cue: Part of Speech
- TPS Target: Part of Speech

In [7]:
# Filters used to clean the data
df = df[df['NORMED?']=='YES']
df['BSG'] = df['BSG'].astype(float)
df = df[df['BSG']==0]
df = df[df['#P']>2].reset_index()

#### Code for Part-of-Speech Tagging

In [9]:
# Filtering POS for CUES and TARGETS to be, nouns verbs and adjectives
exclude_list = ['PP','AV','ADV','P','I','C','AD']
df1 = df[~df['QPS'].str.contains('|'.join(exclude_list))]
df1['TPS'] = df1['TPS'].dropna()
df1 = df1[df1['TPS']!=' ']
df1 = df1[df1['TPS']!='']

- N: Noun
- V: Verb
- AJ: Adjective
- AD: Adverb
- P: Pronoun
- PP: Preposition
- I: Interjection
- C: Conjunction

In [10]:
# Getting count for each Part-of-Speech
df1['TPS'].value_counts()

N      16739
AJ      4321
V       4047
AD       425
P         75
ADJ       71
PP        55
I         33
A          6
INT        4
AV         3
PRP        2
C          2
Name: TPS, dtype: int64

In [11]:
# Dropping null values
df1 = df1.dropna()

In [12]:
include_list = ['N','V','AJ','AD','P','PP','I','C']
df1 = df1[df1['TPS'].str.contains('|'.join(include_list))]

In [13]:
# Getting the edge weights using FSG

df_group = df1.groupby(['QPS','TPS']).mean()
df_group = df_group.reset_index()

d_weights = {}
for row in df_group.itertuples():
    d_weights[(row.QPS,row.TPS)]=row.FSG

In [14]:
# Creating the Part-of-Speech network

lst = ['N','AJ','V','AD','P','PP','I','C']
G = nx.Graph()
for n in lst:
    G.add_node(n)
for row in df1.itertuples():
    if row.TPS in ['N','AJ','V','AD','P','PP','I','C']: 
        G.add_edge(row.QPS,row.TPS,weight = d_weights[(row.QPS,row.TPS)])

In [None]:
# Export the Part-of-Speech network
nx.write_gml(G,'POS.gml')

#### Code for Vowels Association

In [16]:
# Getting the required data for vowels

exclude_list = ['PP','AV','ADV','P','I','C']
df2 = df[~df['QPS'].str.contains('|'.join(exclude_list))]
df2['TPS'] = df2['TPS'].dropna()
df2 = df2[df2['TPS']!=' ']


df_vowels = df2[(df2['CUE'].str.startswith('A')) |
               (df2['CUE'].str.startswith('E')) | 
               (df2['CUE'].str.startswith('I')) |
               (df2['CUE'].str.startswith('O')) | 
               (df2['CUE'].str.startswith('U'))]   

In [17]:
# Creating the Vowels Association network

ls_vowels = list(set(list(df_vowels['CUE'])+list(df_vowels['TARGET'])))
G = nx.Graph()
for n in ls_vowels:
    G.add_node(n)
for row in df_vowels.itertuples():
    G.add_edge(row.CUE,row.TARGET)
    
vowels = df_vowels.groupby(['CUE', 'TARGET'])['FSG'].mean()
vowel = {}
for i,j in zip(vowels.index,vowels.values):
    vowel[i] = {'weight':j}
    
nx.set_edge_attributes(G,vowel)

In [None]:
# Export the Vowels Association network
nx.write_gml(G,'cue_target_vowels.gml')

#### Code for Sub-Topic and Topic Association

In [18]:
cue_hyp1 = []
cue_hyp2 = []
tar_hyp1 = []
tar_hyp2 = []

for i in range(len(df)):
    # Getting the sub-topics and topics for the CUE words
    try:
        My_sysn = wordnet.synsets(df.loc[i,'CUE'])[0]
        x = My_sysn.hypernyms()[0].name()
        cue_hyp1.append(x.split('.')[0])
        my_sysn2 = wordnet.synsets(x.split('.')[0])[0]
        try:
            y = my_sysn2.hypernyms()[0].name()
            cue_hyp2.append(y.split('.')[0])
        except:
            cue_hyp2.append('NA')
    except:
        cue_hyp1.append('NA')
        cue_hyp2.append('NA')
    
    # Getting the sub-topics and topics for the TARGET words
    try:
        My_sysn = wordnet.synsets(df.loc[i,'TARGET'])[0]
        x = My_sysn.hypernyms()[0].name()
        tar_hyp1.append(x.split('.')[0])
        my_sysn2 = wordnet.synsets(x.split('.')[0])[0]
        try:
            y = my_sysn2.hypernyms()[0].name()
            tar_hyp2.append(y.split('.')[0])
        except:
            tar_hyp2.append('NA')
    except:
        tar_hyp1.append('NA')
        tar_hyp2.append('NA')

In [19]:
# Appending the required data and filtering them

df3 = df.copy()
df3['cue_hyp1'] = cue_hyp1
df3['cue_hyp2'] = cue_hyp2
df3['tar_hyp1'] = tar_hyp1
df3['tar_hyp2'] = tar_hyp2
df3 = df3[['CUE','TARGET','cue_hyp1','cue_hyp2','tar_hyp1','tar_hyp2','FSG']]

In [20]:
# Checking the required data
df3

Unnamed: 0,CUE,TARGET,cue_hyp1,cue_hyp2,tar_hyp1,tar_hyp2,FSG
0,A,LETTER,metric_linear_unit,linear_unit,document,writing,0.039
1,A,PLUS,metric_linear_unit,linear_unit,quality,attribute,0.033
2,AARDVARK,ANIMAL,placental,mammal,organism,living_thing,0.322
3,AARDVARK,ANT,placental,mammal,hymenopterous_insect,insect,0.197
4,AARDVARK,MUSIC,placental,mammal,auditory_communication,communication,0.020
...,...,...,...,...,...,...,...
26100,ZUCCHINI,VEGETABLE,marrow,connective_tissue,produce,food,0.331
26101,ZUCCHINI,GREEN,marrow,connective_tissue,chromatic_color,color,0.149
26102,ZUCCHINI,FOOD,marrow,connective_tissue,substance,matter,0.088
26103,ZUCCHINI,BROCCOLI,marrow,connective_tissue,crucifer,herb,0.034


In [21]:
# Creating the Sub-Topic network

x = []
x.extend(df3.tar_hyp1)
x.extend(df3.cue_hyp1)
x = set(x)

# Adding the nodes
G = nx.Graph()
for i in x:
    if i=='NA':
        continue
    G.add_node(i)
    
# Adding the edges
for i,j in zip(df3['cue_hyp1'],df3['tar_hyp1']):
    if i=='NA' or j=='NA':
        continue
    G.add_edge(i,j)
    
# Getting the edge weights
df4 = df3[df3['cue_hyp2']!='NA']
df4 = df4[df4['tar_hyp2']!='NA']
hyp1 = df4.groupby(['cue_hyp1','tar_hyp1'])['FSG'].mean()

first_hyp = {}
for i,j in zip(hyp1.index,hyp1.values):
    first_hyp[i] = {'weight':j}
    
nx.set_edge_attributes(G,first_hyp)

In [None]:
# Export the Sub-Topic network
nx.write_gml(G,'first_hyp.gml')

In [22]:
# Creating the Topic network

x = []
x.extend(df3.tar_hyp2)
x.extend(df3.cue_hyp2)
x = set(x)

# Adding the nodes
G = nx.Graph()
for i in x:
    if i=='NA':
        continue
    G.add_node(i)
    
# Adding the edges
for i,j in zip(df3['cue_hyp2'],df3['tar_hyp2']):
    if i=='NA' or j=='NA':
        continue
    G.add_edge(i,j)
    
# Getting the edge weights
df5 = df3[df3['cue_hyp2']!='NA']
df5 = df5[df5['tar_hyp2']!='NA']
hyp2 = df5.groupby(['cue_hyp1','tar_hyp1'])['FSG'].mean()

second_hyp = {}
for i,j in zip(hyp2.index,hyp2.values):
    second_hyp[i] = {'weight':j}
    
nx.set_edge_attributes(G,second_hyp)

In [None]:
# Export the Topic network
nx.write_gml(G,'second_hyp.gml')