# Commodity Neighbors

We want to create nodes for geographical location, people, and commodities. In this notebook, I aim to add a neighbors attribute for commodities mentioned in particular texts. I'll look into the original filtered dataset to extract the commodities and respective neighbors. The code will be very similar to how neighbors were created for each proper name. 

The difference between commodity neighbors and proper-name neighbors is that the neighbors of a commodity are particular people and geographical names. Since this dataset only contains the people, we'll take the neighbors to be just people. 

As a precaution, we should cross-check against the proper-name neighbors to make sure that the two neighbor sets contain one another. This would help in creating an edge between the proper name and the commodity. 

We will take as many lines as necessary to find the neighbors, and will stop when the next commodity is hit: if a line contains a commodity, it will be the first word of that line. 

In [1]:
import pandas as pd

In [2]:
# Load the commodity table and the edge list from local CSV exports.
# NOTE(review): both paths are relative to the current working directory, so the
# notebook only runs when started from the folder that contains "Downloads/".
commodities_Pnum = pd.read_csv("Downloads/commodities_CSV.csv")
edgelist = pd.read_csv("Downloads/fa19_edgelist3.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Move the CSV's unnamed first column (the P-numbers) into the index.
# The guard keeps this cell idempotent: once the column has become the index it
# is no longer in `.columns`, and an unguarded re-run would raise a KeyError.
if 'Unnamed: 0' in commodities_Pnum.columns:
    commodities_Pnum = commodities_Pnum.set_index('Unnamed: 0')

In [4]:
commodities_Pnum

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,517,518,519,520,521,522,523,524,525,526
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P124149,3(diš)[]NU,udu[sheep]N,u[ewe]N,mašgal[goat]N,,,,,,,...,,,,,,,,,,
P405489,1(diš)[]NU,gud[ox]N,udu[sheep]N,mašgal[goat]N,uzud[goat]N,sila[lamb]N,sila[lamb]N,maš[goat]N,gud[ox]N,udu[sheep]N,...,,,,,,,,,,
P332036,gud[ox]N,gud[ox]N,udu[sheep]N,udu[sheep]N,udu[sheep]N,sila[lamb]N,gud[ox]N,udu[sheep]N,mašgal[goat]N,gud[ox]N,...,,,,,,,,,,
P320496,x[NA]NA,x[NA]NA,udu[sheep]N,mašgal[goat]N,ud[sun]N,2(diš)[]NU,udu[sheep]N,udu[sheep]N,mašgal[goat]N,ud[sun]N,...,,,,,,,,,,
P124307,sila[lamb]N,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
P201038,4(geš₂)[]NU,sila[unit]N,mu[name]N,,,,,,,,...,,,,,,,,,,
P124203,udu[sheep]N,u[ewe]N,mašgal[goat]N,uzud[goat]N,,,,,,,...,,,,,,,,,,
P210433,x[NA]NA,ašgar[kid]N,ŋiri[foot]N,,,,,,,,...,,,,,,,,,,
P142905,udu[sheep]N,sila[lamb]N,maš[goat]N,ašgar[kid]N,,,,,,,...,,,,,,,,,,


In [2]:
filtered_1 = pd.read_csv("Downloads/filtered_1_with_neighbors_normalized.csv")

In [3]:
# The P-number is the last seven characters of `id_text`
# (e.g. "epsd2/admin/u3adm/P124149" -> "P124149").
# Vectorised string slicing replaces a Python-level row lookup per record.
filtered_1['P_Number'] = filtered_1['id_text'].str[-7:]

In [7]:
# A commodity is a word that succeeds a number; those rows carry "Yes" in `commodity?`.
is_commodity = filtered_1['commodity?'].eq("Yes")
commodities = filtered_1.loc[is_commodity]
commodities_ids = commodities['id']
# Open question: what to do about commodity groups that do not have PNs?

In [9]:
# Every row that shares its P-number with the row at position 1.
example_p_number = filtered_1['P_Number'].iloc[1]
group_of_same_pnumber = filtered_1.loc[filtered_1['P_Number'] == example_p_number]
group_of_same_pnumber

Unnamed: 0,id,unnamed,lemma,normalizaton,id_text,id_line,id_word,label,prof?,role?,family?,number?,commodity?,P_Number,neighbors
0,0,0,3(diš)[]NU,,epsd2/admin/u3adm/P124149,3,P124149.3.1,o 1,No,No,No,Yes,No,P124149,[]
1,1,1,udu[sheep]N,,epsd2/admin/u3adm/P124149,3,P124149.3.2,o 1,No,No,No,No,Yes,P124149,[]
2,2,2,6(diš)[]NU,,epsd2/admin/u3adm/P124149,4,P124149.4.1,o 2,No,No,No,Yes,No,P124149,[]
3,3,3,u[ewe]N,,epsd2/admin/u3adm/P124149,4,P124149.4.2,o 2,No,No,No,No,Yes,P124149,[]
4,4,4,2(diš)[]NU,,epsd2/admin/u3adm/P124149,5,P124149.5.1,o 3,No,No,No,Yes,No,P124149,[]
5,5,5,mašgal[goat]N,,epsd2/admin/u3adm/P124149,5,P124149.5.2,o 3,No,No,No,No,Yes,P124149,[]
6,6,6,šugid[~animal]N,,epsd2/admin/u3adm/P124149,6,P124149.6.1,o 4,No,No,No,No,No,P124149,[]
7,7,7,emuhaldim[kitchen]N,,epsd2/admin/u3adm/P124149,6,P124149.6.2,o 4,No,No,No,No,No,P124149,[]
8,8,8,mu[name]N,,epsd2/admin/u3adm/P124149,7,P124149.7.1,o 5,No,No,No,No,No,P124149,[]
9,9,9,gardu[soldier]N,,epsd2/admin/u3adm/P124149,7,P124149.7.2,o 5,No,No,No,No,No,P124149,[]


In [10]:
# Keep the rows whose line number lies within two lines (inclusive) of the line
# of the row at position 1.
example_line = filtered_1['id_line'].iloc[1]
within_two_lines = group_of_same_pnumber['id_line'].between(example_line - 2, example_line + 2)
lines_befaf_commodity = group_of_same_pnumber[within_two_lines]

lines_befaf_commodity


Unnamed: 0,id,unnamed,lemma,normalizaton,id_text,id_line,id_word,label,prof?,role?,family?,number?,commodity?,P_Number,neighbors
0,0,0,3(diš)[]NU,,epsd2/admin/u3adm/P124149,3,P124149.3.1,o 1,No,No,No,Yes,No,P124149,[]
1,1,1,udu[sheep]N,,epsd2/admin/u3adm/P124149,3,P124149.3.2,o 1,No,No,No,No,Yes,P124149,[]
2,2,2,6(diš)[]NU,,epsd2/admin/u3adm/P124149,4,P124149.4.1,o 2,No,No,No,Yes,No,P124149,[]
3,3,3,u[ewe]N,,epsd2/admin/u3adm/P124149,4,P124149.4.2,o 2,No,No,No,No,Yes,P124149,[]
4,4,4,2(diš)[]NU,,epsd2/admin/u3adm/P124149,5,P124149.5.1,o 3,No,No,No,Yes,No,P124149,[]
5,5,5,mašgal[goat]N,,epsd2/admin/u3adm/P124149,5,P124149.5.2,o 3,No,No,No,No,Yes,P124149,[]


In [35]:
# Neighbours of the example commodity (the row at position 1 of `filtered_1`).
# The list is built in one pass so that every PN/GN in the window is kept;
# re-creating it for each scanned row would retain only a name found on the
# last row. Boolean masks also avoid label-based `.loc[i]` lookups, which only
# work while the index labels happen to be 0..n-1.
example_line = filtered_1.iloc[1]['id_line']
on_example_line = lines_befaf_commodity['id_line'] == example_line
# Missing lemmas yield NaN from `.str[-2:]`, which `isin` treats as "not a name".
is_name = lines_befaf_commodity['lemma'].str[-2:].isin(['PN', 'GN'])
# Names from the surrounding lines come first ...
neighbors = lines_befaf_commodity.loc[is_name & ~on_example_line, 'lemma'].tolist()
# ... followed by every word on the commodity's own line.
same = lines_befaf_commodity[on_example_line]
neighbors += same['lemma'].tolist()
neighbors


['3(diš)[]NU', 'udu[sheep]N']

In [14]:
#get the words in the same text
#use a line of text as an attribute , in case there's no PN as a neighbor , that could include other commodities as well
def neighbors_for_comm(filtered, commodity_id_list):
    """Collect the neighbours of each commodity row.

    For every commodity the neighbours are, in this order:

    1. the lemmas ending in ``PN`` or ``GN`` found on the other lines of the
       same text within two lines of the commodity's line, and
    2. every lemma on the commodity's own line (so a commodity with no name
       nearby still gets its line as context).

    Parameters
    ----------
    filtered : pandas.DataFrame
        Must provide the columns ``P_Number``, ``id_line`` and ``lemma``.
    commodity_id_list : iterable of int
        Row positions of the commodities; each value is used with
        ``filtered.iloc``. NOTE(review): callers pass the ``id`` column, which
        is assumed to equal the row position -- confirm for every input file.

    Returns
    -------
    list of list
        One neighbour list per entry of ``commodity_id_list``, in the same order.
    """
    list_of_neighbors = []
    for com_id in commodity_id_list:
        commodity_row = filtered.iloc[com_id]
        commodity_line = commodity_row['id_line']

        # Words of the same text lying within two lines of the commodity.
        same_text = filtered[filtered['P_Number'] == commodity_row['P_Number']]
        window = same_text[same_text['id_line'].between(commodity_line - 2, commodity_line + 2)]

        on_commodity_line = window['id_line'] == commodity_line
        # A missing lemma yields NaN from `.str[-2:]`; `isin` treats it as "not a name".
        is_name = window['lemma'].str[-2:].isin(['PN', 'GN'])

        # Built in one pass so every name in the window is kept. The commodity's
        # own line is excluded here because it is appended in full just below;
        # scanning it as well would list its names twice.
        neighbors = window.loc[is_name & ~on_commodity_line, 'lemma'].tolist()
        neighbors += window.loc[on_commodity_line, 'lemma'].tolist()
        list_of_neighbors.append(neighbors)
    return list_of_neighbors
            
    

Applying the above function to all 10 filtered files (the result is a new column holding the commodity neighbors):

In [None]:
# The ten filtered exports, numbered 1..10.
filtered_files = [
    f"Downloads/filtered_{file_number}_with_neighbors_normalized.csv"
    for file_number in range(1, 11)
]
for j, path in enumerate(filtered_files):
    filtered = pd.read_csv(path)
    # The P-number is the last seven characters of `id_text`.
    filtered['P_Number'] = filtered['id_text'].str[-7:]
    commodities = filtered[filtered['commodity?'] == "Yes"]
    # The same name must be used below; a mismatched name would either raise a
    # NameError or silently pick up a stale list left in the kernel.
    commodity_id_list = commodities['id'].to_list()

    list_comm = neighbors_for_comm(filtered, commodity_id_list)

    # One entry per row: the neighbour list for commodity rows, [] otherwise.
    # Pairing ids with their results directly is linear and does not depend on
    # the ids being in ascending order.
    commodity_neighbor_column = [[] for _ in range(len(filtered))]
    for com_id, com_neighbors in zip(commodity_id_list, list_comm):
        commodity_neighbor_column[com_id] = com_neighbors

    filtered['commodity_neighbors'] = commodity_neighbor_column
    # No "/" in the name: it would be read as a path into a directory
    # ("filtered_N_with_comm") that does not exist.
    name_file = "filtered_" + str(j+1) + "_with_comm_PN_neighbors_normalized.csv"
    # index=False: the input already carries its own index columns, so writing
    # the index again would only add another unnamed column.
    filtered.to_csv(name_file, index=False)

In [16]:
# Neighbour lists for the commodities of the first filtered file.
# `neighbors_for_comm` takes the data frame itself plus the row positions of its
# commodities; the ids come from the commodity selection made earlier.
commodity_id_list = commodities_ids.to_list()
list_comm = neighbors_for_comm(filtered_1, commodity_id_list)
# Show only a preview; the full list has one entry per commodity.
list_comm[:20]

[['3(diš)[]NU', 'udu[sheep]N'],
 ['6(diš)[]NU', 'u[ewe]N'],
 ['2(diš)[]NU', 'mašgal[goat]N'],
 ['ud[sun]N', '1(u)[]NU', '5(diš@t)-kam[]NU'],
 ['ki[place]N', 'du₁₁-ga-ta[NA]NA'],
 ['1(u)[]NU', '1(diš)[]NU'],
 ['1(diš)[]NU', 'gud[ox]N', 'niga[fattened]V/i'],
 ['1(diš)[]NU', 'gud[ox]N', 'niga[fattened]V/i'],
 ['1(diš)[]NU', 'udu[sheep]N'],
 ['1(diš)[]NU', 'mašgal[goat]N'],
 ['1(diš)[]NU', 'uzud[goat]N'],
 ['5(diš)[]NU', 'sila[lamb]N'],
 ['1(u)[]NU', '2(diš)[]NU', 'sila[lamb]N', 'ga[milk]N'],
 ['1(u)[]NU', '2(diš)[]NU', 'sila[lamb]N', 'ga[milk]N'],
 ['{d}šul-gi-a-a-mu-ta[]PN', '5(diš)[]NU', 'maš[goat]N', 'ga[milk]N'],
 ['{d}šul-gi-iri-mu[]PN',
  'uš[die]V/i',
  'ud[sun]N',
  '1(u)[]NU',
  'lal[small]V/i',
  '1(diš)-kam[]NU'],
 ['ki[place]N', '{d}šul-gi-a-a-mu-ta[]PN'],
 ['1(diš)[]NU', 'gud[ox]N', '2(u)[]NU', '5(diš)[]NU', 'udu[sheep]N'],
 ['1(diš)[]NU', 'gud[ox]N', '2(u)[]NU', '5(diš)[]NU', 'udu[sheep]N'],
 ['1(diš)[]NU', 'gud[ox]N', '2(u)[]NU', '5(diš)[]NU', 'udu[sheep]N'],
 ['1(diš)[]NU'

In [25]:
# One entry per row of `filtered_1`: the neighbour list for commodity rows and
# an empty list for every other row. Each id is paired with its own result, so
# this is a single linear pass (no repeated list membership tests or re-slicing)
# and it stays correct even if the ids are not in ascending order.
commodity_neighbor_column = [[] for _ in range(len(filtered_1))]
for com_id, com_neighbors in zip(commodity_id_list, list_comm):
    commodity_neighbor_column[com_id] = com_neighbors
        
        

In [27]:
#we have running list of geogrpahic names
#for commodities, if we don't find the PN in the group- take the line as the neighbor
filtered_1['commodity_neighbors'] = commodity_neighbor_column
filtered_1

Unnamed: 0,id,unnamed,lemma,normalizaton,id_text,id_line,id_word,label,prof?,role?,family?,number?,commodity?,P_Number,neighbors,commodity_neighbors
0,0,0,3(diš)[]NU,,epsd2/admin/u3adm/P124149,3,P124149.3.1,o 1,No,No,No,Yes,No,P124149,[],[]
1,1,1,udu[sheep]N,,epsd2/admin/u3adm/P124149,3,P124149.3.2,o 1,No,No,No,No,Yes,P124149,[],"[3(diš)[]NU, udu[sheep]N]"
2,2,2,6(diš)[]NU,,epsd2/admin/u3adm/P124149,4,P124149.4.1,o 2,No,No,No,Yes,No,P124149,[],[]
3,3,3,u[ewe]N,,epsd2/admin/u3adm/P124149,4,P124149.4.2,o 2,No,No,No,No,Yes,P124149,[],"[6(diš)[]NU, u[ewe]N]"
4,4,4,2(diš)[]NU,,epsd2/admin/u3adm/P124149,5,P124149.5.1,o 3,No,No,No,Yes,No,P124149,[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55276,55276,341667,1(diš)[]NU,,epsd2/admin/u3adm/P126168,10,P126168.10.1,o 8,No,No,No,Yes,No,P126168,[],[]
55277,55277,341668,sila[lamb]N,,epsd2/admin/u3adm/P126168,10,P126168.10.2,o 8,No,No,No,No,Yes,P126168,[],"[1(diš)[]NU, sila[lamb]N, tu-ra-am-i₃-li₂[]PN]"
55278,55278,341669,tu-ra-am-i₃-li₂[]PN,Turamili[]PN,epsd2/admin/u3adm/P126168,10,P126168.10.3,o 8,No,No,No,No,No,P126168,"['1(diš)[]NU', 'sila[lamb]N', 'tu-ra-am-i₃-li₂...",[]
55279,55279,341670,2(diš)[]NU,,epsd2/admin/u3adm/P126168,11,P126168.11.1,o 9,No,No,No,Yes,No,P126168,[],[]


In [None]:
def n_neighbors_commodities(data, number_of_lines):
    """Collect, for every row whose lemma contains "PN", the lemmas around it.

    The neighbours of such a row are all lemmas of the same text (``P_Number``)
    whose ``id_line`` lies within ``number_of_lines`` lines, inclusive, of the
    row's own line. The row's own lemma is part of that list. Every ``'break'``
    marker is dropped.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``P_Number``, ``id_line`` and ``lemma``.
    number_of_lines : int
        How many lines before and after the row's line to include.

    Returns
    -------
    list of list
        One list per row of ``data``, in row order; rows whose lemma does not
        contain "PN" get an empty list.
    """
    big_list = [[] for _ in range(len(data))]
    # na=False: a missing lemma counts as "not a PN" instead of turning the
    # mask into a non-boolean array that cannot be used for selection.
    is_pn = data['lemma'].str.contains("PN", na=False)
    # Work with row positions, not index labels, so the result lines up with
    # `big_list` even when the frame does not carry a default 0..n-1 index.
    pn_positions = [pos for pos, hit in enumerate(is_pn) if hit]
    for pos in pn_positions:
        row = data.iloc[pos]
        line = row['id_line']
        in_window = (
            (data['P_Number'] == row['P_Number'])
            & data['id_line'].between(line - number_of_lines, line + number_of_lines)
        )
        lemma_neighbors = data.loc[in_window, 'lemma'].tolist()
        # Drop every 'break' marker, not just the first one.
        big_list[pos] = [lemma for lemma in lemma_neighbors if lemma != 'break']
    return big_list

In [None]:
#Get neighbors from same text instead of 2 lines - get the PNs associated, GN
