# import libraries

In [27]:
import pandas as pd

# Step 0. import .csv file of cleaned Zotero library

In [85]:
# Read the Zotero library export (CSV) into a DataFrame.
dat = pd.read_csv(filepath_or_buffer="../0 data_zotero/zotero_NKelites_csv.csv")

In [86]:
# Strip every space out of the column labels (e.g. "Item Type" -> "ItemType")
# so columns can be reached with attribute access such as dat.ItemType.
newcolumns = dict(zip(dat.columns, dat.columns.str.replace(" ", "", regex=False)))
dat = dat.rename(columns=newcolumns)

In [87]:
# Display the loaded frame to eyeball the renamed columns and the row count.
dat

Unnamed: 0,Key,ItemType,PublicationYear,Author,Title,PublicationTitle,ISBN,ISSN,DOI,Url,...,ProgrammingLanguage,Version,System,Code,CodeNumber,Section,Session,Committee,History,LegislativeBody
0,B9T9PACU,document,2022.0,,tag donkey - phantom document importing tags f...,,,,,,...,,3,,,,,,,,
1,8SJ7MB3J,journalArticle,2018.0,"Song, Wonjun; Wright, Joseph",THE NORTH KOREAN AUTOCRACY IN COMPARATIVE PERS...,Journal of East Asian Studies,,"1598-2408, 2234-6643",10.1017/jea.2018.8,https://www.cambridge.org/core/product/identif...,...,,4,,,,,,,,
2,28TJ2V6S,journalArticle,1994.0,"Ahn, Byung-joon",The Man Who Would Be Kim,Foreign Affairs,,0015-7120,10.2307/20046931,https://www.jstor.org/stable/20046931,...,,190,,,,,,,,
3,HTL8C5WY,journalArticle,2019.0,"Matherly, Carter",Examining Attitude Functions of North Korean C...,North Korean Review,,1551-2789,,https://www.jstor.org/stable/26632424,...,,246,,,,,,,,
4,GH5Z8P27,journalArticle,2004.0,"De Ceuster, Koen","Review of The North Korean Revolution, 1945–1950",Journal of East Asian Studies,,1598-2408,,https://www.jstor.org/stable/23417953,...,,190,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
406,7QUEQL64,book,2001.0,"Berkeley, Bill","The Graves Are Not Yet Full Race, Tribe And Po...",,978-0-465-00641-0,,,,...,,1094,,,,,,,,
407,FKELK72D,book,2013.0,"Armstrong, Charles K.",Tyranny of the Weak: North Korea and the World...,,978-0-8014-6894-0,,,,...,,1093,,,,,,,,
408,NAY7UPVP,book,2013.0,"Armstrong, Charles K.","The North Korean Revolution, 1945–1950",,978-0-8014-6879-7,,,,...,,1092,,,,,,,,
409,ZDWVXHK7,book,2013.0,"Kim, Suzy","Everyday Life in the North Korean Revolution, ...",,978-0-8014-6935-0,,,,...,,1091,,,,,,,,


# Step 1. create "raw" edgelist (using strings)

In [144]:
# Keep only journal articles that have both an author and a journal title, and
# only the three columns needed to build author-journal edges.
# .copy() makes edgelist an independent frame: it is modified in later cells
# (column assignment, de-duplication), which on a bare .loc slice can raise
# SettingWithCopyWarning.
# Note: rows with a missing PublicationYear are NOT filtered out here.
edgelist = dat.loc[
    (dat["ItemType"] == "journalArticle")
    & dat["Author"].notna()
    & dat["PublicationTitle"].notna(),
    ["PublicationYear", "Author", "PublicationTitle"],
].copy()

In [145]:
# Remove a leading "The " from publication titles (presumably so that titles
# differing only by the leading article collapse to one journal label).
# The pattern is anchored with ^ so a "The " in the middle of a title is left
# intact, and regex=True is passed explicitly because the default value of
# `regex` in Series.str.replace differs between pandas versions.
edgelist["PublicationTitle"] = edgelist["PublicationTitle"].str.replace(
    r"^The ", "", regex=True
)

In [146]:
# Sanity check: list any rows whose title still contains the substring "The".
edgelist.loc[edgelist.PublicationTitle.str.contains("The")]

Unnamed: 0,PublicationYear,Author,PublicationTitle
95,1981.0,"Easton, David",Political Theory


In [147]:
# Show rows that repeat an earlier (PublicationYear, Author, PublicationTitle)
# combination; the first occurrence of each combination is not listed.
edgelist.loc[edgelist.duplicated(keep="first")]

Unnamed: 0,PublicationYear,Author,PublicationTitle
66,1978.0,"Kihl, Young Whan",American Political Science Review
72,2004.0,"De Ceuster, Koen",Journal of East Asian Studies
214,2010.0,"Byman, Daniel; Lind, Jennifer",International Security
275,2008.0,"McEachern, Patrick; Boyd, Molly",Journal of Asian Studies


In [148]:
# Remove repeated rows, then re-run the duplicate check; an empty result
# confirms that none remain.
# NOTE(review): rows are compared on (PublicationYear, Author, PublicationTitle)
# only, so two different articles by the same author in the same journal and
# year would be treated as one -- confirm this is intended.
edgelist = edgelist.drop_duplicates()
edgelist.loc[edgelist.duplicated()]

Unnamed: 0,PublicationYear,Author,PublicationTitle


In [149]:
# Display the de-duplicated raw edgelist (still keyed by author/journal strings).
edgelist

Unnamed: 0,PublicationYear,Author,PublicationTitle
1,2018.0,"Song, Wonjun; Wright, Joseph",Journal of East Asian Studies
2,1994.0,"Ahn, Byung-joon",Foreign Affairs
3,2019.0,"Matherly, Carter",North Korean Review
4,2004.0,"De Ceuster, Koen",Journal of East Asian Studies
8,2010.0,"Nincic, Miroslav",International Security
...,...,...,...
318,2020.0,"Bünte, Marco",Democratization
320,2017.0,"Dukalskis, Alexander; Gerschewski, Johannes",Contemporary Politics
328,2014.0,"Geddes, Barbara; Wright, Joseph; Frantz, Erica",Perspectives on Politics
329,2013.0,"Gerschewski, Johannes",Democratization


In [150]:
# Number of distinct Author strings (a multi-author string counts as one value).
edgelist["Author"].nunique(dropna=False)

113

In [151]:
# Number of distinct journal titles in the edgelist.
edgelist["PublicationTitle"].nunique(dropna=False)

69

# Step 2. make "R-ready" nodelists

#### create node lists (e.g. author, journal). index nodes. rename strings as labels
#### add node attributes to each nodelist
#### concatenate the two node lists (e.g. author, journal) into a single "R-ready" nodelist

In [152]:
# Display the raw edgelist again as the starting point for building node lists.
edgelist

Unnamed: 0,PublicationYear,Author,PublicationTitle
1,2018.0,"Song, Wonjun; Wright, Joseph",Journal of East Asian Studies
2,1994.0,"Ahn, Byung-joon",Foreign Affairs
3,2019.0,"Matherly, Carter",North Korean Review
4,2004.0,"De Ceuster, Koen",Journal of East Asian Studies
8,2010.0,"Nincic, Miroslav",International Security
...,...,...,...
318,2020.0,"Bünte, Marco",Democratization
320,2017.0,"Dukalskis, Alexander; Gerschewski, Johannes",Contemporary Politics
328,2014.0,"Geddes, Barbara; Wright, Joseph; Frantz, Erica",Perspectives on Politics
329,2013.0,"Gerschewski, Johannes",Democratization


#### still Step 2. create node lists (e.g. author, journal). index nodes. rename strings as labels

In [153]:
# Build the author node list: one row per distinct Author string, sorted
# alphabetically, with a 0-based NodeIndex and the string itself as NodeLabel.
# NOTE(review): an Author value holding several names separated by "; " becomes
# a single node here -- confirm that co-author strings should not be split.
authors = (
    edgelist["Author"]
    .drop_duplicates()
    .sort_values()
    .reset_index(drop=True)
    .rename_axis("NodeIndex")
    .rename("NodeLabel")
    .reset_index()
)
authors

Unnamed: 0,NodeIndex,NodeLabel
0,0,"Ahn, Byung-joon"
1,1,"Albright, Madeleine K."
2,2,"Andersson, Magnus; Bae, Jinsun"
3,3,"BARANY, ZOLTAN D."
4,4,"Bachman, David"
...,...,...
108,108,"Yee, Samuel"
109,109,"Yoon, Dae-Kyu; Yang, Moon-Soo"
110,110,"Young, Roland"
111,111,"Ziegler, Charles E."


In [154]:
# Attach constant display attributes shared by every author node.
authors = authors.assign(
    NodeType="Author",
    NodeShape="Circle",
    NodeSize=1,
    NodeColor="Light Blue",
)

In [155]:
# Highest author NodeIndex; journal indices start right after this value.
max(authors.loc[:, "NodeIndex"])

112

In [156]:
# Build the journal node list: one row per distinct title, sorted, with
# NodeIndex continuing from the last author index so that author and journal
# ids never overlap.
journals = (
    edgelist["PublicationTitle"]
    .drop_duplicates()
    .sort_values()
    .reset_index(drop=True)
    .rename_axis("NodeIndex")
    .rename("NodeLabel")
    .reset_index()
    .assign(NodeIndex=lambda frame: frame["NodeIndex"] + max(authors["NodeIndex"]) + 1)
)
journals

Unnamed: 0,NodeIndex,NodeLabel
0,113,ANNALS of the American Academy of Political an...
1,114,Academy of Management Journal
2,115,Acta Sociologica
3,116,American Historical Review
4,117,American Journal of Sociology
...,...,...
64,177,Studies in Comparative Communism
65,178,Systems Research and Behavioral Science
66,179,Systems practice
67,180,Third World Quarterly


In [157]:
# Attach constant display attributes shared by every journal node.
journals = journals.assign(
    NodeType="Journal",
    NodeShape="Square",
    NodeSize=1,
    NodeColor="White",
)

#### still Step 2. add node attributes to each node list

In [161]:
# Display the journal node list with its default attributes.
journals

Unnamed: 0,NodeIndex,NodeLabel,NodeType,NodeShape,NodeSize,NodeColor
0,113,ANNALS of the American Academy of Political an...,Journal,Square,1,White
1,114,Academy of Management Journal,Journal,Square,1,White
2,115,Acta Sociologica,Journal,Square,1,White
3,116,American Historical Review,Journal,Square,1,White
4,117,American Journal of Sociology,Journal,Square,1,White
...,...,...,...,...,...,...
64,177,Studies in Comparative Communism,Journal,Square,1,White
65,178,Systems Research and Behavioral Science,Journal,Square,1,White
66,179,Systems practice,Journal,Square,1,White
67,180,Third World Quarterly,Journal,Square,1,White


In [162]:
# Write the journal node list to disk (without the row index).
journals.to_csv(
    path_or_buf="../2.1 data_node attributes/journal nodelist_add attributes.csv",
    index=False,
)

In [132]:
# Manual step (outside this notebook): open the exported .csv file and add a new column for node attributes

In [163]:
# Load the list of sociology journals (one NodeLabel per row).
sociology = pd.read_csv(
    filepath_or_buffer="../2.1 data_node attributes/sociology journals.csv"
)

In [164]:
# Every row in this list is a sociology journal, so flag them all with 1.
sociology = sociology.assign(IsSociologyJournal=1)

In [165]:
# Display the flagged sociology journal list.
sociology

Unnamed: 0,NodeLabel,IsSociologyJournal
0,Acta Sociologica,1
1,American Journal of Sociology,1
2,American Sociological Review,1
3,ANNALS of the American Academy of Political an...,1
4,Annual Review of Sociology,1
5,British Journal of Sociology,1
6,British Journal of Sociology of Education,1
7,Contemporary Sociology,1
8,Critical Sociology,1
9,Cultural Sociology,1


In [166]:
# Attach the sociology flag to the journal node list by NodeLabel.
# - Any IsSociologyJournal column left from an earlier run is dropped first, so
#   re-running this cell does not produce IsSociologyJournal_x / _y columns.
# - validate="many_to_one" raises pandas.errors.MergeError if `sociology` lists
#   a label more than once; without it, a repeated label would silently
#   duplicate the journal node (same NodeIndex on two rows).
# Journals absent from `sociology` get NaN in IsSociologyJournal (left join).
journals = (
    journals
    .drop(columns="IsSociologyJournal", errors="ignore")
    .merge(sociology, on="NodeLabel", how="left", validate="many_to_one")
)

In [168]:
# Colour sociology journals red; every other journal keeps its current colour.
# The fill-with-0 step below is left commented out, so journals that are not in
# the sociology list keep NaN in IsSociologyJournal.
#journals.loc[journals["IsSociologyJournal"].isnull(),"IsSociologyJournal"]=0
journals["NodeColor"] = journals["NodeColor"].mask(
    journals["IsSociologyJournal"].eq(1), "Red"
)

In [171]:
# Display the journal node list after the sociology flag and colour were applied.
journals

Unnamed: 0,NodeIndex,NodeLabel,NodeType,NodeShape,NodeSize,NodeColor,IsSociologyJournal
0,113,ANNALS of the American Academy of Political an...,Journal,Square,1,Red,1.0
1,114,Academy of Management Journal,Journal,Square,1,White,
2,115,Acta Sociologica,Journal,Square,1,Red,1.0
3,116,American Historical Review,Journal,Square,1,White,
4,117,American Journal of Sociology,Journal,Square,1,Red,1.0
...,...,...,...,...,...,...,...
64,177,Studies in Comparative Communism,Journal,Square,1,White,
65,178,Systems Research and Behavioral Science,Journal,Square,1,White,
66,179,Systems practice,Journal,Square,1,White,
67,180,Third World Quarterly,Journal,Square,1,White,


In [173]:
# Count non-null entries per column for each node colour (checks how many
# journals were coloured red).
journals.groupby("NodeColor").agg("count")

Unnamed: 0_level_0,NodeIndex,NodeLabel,NodeType,NodeShape,NodeSize,IsSociologyJournal
NodeColor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Red,8,8,8,8,8,8
White,61,61,61,61,61,0


In [174]:
# Display the author node list with its attributes.
authors

Unnamed: 0,NodeIndex,NodeLabel,NodeType,NodeShape,NodeSize,NodeColor
0,0,"Ahn, Byung-joon",Author,Circle,1,Light Blue
1,1,"Albright, Madeleine K.",Author,Circle,1,Light Blue
2,2,"Andersson, Magnus; Bae, Jinsun",Author,Circle,1,Light Blue
3,3,"BARANY, ZOLTAN D.",Author,Circle,1,Light Blue
4,4,"Bachman, David",Author,Circle,1,Light Blue
...,...,...,...,...,...,...
108,108,"Yee, Samuel",Author,Circle,1,Light Blue
109,109,"Yoon, Dae-Kyu; Yang, Moon-Soo",Author,Circle,1,Light Blue
110,110,"Young, Roland",Author,Circle,1,Light Blue
111,111,"Ziegler, Charles E.",Author,Circle,1,Light Blue


In [175]:
# Write the two node lists to disk as separate CSV files (row index omitted).
journals.to_csv(
    path_or_buf="../4 data_edgelist and nodelists/nkelites_nodelist_journals.csv",
    index=False,
)
authors.to_csv(
    path_or_buf="../4 data_edgelist and nodelists/nkelites_nodelist_authors.csv",
    index=False,
)

#### still Step 2. concatenate the two nodelists into an "R-ready" nodelist

In [143]:
# Create an empty node list with the intended column layout.
# NOTE(review): this frame is replaced by the pd.concat(...) result in a later
# cell, and its "NodeID" column does not match the "NodeIndex" column used by
# `authors` and `journals` -- this cell looks like a leftover; confirm.
nodelist = pd.DataFrame(
    columns=[
        "NodeID",
        "NodeLabel",
        "NodeType",
        "NodeShape",
        "NodeSize",
        "NodeColor",
    ]
)

In [178]:
# Drop the helper column IsSociologyJournal so `journals` has the same columns
# as `authors` before the two are concatenated.
# errors="ignore" plus reassignment (instead of inplace=True) keeps this cell
# re-runnable: running it a second time no longer raises KeyError.
journals = journals.drop(columns=["IsSociologyJournal"], errors="ignore")
journals

Unnamed: 0,NodeIndex,NodeLabel,NodeType,NodeShape,NodeSize,NodeColor
0,113,ANNALS of the American Academy of Political an...,Journal,Square,1,Red
1,114,Academy of Management Journal,Journal,Square,1,White
2,115,Acta Sociologica,Journal,Square,1,Red
3,116,American Historical Review,Journal,Square,1,White
4,117,American Journal of Sociology,Journal,Square,1,Red
...,...,...,...,...,...,...
64,177,Studies in Comparative Communism,Journal,Square,1,White
65,178,Systems Research and Behavioral Science,Journal,Square,1,White
66,179,Systems practice,Journal,Square,1,White
67,180,Third World Quarterly,Journal,Square,1,White


In [179]:
# Stack the author rows on top of the journal rows to form one node list.
# NOTE(review): the original row labels are kept, so the low labels appear
# twice in the result; this is harmless for the CSV export (index=False) but
# matters for any label-based .loc lookup on `nodelist`.
nodelist = pd.concat(objs=[authors, journals], axis=0)

In [180]:
# Display the combined author + journal node list.
nodelist

Unnamed: 0,NodeIndex,NodeLabel,NodeType,NodeShape,NodeSize,NodeColor
0,0,"Ahn, Byung-joon",Author,Circle,1,Light Blue
1,1,"Albright, Madeleine K.",Author,Circle,1,Light Blue
2,2,"Andersson, Magnus; Bae, Jinsun",Author,Circle,1,Light Blue
3,3,"BARANY, ZOLTAN D.",Author,Circle,1,Light Blue
4,4,"Bachman, David",Author,Circle,1,Light Blue
...,...,...,...,...,...,...
64,177,Studies in Comparative Communism,Journal,Square,1,White
65,178,Systems Research and Behavioral Science,Journal,Square,1,White
66,179,Systems practice,Journal,Square,1,White
67,180,Third World Quarterly,Journal,Square,1,White


#### still Step 2. export "R-ready" nodelist

In [181]:
# Write the combined node list to disk (row index omitted).
nodelist.to_csv(
    path_or_buf="../4 data_edgelist and nodelists/nkelites_nodelist_combined.csv",
    index=False,
)

# Step 3. make "R-ready" edgelist

#### create edge attributes
#### merge node attributes from node lists into edgelists, including Node Index
#### only select the variables relevant to edges. remove node attributes from the edgelist

#### create edge attributes

In [158]:
# Create the edge attribute `edgeweight`: the number of edgelist rows for each
# (Author, PublicationTitle) pair.
# "size" counts rows, whereas the previous "count" counted only non-null
# PublicationYear values; since rows with a missing year are not filtered out
# when the edgelist is built, "count" could yield an edge with weight 0.
# NOTE(review): duplicates on (PublicationYear, Author, PublicationTitle) were
# dropped earlier, so several articles by one author in the same journal and
# year contribute 1 to the weight -- confirm this is intended.
edgelist = edgelist.groupby(["Author", "PublicationTitle"], as_index=False).agg(
    edgeweight=("PublicationYear", "size")
)
edgelist

Unnamed: 0,Author,PublicationTitle,edgeweight
0,"Ahn, Byung-joon",Foreign Affairs,1
1,"Albright, Madeleine K.",Foreign Policy,1
2,"Andersson, Magnus; Bae, Jinsun",North Korean Review,1
3,"BARANY, ZOLTAN D.",Journal of Political & Military Sociology,1
4,"Bachman, David",Journal of Asian Studies,1
...,...,...,...
112,"Yee, Samuel",Berkeley Undergraduate Journal,1
113,"Yoon, Dae-Kyu; Yang, Moon-Soo",Asian Perspective,1
114,"Young, Roland",ANNALS of the American Academy of Political an...,1
115,"Ziegler, Charles E.",British Journal of Political Science,1


In [None]:
# List the distinct edge weights that occur, as a quick sanity check.
edgelist["edgeweight"].unique()

In [183]:
# Give the two string columns role-specific names before node attributes are
# merged in.
edgelist = edgelist.rename(
    {"Author": "AuthorLabel", "PublicationTitle": "JournalLabel"},
    axis="columns",
)

#### merge node attributes from node lists into edgelists, including Node Index

In [186]:
# Look up each edge's journal node (NodeIndex and attributes) by journal label.
# validate="many_to_one" raises pandas.errors.MergeError if `journals` holds the
# same NodeLabel on more than one row; without it such a duplicate would
# silently multiply the matching edges.
# how="inner" drops any edge whose JournalLabel has no match in `journals`.
edgelist = edgelist.merge(
    journals,
    how="inner",
    left_on="JournalLabel",
    right_on="NodeLabel",
    validate="many_to_one",
)
edgelist

Unnamed: 0,AuthorLabel,JournalLabel,edgeweight,NodeIndex,NodeLabel,NodeType,NodeShape,NodeSize,NodeColor
0,"Ahn, Byung-joon",Foreign Affairs,1,138,Foreign Affairs,Journal,Square,1,White
1,"Harrison, Selig S.",Foreign Affairs,1,138,Foreign Affairs,Journal,Square,1,White
2,"Reiss, Mitchell B.; Gallucci, Robert L.",Foreign Affairs,1,138,Foreign Affairs,Journal,Square,1,White
3,"Stoessinger, John G.",Foreign Affairs,1,138,Foreign Affairs,Journal,Square,1,White
4,"Albright, Madeleine K.",Foreign Policy,1,139,Foreign Policy,Journal,Square,1,White
...,...,...,...,...,...,...,...,...,...
112,"Vittoz, Stanley",International Journal of Politics Culture and ...,1,143,International Journal of Politics Culture and ...,Journal,Square,1,White
113,"Von Bertalanffy, Ludwig",Academy of Management Journal,1,114,Academy of Management Journal,Journal,Square,1,White
114,"Yee, Samuel",Berkeley Undergraduate Journal,1,125,Berkeley Undergraduate Journal,Journal,Square,1,White
115,"Young, Roland",ANNALS of the American Academy of Political an...,1,113,ANNALS of the American Academy of Political an...,Journal,Square,1,Red


In [187]:
# Look up each edge's author node (NodeIndex and attributes) by author label.
# Columns present on both sides receive suffixes: "_x" for the columns already
# in `edgelist` (the journal attributes, when the journal merge ran first) and
# "_y" for the columns coming from `authors`. The suffixes are spelled out
# because the column selection in a later cell relies on NodeIndex_x / NodeIndex_y.
# validate="many_to_one" raises pandas.errors.MergeError if `authors` holds the
# same NodeLabel on more than one row.
edgelist = edgelist.merge(
    authors,
    how="inner",
    left_on="AuthorLabel",
    right_on="NodeLabel",
    suffixes=("_x", "_y"),
    validate="many_to_one",
)
# Display the merged frame so the cell's output reflects its result.
edgelist

Unnamed: 0,AuthorLabel,JournalLabel,edgeweight,NodeIndex_x,NodeLabel_x,NodeType_x,NodeShape_x,NodeSize_x,NodeColor_x,NodeIndex_y,NodeLabel_y,NodeType_y,NodeShape_y,NodeSize_y,NodeColor_y
0,"Ahn, Byung-joon",Foreign Affairs,1,138,Foreign Affairs,Journal,Square,1,White,0,"Ahn, Byung-joon",Author,Circle,1,Light Blue
1,"Harrison, Selig S.",Foreign Affairs,1,138,Foreign Affairs,Journal,Square,1,White,41,"Harrison, Selig S.",Author,Circle,1,Light Blue
2,"Reiss, Mitchell B.; Gallucci, Robert L.",Foreign Affairs,1,138,Foreign Affairs,Journal,Square,1,White,90,"Reiss, Mitchell B.; Gallucci, Robert L.",Author,Circle,1,Light Blue
3,"Stoessinger, John G.",Foreign Affairs,1,138,Foreign Affairs,Journal,Square,1,White,99,"Stoessinger, John G.",Author,Circle,1,Light Blue
4,"Albright, Madeleine K.",Foreign Policy,1,139,Foreign Policy,Journal,Square,1,White,1,"Albright, Madeleine K.",Author,Circle,1,Light Blue
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,"Vittoz, Stanley",International Journal of Politics Culture and ...,1,143,International Journal of Politics Culture and ...,Journal,Square,1,White,105,"Vittoz, Stanley",Author,Circle,1,Light Blue
113,"Von Bertalanffy, Ludwig",Academy of Management Journal,1,114,Academy of Management Journal,Journal,Square,1,White,106,"Von Bertalanffy, Ludwig",Author,Circle,1,Light Blue
114,"Yee, Samuel",Berkeley Undergraduate Journal,1,125,Berkeley Undergraduate Journal,Journal,Square,1,White,108,"Yee, Samuel",Author,Circle,1,Light Blue
115,"Young, Roland",ANNALS of the American Academy of Political an...,1,113,ANNALS of the American Academy of Political an...,Journal,Square,1,Red,110,"Young, Roland",Author,Circle,1,Light Blue


#### still Step 3. only select the variables relevant to edges. exclude other node attributes from the edgelist

In [192]:
# Keep only the edge columns (author id, journal id, weight) and give the two
# id columns descriptive names.
# The rename is chained on the selection instead of being applied with
# inplace=True afterwards: an in-place rename on a column slice can raise
# SettingWithCopyWarning.
edgelist = edgelist[["NodeIndex_y", "NodeIndex_x", "edgeweight"]].rename(
    columns={"NodeIndex_y": "AuthorNodeID", "NodeIndex_x": "JournalNodeID"}
)

In [193]:
# Display the final edgelist (author id, journal id, edge weight).
edgelist

Unnamed: 0,AuthorNodeID,JournalNodeID,edgeweight
0,0,138,1
1,41,138,1
2,90,138,1
3,99,138,1
4,1,139,1
...,...,...,...
112,105,143,1
113,106,114,1
114,108,125,1
115,110,113,1


#### still Step 3. export "R-ready" edgelist as .csv file

In [194]:
# Write the final edgelist to disk (row index omitted).
edgelist.to_csv(
    path_or_buf="../4 data_edgelist and nodelists/nkelites_edgelist_author_journal.csv",
    index=False,
)