In [30]:
import pandas as pd
import re
import requests
import numpy as np
from bs4 import BeautifulSoup, NavigableString, Tag
from pathlib import Path
from time import sleep
from collections import Counter
import string
import math

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk import word_tokenize

In [31]:
df = pd.read_csv("pub.csv", index_col=False)

In [32]:
df.head()

Unnamed: 0,pub_link,pub_title,pub_date,auth_name,auth_link,id,auth_name_extract,text
0,https://pureportal.coventry.ac.uk/en/publicati...,A bibliometric review of the Waqf literature,Jun 2022,Rashedul Hasan,['https://pureportal.coventry.ac.uk/en/persons...,0,rashedul hasan,a bibliometric review of the waqf literature r...
1,https://pureportal.coventry.ac.uk/en/publicati...,A note on COVID-19 instigated maximum drawdown...,May 2022,Rashedul Hasan,['https://pureportal.coventry.ac.uk/en/persons...,1,rashedul hasan,a note on covid-19 instigated maximum drawdown...
2,https://pureportal.coventry.ac.uk/en/publicati...,Bank stock valuation theories do they explain ...,1 Mar 2022,Alireza Zarei,['https://pureportal.coventry.ac.uk/en/persons...,2,alireza zarei,bank stock valuation theories do they explain ...
3,https://pureportal.coventry.ac.uk/en/publicati...,CEO Duality and Firm Performance A Systematic ...,25 May 2022,Mei Yu,['https://pureportal.coventry.ac.uk/en/persons...,3,mei yu,ceo duality and firm performance a systematic ...
4,https://pureportal.coventry.ac.uk/en/publicati...,CEO Financial Experience and Firms Earnings Ma...,7 Mar 2022,"Thai Nguyen, Thang Nguyen, Panagiotis Andrikop...",['https://pureportal.coventry.ac.uk/en/persons...,4,thai nguyen thang nguyen panagiotis andrikopoulos,ceo financial experience and firms earnings ma...


In [6]:
def basic_preprocess_case(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text


def del_stop_words(text):

    stop_words = stopwords.words('english')
    text1 = ""
    for word in word_tokenize(text):
        if word not in stop_words and len(word) > 1:
            text1 = text1 + " " + word

    return text1

def stemming(data):
    stemmer= PorterStemmer()
    
    tokens = word_tokenize(str(data))
    new_text = ""
    for w in tokens:
        new_text = new_text + " " + stemmer.stem(w)
    return new_text

In [53]:
def preprocess(text):
    text = basic_preprocess_case(text)
    text = del_stop_words(text)
    text = stemming(text)
    return text

In [54]:
processed_text = []

for index, item in df.iterrows():
    text = item["pub_title"] + " " + item["auth_name_extract"] + " " + item["pub_date"]
    print("merged publication title, author name and publication date is:", text)
    processed_text.append(word_tokenize(str(preprocess(text))))
    print("result tokens after preprocess is:", word_tokenize(str(preprocess(text))))

merged publication title, author name and publication date is: A bibliometric review of the Waqf literature rashedul hasan Jun 2022
result tokens after preprocess is: ['bibliometr', 'review', 'waqf', 'literatur', 'rashedul', 'hasan', 'jun', '2022']
merged publication title, author name and publication date is: A note on COVID-19 instigated maximum drawdown in Islamic markets versus conventional counterparts rashedul hasan May 2022
result tokens after preprocess is: ['note', 'covid19', 'instig', 'maximum', 'drawdown', 'islam', 'market', 'versu', 'convent', 'counterpart', 'rashedul', 'hasan', 'may', '2022']
merged publication title, author name and publication date is: Bank stock valuation theories do they explain prices based on theories? alireza zarei 1 Mar 2022
result tokens after preprocess is: ['bank', 'stock', 'valuat', 'theori', 'explain', 'price', 'base', 'theori', 'alireza', 'zarei', 'mar', '2022']
merged publication title, author name and publication date is: CEO Duality and Fi

result tokens after preprocess is: ['nexu', 'environment', 'financi', 'perform', 'evid', 'gulf', 'cooper', 'council', 'bank', 'rashedul', 'hasan', '31', 'mar', '2022']
merged publication title, author name and publication date is: The relative effectiveness of R&D tax credits and R&D subsidies A comparative Meta-Regression Analysis mehtap hisarciklilar Jul 2022
result tokens after preprocess is: ['rel', 'effect', 'rd', 'tax', 'credit', 'rd', 'subsidi', 'compar', 'metaregress', 'analysi', 'mehtap', 'hisarciklilar', 'jul', '2022']
merged publication title, author name and publication date is: The rise and fall of institutional entrepreneurship in Islamic financial reporting standardisation projects ahmad abras 10 Apr 2022
result tokens after preprocess is: ['rise', 'fall', 'institut', 'entrepreneurship', 'islam', 'financi', 'report', 'standardis', 'project', 'ahmad', 'abra', '10', 'apr', '2022']
merged publication title, author name and publication date is: Tourism And Exports The Case o

result tokens after preprocess is: ['govern', 'account', 'reform', 'subsaharan', 'african', 'countri', 'select', 'ignor', 'epistem', 'commun', 'compet', 'logic', 'perspect', 'abdurafiu', 'noah', 'jul', '2021']
merged publication title, author name and publication date is: Impact of Maoism on the urban form and structure of cities in mainland China simon huston 7 May 2021
result tokens after preprocess is: ['impact', 'maoism', 'urban', 'form', 'structur', 'citi', 'mainland', 'china', 'simon', 'huston', 'may', '2021']
merged publication title, author name and publication date is: Internal corporate governance mechanisms and financial performance evidence from the UKs top FTSE 100 listed companies ibrahim elmghaamez 2 Apr 2021
result tokens after preprocess is: ['intern', 'corpor', 'govern', 'mechan', 'financi', 'perform', 'evid', 'uk', 'top', 'ftse', '100', 'list', 'compani', 'ibrahim', 'elmghaamez', 'apr', '2021']
merged publication title, author name and publication date is: Islamic mi

result tokens after preprocess is: ['lockdown', 'test', 'help', 'curb', 'covid19', 'transmiss', 'olubunmi', 'ajala', 'may', '2020']
merged publication title, author name and publication date is: Earnings management by share-financed acquirers prior to deal announcements The roles of financial expertise tenure and reputation. thai nguyen Feb 2020
result tokens after preprocess is: ['earn', 'manag', 'sharefinanc', 'acquir', 'prior', 'deal', 'announc', 'role', 'financi', 'expertis', 'tenur', 'reput', 'thai', 'nguyen', 'feb', '2020']
merged publication title, author name and publication date is: Effects of director networks on acquiring firms earnings management prior to M&A thai nguyen thang nguyen 28 Jan 2020
result tokens after preprocess is: ['effect', 'director', 'network', 'acquir', 'firm', 'earn', 'manag', 'prior', 'thai', 'nguyen', 'thang', 'nguyen', '28', 'jan', '2020']
merged publication title, author name and publication date is: Eurasian Business Perspectives Proceedings of the

result tokens after preprocess is: ['new', 'approach', 'assess', 'csr', 'sustain', 'disclosur', 'insight', 'updat', 'webbas', 'data', 'tariq', 'al', 'montas', 'apr', '2019']
merged publication title, author name and publication date is: Political ideology and NGO accountability the case of the Syrian conflict ahmad abras 2019
result tokens after preprocess is: ['polit', 'ideolog', 'ngo', 'account', 'case', 'syrian', 'conflict', 'ahmad', 'abra', '2019']
merged publication title, author name and publication date is: Report of the Key Development Needs (KDNs Assessment Exercise by the State of Osun's Citizens for Improved Governance and Better Service Delivery ejike udeogu 2019
result tokens after preprocess is: ['report', 'key', 'develop', 'need', 'kdn', 'assess', 'exercis', 'state', 'osun', 'citizen', 'improv', 'govern', 'better', 'servic', 'deliveri', 'ejik', 'udeogu', '2019']
merged publication title, author name and publication date is: Share-Option Based Compensation Expense Shareho

result tokens after preprocess is: ['predict', 'unlist', 'sme', 'default', 'incorpor', 'market', 'inform', 'accountingbas', 'model', 'improv', 'accuraci', 'pano', 'andrikopoulo', 'amir', 'khorasgani', 'sep', '2018']
merged publication title, author name and publication date is: Pulling effect of Colonial Legacies on Africa’s Inward FDI maktoba omar 5 Sep 2018
result tokens after preprocess is: ['pull', 'effect', 'coloni', 'legaci', 'africa', 'inward', 'fdi', 'maktoba', 'omar', 'sep', '2018']
merged publication title, author name and publication date is: Revisiting the evidence of earnings management prior to merger announcements an application of Benford’s law thai nguyen 26 Dec 2018
result tokens after preprocess is: ['revisit', 'evid', 'earn', 'manag', 'prior', 'merger', 'announc', 'applic', 'benford', 'law', 'thai', 'nguyen', '26', 'dec', '2018']
merged publication title, author name and publication date is: Risks Associated with Payment Banks and Mobile-Based Money Platforms hafiz 

merged publication title, author name and publication date is: Short and Long-Run Determinates of Tourist Flows The Case of South Korea dimitris  jim serenis 2017
result tokens after preprocess is: ['short', 'longrun', 'determin', 'tourist', 'flow', 'case', 'south', 'korea', 'dimitri', 'jim', 'sereni', '2017']
merged publication title, author name and publication date is: Smart Urban Planning simon huston 14 Sep 2017
result tokens after preprocess is: ['smart', 'urban', 'plan', 'simon', 'huston', '14', 'sep', '2017']
merged publication title, author name and publication date is: Smart urban regeneration Visions institutions and mechanisms for real estate simon huston 14 Sep 2017
result tokens after preprocess is: ['smart', 'urban', 'regener', 'vision', 'institut', 'mechan', 'real', 'estat', 'simon', 'huston', '14', 'sep', '2017']
merged publication title, author name and publication date is: Special issue of the Review of Behavioral Finance guest editors introduction panos andrikopoulo

result tokens after preprocess is: ['fatal', 'sensit', 'coalit', 'countri', 'studi', 'british', 'polish', 'australian', 'public', 'opinion', 'iraq', 'war', 'piotr', 'li', '31', 'jan', '2015']
merged publication title, author name and publication date is: Finance-Growth Virtuous and Dis-Virtuous Cycles (VDCs Theory and Empirical Evidence eliana lauretta 2015
result tokens after preprocess is: ['financegrowth', 'virtuou', 'disvirtu', 'cycl', 'vdc', 'theori', 'empir', 'evid', 'eliana', 'lauretta', '2015']
merged publication title, author name and publication date is: Fluctuations in the Exchange Rate and Aggregate Exports in Ukraine dimitris serenis 2015
result tokens after preprocess is: ['fluctuat', 'exchang', 'rate', 'aggreg', 'export', 'ukrain', 'dimitri', 'sereni', '2015']
merged publication title, author name and publication date is: Microfinance and Poverty Reduction The Case Study of Selected Institutions in the Greater Accra Region of Ghana francis darko 2015
result tokens after 

result tokens after preprocess is: ['dono', 'mercati', 'innovazion', 'la', 'virt', 'di', 'una', 'sana', 'crescita', 'economica', 'eliana', 'lauretta', '2013']
merged publication title, author name and publication date is: Ensuring sustainable corporate governance and financial reporting essential tools for improved user confidence abdurafiu noah 1 Jan 2013
result tokens after preprocess is: ['ensur', 'sustain', 'corpor', 'govern', 'financi', 'report', 'essenti', 'tool', 'improv', 'user', 'confid', 'abdurafiu', 'noah', 'jan', '2013']
merged publication title, author name and publication date is: Finance and Growth Understanding the Switch from “Virtuous To “Bad Cycles in the Finance-Growth Relationship eliana lauretta 2013
result tokens after preprocess is: ['financ', 'growth', 'understand', 'switch', 'virtuou', 'bad', 'cycl', 'financegrowth', 'relationship', 'eliana', 'lauretta', '2013']
merged publication title, author name and publication date is: Policy proposal on Finance and Growt

result tokens after preprocess is: ['develop', 'comprehens', 'crosscountri', 'econom', 'growth', 'databas', 'sailesh', 'tanna', '1997']


In [55]:
date_freq = {}
for i in range(len(processed_text)):
    tokens = processed_text[i]
    for w in tokens:
        try:
            print(w)
            print(date_freq[w])
            date_freq[w].add(i)
        except:
            date_freq[w] = {i}

for i in date_freq:
    date_freq[i] = len(date_freq[i])

bibliometr
review
waqf
literatur
rashedul
hasan
jun
2022
note
covid19
instig
maximum
drawdown
islam
market
versu
convent
counterpart
rashedul
{0}
hasan
{0}
may
2022
{0}
bank
stock
valuat
theori
explain
price
base
theori
{2}
alireza
zarei
mar
2022
{0, 1}
ceo
dualiti
firm
perform
systemat
review
{0}
research
agenda
mei
yu
25
may
{1}
2022
{0, 1, 2}
ceo
{3}
financi
experi
firm
{3}
earn
manag
import
experi
{4}
specif
thai
nguyen
thang
nguyen
{4}
panagioti
andrikopoulo
mar
{2}
2022
{0, 1, 2, 3}
ceo
{3, 4}
tenur
cost
debt
ruth
owusu
mensah
aug
2022
{0, 1, 2, 3, 4}
cheat
behaviour
among
opec
memberst
oil
price
{2}
fair
stabil
empir
analysi
masud
ibrahim
2022
{0, 1, 2, 3, 4, 5}
clinician
inform
acquisit
account
literaci
uk
clinic
commiss
group
john
ayuk
enombu
22
jun
{0}
2022
{0, 1, 2, 3, 4, 5, 6}
compet
institut
logic
power
dynam
islam
{1}
financi
{4}
report
standardis
project
ahmad
abra
10
may
{1, 3}
2022
{0, 1, 2, 3, 4, 5, 6, 7}
corpor
govern
ifsb
standard4
evid
islam
{8, 1}
bank
{2}
banglad

{11, 12, 15, 30, 62}
industri
{49, 44}
energi
demand
dynam
{8, 29}
hierarch
model
{16, 72, 39}
akin
sharimakin
jun
{0, 17, 50, 7}
2021
{40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}
monitor
exchang
{12}
rate
instabl
12
{25, 38}
select
{48, 66}
islam
{1, 36, 69, 39, 8, 9, 56}
economi
{48}
alireza
{2, 23}
zarei
{2, 23}
sep
{42, 59, 52}
2021
{40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72}
perform
{33, 34, 3, 68, 12, 15, 48, 52, 56, 62}
measur
{25, 43}
smart
{29}
citi
{67, 29}
govern
{66, 68, 9, 17, 50, 52, 25, 57}
case
{37, 41, 46, 47, 29}
studi
{46}
egyptian
citi
{74, 67, 29}
council
{34, 53}
loai
{17, 29}
alsaid
{17, 29}
2021
{40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73}
probabl
merger
acquisit
{7}
deal
failur
{51}
sailesh
{64}
tanna

list
{64, 12, 52, 68}
european
{110}
footbal
club
valuead
report
{32, 36, 8, 49, 25, 27, 31}
perspect
{66, 41, 42, 107, 108, 109, 50}
daniel
{116, 101, 38}
rang
25
{3}
nov
{70, 45, 48, 87, 55, 62}
2020
{94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117}
provid
enabl
environ
{49, 28, 101}
promot
sustain
{32, 16, 118, 31, 29, 63}
develop
{16, 17, 80, 30, 63}
goal
{63}
coventri
univers
experi
{4}
jaliyyah
{19, 63}
bello
{19, 63}
sep
{103, 73, 42, 52, 86, 59}
2020
{94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118}
reflect
{65}
ontolog
mytholog
creativ
sandar
{41, 71}
win
{41, 90, 71}
jul
{66, 35, 15, 80, 112, 19, 23, 90, 92}
2020
{94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}
risk
{45, 54, 23}
govern
{66, 99, 68, 100, 103, 9, 74, 76, 80, 17, 50, 52, 25, 57}
cybercrim
hierarch
{72}
re

expens
sharehold
{126}
return
{58, 138}
financi
{4, 8, 139, 12, 19, 21, 150, 153, 27, 34, 36, 41, 48, 52, 61, 62, 68, 105, 110, 111, 112, 116, 122, 123}
crisi
{31, 110, 23}
alaa
{151, 27, 110, 55}
alhaj
{151, 27, 110, 55}
ismail
{151, 27, 110, 55}
apr
{159, 161, 36, 68, 38, 39, 100, 12, 13, 116, 54, 27, 30, 95}
2019
{133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163}
shari
{122, 100}
ah
{122, 100}
govern
{121, 66, 99, 68, 100, 163, 103, 9, 74, 76, 143, 80, 17, 50, 144, 52, 25, 57}
islami
bank
{2, 34, 135, 9, 45, 143, 144, 113, 53, 85, 87, 117, 150, 156, 151, 127}
bangladesh
{9, 99, 113}
limit
implic
{50, 77, 158, 159}
ownership
{64, 27}
chang
{158}
hafij
{99, 132, 9, 140, 47, 113, 122}
ullah
{99, 132, 9, 140, 47, 113, 122}
29
may
{1, 3, 135, 8, 11, 14, 144, 20, 148, 25, 26, 32, 160, 37, 46, 67, 81, 99, 104, 121, 124}
2019
{133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 1

evid
{131, 136, 9, 11, 149, 27, 34, 38, 168, 169, 174, 48, 52, 54, 55, 183, 57, 59, 190, 68, 70, 87, 95, 102, 103, 109, 112}
south
korea
jin
{169, 180, 70}
suk
{169, 180, 70}
park
{169, 180, 70}
2018
{174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204}
two
{18}
centuri
farmland
price
{2, 6, 39, 72, 167, 116, 117, 28, 158}
england
{160}
huston
{32, 65, 96, 67, 97, 194, 195, 20, 24, 155, 94, 31}
2018
{174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205}
uk
{68, 7, 109, 145, 146, 149, 55, 124, 125}
consum
{145}
perceiv
risk
{191, 138, 45, 54, 23, 121, 123, 159}
buy
product
{64, 12, 128, 168}
emerg
{200, 136, 48, 148, 23}
economi
{128, 130, 73, 48, 148}
moder
{64}
mediat
model
{135, 72, 39, 16, 188}
wei
{192}
song
{192}
may
{1, 3, 135, 8, 11, 14, 144, 20, 148, 25, 26, 32, 160, 37, 16

portfolio
{84}
differ
{28, 39}
market
{1, 33, 223, 70, 136, 169, 200, 112, 209, 19, 116, 117, 180, 23, 188, 221, 159}
condit
{98}
sarkar
{193, 131, 39, 136, 138, 109, 143, 84, 56, 156}
kabir
{193, 131, 39, 136, 138, 109, 143, 84, 56, 156}
2017
{209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229}
short
longrun
determin
{58, 196, 180, 102}
tourist
flow
{214}
case
{162, 99, 100, 37, 167, 41, 74, 170, 46, 47, 208, 81, 211, 218, 219, 124, 29}
south
{205}
korea
{205}
dimitri
{37}
jim
sereni
{211, 37, 219}
2017
{209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230}
smart
{74, 29, 95}
urban
{67}
plan
{29}
simon
{32, 65, 96, 67, 97, 194, 195, 227, 20, 215, 24, 89, 155, 216, 94, 31}
huston
{32, 65, 96, 67, 97, 194, 195, 224, 227, 206, 20, 215, 24, 155, 216, 94, 31}
14
{192, 227, 171, 215, 216, 91}
sep
{130, 131, 227, 103, 73, 42, 218, 142, 52, 86, 119, 216, 215, 59, 188, 189}
2017
{209, 2

{0, 129, 261, 7, 17, 147, 174, 50, 192, 195, 72, 201, 76, 96, 97, 234, 107, 108, 110, 242, 251}
2015
{269, 270, 271, 272, 273, 274}
compound
interestreturn
lien
{152, 220}
luu
{152, 220}
30
{32, 130, 102, 172, 48, 209, 242, 26, 60, 62}
apr
{159, 161, 36, 68, 38, 39, 100, 164, 12, 13, 268, 116, 54, 183, 186, 27, 30, 95}
2015
{269, 270, 271, 272, 273, 274, 275}
corpor
{9, 10, 11, 12, 141, 142, 15, 143, 17, 144, 145, 269, 25, 156, 33, 176, 49, 50, 51, 52, 177, 185, 62, 68, 80, 101, 103, 115}
govern
{9, 143, 144, 17, 25, 163, 165, 176, 177, 50, 52, 57, 185, 66, 68, 74, 76, 80, 99, 100, 103, 121}
china
{67, 195, 198, 183, 57}
uncertainti
rule
{98}
mark
{239}
holm
{239}
13
{144, 89, 226}
jul
{224, 66, 35, 262, 15, 80, 112, 272, 19, 246, 23, 120, 153, 90, 187, 92, 157, 184}
2015
{269, 270, 271, 272, 273, 274, 275, 276}
corpor
{9, 10, 11, 12, 141, 142, 15, 143, 17, 144, 145, 269, 277, 25, 156, 33, 176, 49, 50, 51, 52, 177, 185, 62, 68, 80, 101, 103, 115}
social
{64, 11, 12, 15, 80, 145, 30, 88

2014
{301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318}
rate
{196, 295, 296, 73, 304, 306, 307, 310, 280, 283}
profit
{297, 58, 275, 77}
financialis
{184}
econom
{248, 198, 71, 137, 202, 107, 108, 45, 173, 270, 298, 147, 211, 214, 184}
growth
{202, 308, 246, 71}
theoret
{115}
empir
{128, 257, 259, 260, 134, 6, 297, 44, 236, 174, 143, 115, 313, 282}
investig
{97, 155, 260, 265}
nigerian
{290, 297, 210, 148, 279, 218}
economi
{128, 130, 290, 294, 263, 73, 267, 269, 301, 207, 48, 148, 218}
ejik
{184, 290, 163, 134}
udeogu
{184, 290, 163, 134}
2014
{301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319}
virtuou
{282, 38}
bad
cycl
{224, 294, 282, 38}
financegrowth
{282}
relationship
{128, 203, 62, 223}
eliana
{38, 282, 270, 308, 250, 123, 221}
lauretta
{38, 282, 270, 308, 250, 123, 221}
2014
{301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320}
idea
british
{281, 18

{363}
human
{64, 129, 154}
capit
{64, 33, 102, 134, 135, 186, 10, 297, 45, 77, 22, 184, 154, 316}
trade
{128, 90, 169, 202, 92, 304, 58, 28}
fdi
{303, 240, 183, 124, 189}
econom
{248, 320, 198, 71, 137, 202, 107, 108, 45, 173, 270, 298, 329, 147, 211, 214, 343, 184}
growth
{320, 358, 71, 327, 328, 202, 362, 308, 246}
thailand
{202, 52}
caus
sailesh
{259, 159, 168, 183, 64, 202, 75, 203, 204, 335, 209, 85, 86, 355, 359, 360, 363, 238, 111, 117, 246, 248, 251, 255}
tanna
{259, 159, 168, 183, 64, 202, 75, 203, 204, 335, 209, 85, 86, 213, 355, 359, 360, 363, 238, 111, 117, 246, 248, 251, 255}
2005
{363, 364}
corpor
{9, 10, 11, 12, 141, 142, 15, 143, 17, 144, 145, 269, 277, 278, 279, 25, 156, 33, 301, 176, 49, 50, 51, 52, 177, 185, 62, 323, 68, 326, 332, 80, 101, 357, 103, 115}
board
{136, 143, 144, 15, 50, 239, 272}
incent
empir
{128, 257, 320, 259, 260, 355, 134, 6, 297, 330, 44, 236, 174, 143, 115, 313, 282}
evid
{257, 131, 263, 136, 9, 11, 149, 282, 27, 34, 290, 38, 294, 168, 169, 295, 

In [57]:
total_vocab_size = len(date_freq)
total_vocab_size

1330

In [56]:
date_freq

{'bibliometr': 2,
 'review': 10,
 'waqf': 1,
 'literatur': 3,
 'rashedul': 12,
 'hasan': 12,
 'jun': 26,
 '2022': 40,
 'note': 1,
 'covid19': 4,
 'instig': 1,
 'maximum': 1,
 'drawdown': 1,
 'islam': 27,
 'market': 33,
 'versu': 2,
 'convent': 7,
 'counterpart': 4,
 'may': 25,
 'bank': 44,
 'stock': 15,
 'valuat': 2,
 'theori': 9,
 'explain': 2,
 'price': 15,
 'base': 4,
 'alireza': 3,
 'zarei': 3,
 'mar': 27,
 'ceo': 4,
 'dualiti': 1,
 'firm': 18,
 'perform': 23,
 'systemat': 1,
 'research': 6,
 'agenda': 1,
 'mei': 7,
 'yu': 9,
 '25': 6,
 'financi': 48,
 'experi': 6,
 'earn': 6,
 'manag': 12,
 'import': 4,
 'specif': 1,
 'thai': 6,
 'nguyen': 8,
 'thang': 6,
 'panagioti': 3,
 'andrikopoulo': 11,
 'tenur': 3,
 'cost': 11,
 'debt': 6,
 'ruth': 1,
 'owusu': 1,
 'mensah': 1,
 'aug': 22,
 'cheat': 1,
 'behaviour': 1,
 'among': 1,
 'opec': 1,
 'memberst': 1,
 'oil': 2,
 'fair': 2,
 'stabil': 4,
 'empir': 18,
 'analysi': 28,
 'masud': 3,
 'ibrahim': 12,
 'clinician': 1,
 'inform': 6,
 'acqu

In [38]:
total_vocab = [x for x in date_freq]
total_vocab

['bibliometr',
 'review',
 'waqf',
 'literatur',
 'rashedul',
 'hasan',
 'jun',
 '2022',
 'note',
 'covid19',
 'instig',
 'maximum',
 'drawdown',
 'islam',
 'market',
 'versu',
 'convent',
 'counterpart',
 'may',
 'bank',
 'stock',
 'valuat',
 'theori',
 'explain',
 'price',
 'base',
 'alireza',
 'zarei',
 'mar',
 'ceo',
 'dualiti',
 'firm',
 'perform',
 'systemat',
 'research',
 'agenda',
 'mei',
 'yu',
 '25',
 'financi',
 'experi',
 'earn',
 'manag',
 'import',
 'specif',
 'thai',
 'nguyen',
 'thang',
 'panagioti',
 'andrikopoulo',
 'tenur',
 'cost',
 'debt',
 'ruth',
 'owusu',
 'mensah',
 'aug',
 'cheat',
 'behaviour',
 'among',
 'opec',
 'memberst',
 'oil',
 'fair',
 'stabil',
 'empir',
 'analysi',
 'masud',
 'ibrahim',
 'clinician',
 'inform',
 'acquisit',
 'account',
 'literaci',
 'uk',
 'clinic',
 'commiss',
 'group',
 'john',
 'ayuk',
 'enombu',
 '22',
 'compet',
 'institut',
 'logic',
 'power',
 'dynam',
 'report',
 'standardis',
 'project',
 'ahmad',
 'abra',
 '10',
 'corpor'

In [39]:
print(total_vocab[:20])

['bibliometr', 'review', 'waqf', 'literatur', 'rashedul', 'hasan', 'jun', '2022', 'note', 'covid19', 'instig', 'maximum', 'drawdown', 'islam', 'market', 'versu', 'convent', 'counterpart', 'may', 'bank']


In [58]:
def doc_freq(word):
    indice = 0
    try:
        indice = date_freq[word]
    except:
        pass
    return indice

In [59]:
doc_id = 0

tf_idf = {}
len_docs = len(processed_text)

for i in range(len(processed_text)):
    
    tokens = processed_text[i]
    print(tokens)
    counter = Counter(tokens)
    print(counter)
    words_count = len(tokens)
    print(words_count)
    unique_token = np.unique(tokens)
    print(unique_token)
    for token in unique_token:
        print(token)
        tf = counter[token]/words_count
        print(tf)
        doc_fre = doc_freq(token)
        print(doc_fre)
        idf = np.log((len_docs+1)/(doc_fre+1))
        print(idf)
        tf_idf[doc_id, token] = tf*idf
        print(idf)
    
    doc_id += 1

['bibliometr', 'review', 'waqf', 'literatur', 'rashedul', 'hasan', 'jun', '2022']
Counter({'bibliometr': 1, 'review': 1, 'waqf': 1, 'literatur': 1, 'rashedul': 1, 'hasan': 1, 'jun': 1, '2022': 1})
8
['2022' 'bibliometr' 'hasan' 'jun' 'literatur' 'rashedul' 'review' 'waqf']
2022
0.125
40
2.2080063529395075
2.2080063529395075
bibliometr
0.125
2
4.822966130975706
4.822966130975706
hasan
0.125
12
3.3566290621822787
3.3566290621822787
jun
0.125
26
2.6257415536394864
2.6257415536394864
literatur
0.125
3
4.535284058523925
4.535284058523925
rashedul
0.125
12
3.3566290621822787
3.3566290621822787
review
0.125
10
3.523683146845445
3.523683146845445
waqf
0.125
1
5.2284312390838705
5.2284312390838705
['note', 'covid19', 'instig', 'maximum', 'drawdown', 'islam', 'market', 'versu', 'convent', 'counterpart', 'rashedul', 'hasan', 'may', '2022']
Counter({'note': 1, 'covid19': 1, 'instig': 1, 'maximum': 1, 'drawdown': 1, 'islam': 1, 'market': 1, 'versu': 1, 'convent': 1, 'counterpart': 1, 'rashedul': 1,

2.2080063529395075
2.2080063529395075
african
0.06666666666666667
8
3.724353842307596
3.724353842307596
bello
0.06666666666666667
5
4.1298189504157605
4.1298189504157605
contagion
0.06666666666666667
4
4.312140507209715
4.312140507209715
crise
0.06666666666666667
2
4.822966130975706
4.822966130975706
effect
0.06666666666666667
22
2.786084203714666
2.786084203714666
financi
0.06666666666666667
48
2.029758121533189
2.029758121533189
jaliyyah
0.06666666666666667
5
4.1298189504157605
4.1298189504157605
jul
0.06666666666666667
20
2.8770559819203925
2.8770559819203925
khaleq
0.06666666666666667
4
4.312140507209715
4.312140507209715
major
0.06666666666666667
3
4.535284058523925
4.535284058523925
market
0.06666666666666667
33
2.3952178950276544
2.3952178950276544
mohammad
0.06666666666666667
4
4.312140507209715
4.312140507209715
newaz
0.06666666666666667
5
4.1298189504157605
4.1298189504157605
stock
0.06666666666666667
15
3.1489896974040343
3.1489896974040343
['futur', 'educ', 'research', 'pre

8
3.724353842307596
3.724353842307596
analysi
0.058823529411764705
28
2.5542825896573413
2.5542825896573413
capit
0.058823529411764705
15
3.1489896974040343
3.1489896974040343
corpor
0.058823529411764705
37
2.28399225991743
2.28399225991743
crosscountri
0.058823529411764705
3
4.535284058523925
4.535284058523925
diffus
0.058823529411764705
2
4.822966130975706
4.822966130975706
effect
0.058823529411764705
22
2.786084203714666
2.786084203714666
elmghaamez
0.058823529411764705
8
3.724353842307596
3.724353842307596
ibrahim
0.058823529411764705
12
3.3566290621822787
3.3566290621822787
ifr
0.058823529411764705
4
4.312140507209715
4.312140507209715
innov
0.058823529411764705
6
3.975668270588502
3.975668270588502
jan
0.058823529411764705
34
2.366230358154402
2.366230358154402
market
0.058823529411764705
33
2.3952178950276544
2.3952178950276544
multin
0.058823529411764705
1
5.2284312390838705
5.2284312390838705
perform
0.058823529411764705
23
2.7435245892958697
2.7435245892958697
theori
0.058823

10
['2021' 'analysi' 'aug' 'equiti' 'global' 'invest' 'panetsid' 'privat'
 'public' 'styliani']
2021
0.1
54
1.9142452344113448
1.9142452344113448
analysi
0.1
28
2.5542825896573413
2.5542825896573413
aug
0.1
22
2.786084203714666
2.786084203714666
equiti
0.1
10
3.523683146845445
3.523683146845445
global
0.1
8
3.724353842307596
3.724353842307596
invest
0.1
12
3.3566290621822787
3.3566290621822787
panetsid
0.1
2
4.822966130975706
4.822966130975706
privat
0.1
4
4.312140507209715
4.312140507209715
public
0.1
13
3.282521090028557
3.282521090028557
styliani
0.1
2
4.822966130975706
4.822966130975706
['histor', 'institutionalist', 'perspect', 'persist', 'state', 'control', 'financi', 'sector', 'reform', 'insight', 'case', 'myanmar', 'sandar', 'win', '26', 'jan', '2021']
Counter({'histor': 1, 'institutionalist': 1, 'perspect': 1, 'persist': 1, 'state': 1, 'control': 1, 'financi': 1, 'sector': 1, 'reform': 1, 'insight': 1, 'case': 1, 'myanmar': 1, 'sandar': 1, 'win': 1, '26': 1, 'jan': 1, '2021': 

4.535284058523925
['databas', 'facilit', 'account', 'case', 'australian', 'mercuri', 'account', 'via', 'nation', 'pollut', 'inventori', 'hafij', 'ullah', 'jan', '2021']
Counter({'account': 2, 'databas': 1, 'facilit': 1, 'case': 1, 'australian': 1, 'mercuri': 1, 'via': 1, 'nation': 1, 'pollut': 1, 'inventori': 1, 'hafij': 1, 'ullah': 1, 'jan': 1, '2021': 1})
15
['2021' 'account' 'australian' 'case' 'databas' 'facilit' 'hafij'
 'inventori' 'jan' 'mercuri' 'nation' 'pollut' 'ullah' 'via']
2021
0.06666666666666667
54
1.9142452344113448
1.9142452344113448
account
0.13333333333333333
28
2.5542825896573413
2.5542825896573413
australian
0.06666666666666667
3
4.535284058523925
4.535284058523925
case
0.06666666666666667
26
2.6257415536394864
2.6257415536394864
databas
0.06666666666666667
2
4.822966130975706
4.822966130975706
facilit
0.06666666666666667
1
5.2284312390838705
5.2284312390838705
hafij
0.06666666666666667
12
3.3566290621822787
3.3566290621822787
inventori
0.06666666666666667
1
5.2284

2.029758121533189
2.029758121533189
inclus
0.2
3
4.535284058523925
4.535284058523925
nikhil
0.2
4
4.312140507209715
4.312140507209715
sapr
0.2
4
4.312140507209715
4.312140507209715
['financi', 'inclus', 'corpor', 'social', 'respons', 'firm', 'perform', 'analysi', 'interact', 'relationship', 'mahbub', 'khan', '30', 'nov', '2021']
Counter({'financi': 1, 'inclus': 1, 'corpor': 1, 'social': 1, 'respons': 1, 'firm': 1, 'perform': 1, 'analysi': 1, 'interact': 1, 'relationship': 1, 'mahbub': 1, 'khan': 1, '30': 1, 'nov': 1, '2021': 1})
15
['2021' '30' 'analysi' 'corpor' 'financi' 'firm' 'inclus' 'interact'
 'khan' 'mahbub' 'nov' 'perform' 'relationship' 'respons' 'social']
2021
0.06666666666666667
54
1.9142452344113448
1.9142452344113448
30
0.06666666666666667
14
3.2135282185416054
3.2135282185416054
analysi
0.06666666666666667
28
2.5542825896573413
2.5542825896573413
corpor
0.06666666666666667
37
2.28399225991743
2.28399225991743
financi
0.06666666666666667
48
2.029758121533189
2.02975812153

34
2.366230358154402
2.366230358154402
merger
0.1
3
4.535284058523925
4.535284058523925
probabl
0.1
1
5.2284312390838705
5.2284312390838705
sailesh
0.1
30
2.487591215158669
2.487591215158669
tanna
0.1
31
2.455842516844089
2.455842516844089
['reason', 'behind', 'worldwid', 'divers', 'ident', 'issuanc', 'good', 'govern', 'code', 'ibrahim', 'elmghaamez', 'jun', '2021']
Counter({'reason': 1, 'behind': 1, 'worldwid': 1, 'divers': 1, 'ident': 1, 'issuanc': 1, 'good': 1, 'govern': 1, 'code': 1, 'ibrahim': 1, 'elmghaamez': 1, 'jun': 1, '2021': 1})
13
['2021' 'behind' 'code' 'divers' 'elmghaamez' 'good' 'govern' 'ibrahim'
 'ident' 'issuanc' 'jun' 'reason' 'worldwid']
2021
0.07692307692307693
54
1.9142452344113448
1.9142452344113448
behind
0.07692307692307693
1
5.2284312390838705
5.2284312390838705
code
0.07692307692307693
3
4.535284058523925
4.535284058523925
divers
0.07692307692307693
4
4.312140507209715
4.312140507209715
elmghaamez
0.07692307692307693
8
3.724353842307596
3.724353842307596
goo

1
5.2284312390838705
5.2284312390838705
hisarciklilar
0.07142857142857142
12
3.3566290621822787
3.3566290621822787
i̇nşaat
0.07142857142857142
1
5.2284312390838705
5.2284312390838705
mar
0.07142857142857142
27
2.5893739094686117
2.5893739094686117
mehtap
0.07142857142857142
12
3.3566290621822787
3.3566290621822787
sanayii
0.07142857142857142
1
5.2284312390838705
5.2284312390838705
sektörel
0.07142857142857142
1
5.2284312390838705
5.2284312390838705
standartlarının
0.07142857142857142
1
5.2284312390838705
5.2284312390838705
türkiy
0.07142857142857142
1
5.2284312390838705
5.2284312390838705
çalışma
0.07142857142857142
1
5.2284312390838705
5.2284312390838705
üzerinden
0.07142857142857142
1
5.2284312390838705
5.2284312390838705
['understand', 'public', 'sentiment', 'relat', 'african', 'continent', 'free', 'trade', 'agreement', 'olubunmi', 'ajala', 'jul', '2021']
Counter({'understand': 1, 'public': 1, 'sentiment': 1, 'relat': 1, 'african': 1, 'continent': 1, 'free': 1, 'trade': 1, 'agreemen

weak
0.08333333333333333
1
5.2284312390838705
5.2284312390838705
['determin', 'capit', 'flight', 'new', 'panel', 'evid', 'subsaharan', 'africa', 'ssa', 'alloysiu', 'egbulonu', '30', 'dec', '2020']
Counter({'determin': 1, 'capit': 1, 'flight': 1, 'new': 1, 'panel': 1, 'evid': 1, 'subsaharan': 1, 'africa': 1, 'ssa': 1, 'alloysiu': 1, 'egbulonu': 1, '30': 1, 'dec': 1, '2020': 1})
14
['2020' '30' 'africa' 'alloysiu' 'capit' 'dec' 'determin' 'egbulonu'
 'evid' 'flight' 'new' 'panel' 'ssa' 'subsaharan']
2020
0.07142857142857142
39
2.2326989655298792
2.2326989655298792
30
0.07142857142857142
14
3.2135282185416054
3.2135282185416054
africa
0.07142857142857142
7
3.8421368779639797
3.8421368779639797
alloysiu
0.07142857142857142
2
4.822966130975706
4.822966130975706
capit
0.07142857142857142
15
3.1489896974040343
3.1489896974040343
dec
0.07142857142857142
15
3.1489896974040343
3.1489896974040343
determin
0.07142857142857142
11
3.436671769855815
3.436671769855815
egbulonu
0.07142857142857142
2
4.

8
3.724353842307596
3.724353842307596
audit
0.06666666666666667
5
4.1298189504157605
4.1298189504157605
consequ
0.06666666666666667
2
4.822966130975706
4.822966130975706
earli
0.06666666666666667
1
5.2284312390838705
5.2284312390838705
elmghaamez
0.06666666666666667
8
3.724353842307596
3.724353842307596
evid
0.06666666666666667
48
2.029758121533189
2.029758121533189
financi
0.06666666666666667
48
2.029758121533189
2.029758121533189
ibrahim
0.06666666666666667
12
3.3566290621822787
3.3566290621822787
intern
0.13333333333333333
13
3.282521090028557
3.282521090028557
jul
0.06666666666666667
20
2.8770559819203925
2.8770559819203925
market
0.06666666666666667
33
2.3952178950276544
2.3952178950276544
standard
0.06666666666666667
6
3.975668270588502
3.975668270588502
['green', 'green', 'bank', 'invest', 'bangladesh', 'paradox', 'green', 'bank', 'practic', 'hafij', 'ullah', '2020']
Counter({'green': 3, 'bank': 2, 'invest': 1, 'bangladesh': 1, 'paradox': 1, 'practic': 1, 'hafij': 1, 'ullah': 1,

provid
0.07142857142857142
1
5.2284312390838705
5.2284312390838705
sep
0.07142857142857142
28
2.5542825896573413
2.5542825896573413
sustain
0.07142857142857142
13
3.282521090028557
3.282521090028557
univers
0.07142857142857142
8
3.724353842307596
3.724353842307596
['reflect', 'ontolog', 'mytholog', 'creativ', 'sandar', 'win', 'jul', '2020']
Counter({'reflect': 1, 'ontolog': 1, 'mytholog': 1, 'creativ': 1, 'sandar': 1, 'win': 1, 'jul': 1, '2020': 1})
8
['2020' 'creativ' 'jul' 'mytholog' 'ontolog' 'reflect' 'sandar' 'win']
2020
0.125
39
2.2326989655298792
2.2326989655298792
creativ
0.125
1
5.2284312390838705
5.2284312390838705
jul
0.125
20
2.8770559819203925
2.8770559819203925
mytholog
0.125
1
5.2284312390838705
5.2284312390838705
ontolog
0.125
1
5.2284312390838705
5.2284312390838705
reflect
0.125
3
4.535284058523925
4.535284058523925
sandar
0.125
3
4.535284058523925
4.535284058523925
win
0.125
4
4.312140507209715
4.312140507209715
['risk', 'govern', 'cybercrim', 'hierarch', 'regress', '

 'may' 'model' 'ratio' 'structur']
2019
0.08333333333333333
41
2.183908801360447
2.183908801360447
adequaci
0.08333333333333333
1
5.2284312390838705
5.2284312390838705
alpha
0.08333333333333333
1
5.2284312390838705
5.2284312390838705
baldwin
0.08333333333333333
5
4.1298189504157605
4.1298189504157605
bank
0.08333333333333333
44
2.114915929873496
2.114915929873496
capit
0.08333333333333333
15
3.1489896974040343
3.1489896974040343
islam
0.08333333333333333
27
2.5893739094686117
2.5893739094686117
kenneth
0.08333333333333333
4
4.312140507209715
4.312140507209715
may
0.08333333333333333
25
2.6634818816223333
2.6634818816223333
model
0.08333333333333333
9
3.6189933266497696
3.6189933266497696
ratio
0.08333333333333333
1
5.2284312390838705
5.2284312390838705
structur
0.08333333333333333
11
3.436671769855815
3.436671769855815
['board', 'composit', 'insur', 'firm', 'perform', 'evid', 'emerg', 'market', 'sarkar', 'kabir', '2019']
Counter({'board': 1, 'composit': 1, 'insur': 1, 'firm': 1, 'perfo

insur
0.06666666666666667
2
4.822966130975706
4.822966130975706
kabir
0.06666666666666667
16
3.0883650755875993
3.0883650755875993
qualiti
0.06666666666666667
12
3.3566290621822787
3.3566290621822787
sarkar
0.06666666666666667
16
3.0883650755875993
3.0883650755875993
studi
0.06666666666666667
17
3.031206661747651
3.031206661747651
['corpor', 'govern', 'ghana', 'analysi', 'board', 'account', 'ghanaian', 'list', 'bank', 'mei', 'yu', '13', 'may', '2019']
Counter({'corpor': 1, 'govern': 1, 'ghana': 1, 'analysi': 1, 'board': 1, 'account': 1, 'ghanaian': 1, 'list': 1, 'bank': 1, 'mei': 1, 'yu': 1, '13': 1, 'may': 1, '2019': 1})
14
['13' '2019' 'account' 'analysi' 'bank' 'board' 'corpor' 'ghana'
 'ghanaian' 'govern' 'list' 'may' 'mei' 'yu']
13
0.07142857142857142
4
4.312140507209715
4.312140507209715
2019
0.07142857142857142
41
2.183908801360447
2.183908801360447
account
0.07142857142857142
28
2.5542825896573413
2.5542825896573413
analysi
0.07142857142857142
28
2.5542825896573413
2.5542825896

2019
0.07692307692307693
41
2.183908801360447
2.183908801360447
account
0.07692307692307693
28
2.5542825896573413
2.5542825896573413
alaa
0.07692307692307693
5
4.1298189504157605
4.1298189504157605
alhaj
0.07692307692307693
5
4.1298189504157605
4.1298189504157605
bank
0.07692307692307693
44
2.114915929873496
2.114915929873496
dec
0.07692307692307693
15
3.1489896974040343
3.1489896974040343
impact
0.07692307692307693
27
2.5893739094686117
2.5893739094686117
ismail
0.07692307692307693
5
4.1298189504157605
4.1298189504157605
measur
0.07692307692307693
8
3.724353842307596
3.724353842307596
payment
0.07692307692307693
3
4.535284058523925
4.535284058523925
perform
0.07692307692307693
23
2.7435245892958697
2.7435245892958697
sharebas
0.07692307692307693
1
5.2284312390838705
5.2284312390838705
treatment
0.07692307692307693
1
5.2284312390838705
5.2284312390838705
['essenti', 'person', 'financ', 'practic', 'guid', 'employe', 'lien', 'luu', '11', 'jan', '2019']
Counter({'essenti': 1, 'person': 1,

0.07692307692307693
27
2.5893739094686117
2.5893739094686117
base
0.07692307692307693
4
4.312140507209715
4.312140507209715
compens
0.07692307692307693
3
4.535284058523925
4.535284058523925
crisi
0.07692307692307693
7
3.8421368779639797
3.8421368779639797
expens
0.07692307692307693
1
5.2284312390838705
5.2284312390838705
financi
0.07692307692307693
48
2.029758121533189
2.029758121533189
ismail
0.07692307692307693
5
4.1298189504157605
4.1298189504157605
return
0.07692307692307693
6
3.975668270588502
3.975668270588502
sharehold
0.07692307692307693
3
4.535284058523925
4.535284058523925
shareopt
0.07692307692307693
1
5.2284312390838705
5.2284312390838705
['shari', 'ah', 'govern', 'islami', 'bank', 'bangladesh', 'limit', 'implic', 'ownership', 'chang', 'hafij', 'ullah', '29', 'may', '2019']
Counter({'shari': 1, 'ah': 1, 'govern': 1, 'islami': 1, 'bank': 1, 'bangladesh': 1, 'limit': 1, 'implic': 1, 'ownership': 1, 'chang': 1, 'hafij': 1, 'ullah': 1, '29': 1, 'may': 1, '2019': 1})
15
['2019' 

 'hafij' 'ifsb' 'islami' 'jun' 'limit' 'standard' 'statu' 'ullah']
2018
0.0625
35
2.3380594811877056
2.3380594811877056
aaoifi
0.0625
1
5.2284312390838705
5.2284312390838705
bangladesh
0.0625
8
3.724353842307596
3.724353842307596
bank
0.0625
44
2.114915929873496
2.114915929873496
compar
0.0625
6
3.975668270588502
3.975668270588502
complianc
0.0625
8
3.724353842307596
3.724353842307596
empir
0.0625
18
2.977139440477375
2.977139440477375
evid
0.0625
48
2.029758121533189
2.029758121533189
hafij
0.0625
12
3.3566290621822787
3.3566290621822787
ifsb
0.0625
2
4.822966130975706
4.822966130975706
islami
0.0625
3
4.535284058523925
4.535284058523925
jun
0.0625
26
2.6257415536394864
2.6257415536394864
limit
0.0625
4
4.312140507209715
4.312140507209715
standard
0.0625
6
3.975668270588502
3.975668270588502
statu
0.0625
1
5.2284312390838705
5.2284312390838705
ullah
0.0625
12
3.3566290621822787
3.3566290621822787
['compet', 'institut', 'logic', 'islam', 'financi', 'report', 'standardis', 'compar', 'st

0.07692307692307693
12
3.3566290621822787
3.3566290621822787
2018
0.07692307692307693
35
2.3380594811877056
2.3380594811877056
adapt
0.07692307692307693
1
5.2284312390838705
5.2284312390838705
experi
0.07692307692307693
6
3.975668270588502
3.975668270588502
jun
0.07692307692307693
26
2.6257415536394864
2.6257415536394864
open
0.07692307692307693
5
4.1298189504157605
4.1298189504157605
perform
0.07692307692307693
23
2.7435245892958697
2.7435245892958697
role
0.07692307692307693
14
3.2135282185416054
3.2135282185416054
selfdetermin
0.07692307692307693
1
5.2284312390838705
5.2284312390838705
servic
0.15384615384615385
6
3.975668270588502
3.975668270588502
song
0.07692307692307693
5
4.1298189504157605
4.1298189504157605
wei
0.07692307692307693
5
4.1298189504157605
4.1298189504157605
['sukuk', 'financi', 'asset', 'review', 'sarkar', 'kabir', '2018']
Counter({'sukuk': 1, 'financi': 1, 'asset': 1, 'review': 1, 'sarkar': 1, 'kabir': 1, '2018': 1})
7
['2018' 'asset' 'financi' 'kabir' 'review' '

vers
0.08333333333333333
1
5.2284312390838705
5.2284312390838705
['role', 'extern', 'debt', 'foreign', 'direct', 'investmentgrowth', 'relationship', 'sailesh', 'tanna', 'glauco', 'de', 'vita', '12', 'oct', '2018']
Counter({'role': 1, 'extern': 1, 'debt': 1, 'foreign': 1, 'direct': 1, 'investmentgrowth': 1, 'relationship': 1, 'sailesh': 1, 'tanna': 1, 'glauco': 1, 'de': 1, 'vita': 1, '12': 1, 'oct': 1, '2018': 1})
15
['12' '2018' 'de' 'debt' 'direct' 'extern' 'foreign' 'glauco'
 'investmentgrowth' 'oct' 'relationship' 'role' 'sailesh' 'tanna' 'vita']
12
0.06666666666666667
8
3.724353842307596
3.724353842307596
2018
0.06666666666666667
35
2.3380594811877056
2.3380594811877056
de
0.06666666666666667
3
4.535284058523925
4.535284058523925
debt
0.06666666666666667
6
3.975668270588502
3.975668270588502
direct
0.06666666666666667
7
3.8421368779639797
3.8421368779639797
extern
0.06666666666666667
1
5.2284312390838705
5.2284312390838705
foreign
0.06666666666666667
11
3.436671769855815
3.43667176

acquisit
0.09090909090909091
7
3.8421368779639797
3.8421368779639797
good
0.09090909090909091
2
4.822966130975706
4.822966130975706
jan
0.09090909090909091
34
2.366230358154402
2.366230358154402
perspect
0.09090909090909091
17
3.031206661747651
3.031206661747651
saliesh
0.09090909090909091
1
5.2284312390838705
5.2284312390838705
serial
0.09090909090909091
1
5.2284312390838705
5.2284312390838705
tanna
0.09090909090909091
31
2.455842516844089
2.455842516844089
target
0.09090909090909091
3
4.535284058523925
4.535284058523925
['buy', 'friend', 'import', 'econom', 'flow', 'assembl', 'iraq', 'war', 'coalit', 'piotr', 'li', 'mehtap', 'hisarciklilar', '20', 'dec', '2017']
Counter({'buy': 1, 'friend': 1, 'import': 1, 'econom': 1, 'flow': 1, 'assembl': 1, 'iraq': 1, 'war': 1, 'coalit': 1, 'piotr': 1, 'li': 1, 'mehtap': 1, 'hisarciklilar': 1, '20': 1, 'dec': 1, '2017': 1})
16
['20' '2017' 'assembl' 'buy' 'coalit' 'dec' 'econom' 'flow' 'friend'
 'hisarciklilar' 'import' 'iraq' 'li' 'mehtap' 'piotr

17
3.031206661747651
3.031206661747651
guest
0.08333333333333333
1
5.2284312390838705
5.2284312390838705
introduct
0.08333333333333333
1
5.2284312390838705
5.2284312390838705
issu
0.08333333333333333
3
4.535284058523925
4.535284058523925
jun
0.08333333333333333
26
2.6257415536394864
2.6257415536394864
pano
0.08333333333333333
7
3.8421368779639797
3.8421368779639797
review
0.08333333333333333
10
3.523683146845445
3.523683146845445
special
0.08333333333333333
1
5.2284312390838705
5.2284312390838705
['stock', 'liquid', 'sme', 'likelihood', 'bankruptci', 'evid', 'us', 'market', 'mohamad', 'nazri', 'abd', 'karim', 'dec', '2017']
Counter({'stock': 1, 'liquid': 1, 'sme': 1, 'likelihood': 1, 'bankruptci': 1, 'evid': 1, 'us': 1, 'market': 1, 'mohamad': 1, 'nazri': 1, 'abd': 1, 'karim': 1, 'dec': 1, '2017': 1})
14
['2017' 'abd' 'bankruptci' 'dec' 'evid' 'karim' 'likelihood' 'liquid'
 'market' 'mohamad' 'nazri' 'sme' 'stock' 'us']
2017
0.07142857142857142
38
2.258016773514169
2.258016773514169
ab

4.535284058523925
['welfar', 'implic', 'public', 'healthcar', 'financ', 'macromicro', 'simul', 'analysi', 'uganda', 'judith', 'kabajulizi', '30', 'jun', '2017']
Counter({'welfar': 1, 'implic': 1, 'public': 1, 'healthcar': 1, 'financ': 1, 'macromicro': 1, 'simul': 1, 'analysi': 1, 'uganda': 1, 'judith': 1, 'kabajulizi': 1, '30': 1, 'jun': 1, '2017': 1})
14
['2017' '30' 'analysi' 'financ' 'healthcar' 'implic' 'judith' 'jun'
 'kabajulizi' 'macromicro' 'public' 'simul' 'uganda' 'welfar']
2017
0.07142857142857142
38
2.258016773514169
2.258016773514169
30
0.07142857142857142
14
3.2135282185416054
3.2135282185416054
analysi
0.07142857142857142
28
2.5542825896573413
2.5542825896573413
financ
0.07142857142857142
17
3.031206661747651
3.031206661747651
healthcar
0.07142857142857142
4
4.312140507209715
4.312140507209715
implic
0.07142857142857142
10
3.523683146845445
3.523683146845445
judith
0.07142857142857142
7
3.8421368779639797
3.8421368779639797
jun
0.07142857142857142
26
2.6257415536394864
2

0.07142857142857142
6
3.975668270588502
3.975668270588502
['financi', 'innov', 'bank', 'system', 'hous', 'market', 'concept', 'financi', 'innov', 'enough', 'explain', 'bank', 'regulatori', 'arbitrag', 'eliana', 'lauretta', '2016']
Counter({'financi': 2, 'innov': 2, 'bank': 2, 'system': 1, 'hous': 1, 'market': 1, 'concept': 1, 'enough': 1, 'explain': 1, 'regulatori': 1, 'arbitrag': 1, 'eliana': 1, 'lauretta': 1, '2016': 1})
17
['2016' 'arbitrag' 'bank' 'concept' 'eliana' 'enough' 'explain' 'financi'
 'hous' 'innov' 'lauretta' 'market' 'regulatori' 'system']
2016
0.058823529411764705
22
2.786084203714666
2.786084203714666
arbitrag
0.058823529411764705
1
5.2284312390838705
5.2284312390838705
bank
0.11764705882352941
44
2.114915929873496
2.114915929873496
concept
0.058823529411764705
1
5.2284312390838705
5.2284312390838705
eliana
0.058823529411764705
15
3.1489896974040343
3.1489896974040343
enough
0.058823529411764705
1
5.2284312390838705
5.2284312390838705
explain
0.058823529411764705
2
4

7
3.8421368779639797
3.8421368779639797
market
0.09090909090909091
33
2.3952178950276544
2.3952178950276544
pano
0.09090909090909091
7
3.8421368779639797
3.8421368779639797
portfolio
0.09090909090909091
5
4.1298189504157605
4.1298189504157605
region
0.09090909090909091
6
3.975668270588502
3.975668270588502
['twitter', 'us', 'stock', 'market', 'influenc', 'micro‑blogg', 'share', 'price', 'piotr', 'li', '2016']
Counter({'twitter': 1, 'us': 1, 'stock': 1, 'market': 1, 'influenc': 1, 'micro‑blogg': 1, 'share': 1, 'price': 1, 'piotr': 1, 'li': 1, '2016': 1})
11
['2016' 'influenc' 'li' 'market' 'micro‑blogg' 'piotr' 'price' 'share'
 'stock' 'twitter' 'us']
2016
0.09090909090909091
22
2.786084203714666
2.786084203714666
influenc
0.09090909090909091
4
4.312140507209715
4.312140507209715
li
0.09090909090909091
18
2.977139440477375
2.977139440477375
market
0.09090909090909091
33
2.3952178950276544
2.3952178950276544
micro‑blogg
0.09090909090909091
1
5.2284312390838705
5.2284312390838705
piotr
0.

3
4.535284058523925
4.535284058523925
public
0.058823529411764705
13
3.282521090028557
3.282521090028557
sensit
0.058823529411764705
2
4.822966130975706
4.822966130975706
studi
0.058823529411764705
17
3.031206661747651
3.031206661747651
war
0.058823529411764705
8
3.724353842307596
3.724353842307596
['financegrowth', 'virtuou', 'disvirtu', 'cycl', 'vdc', 'theori', 'empir', 'evid', 'eliana', 'lauretta', '2015']
Counter({'financegrowth': 1, 'virtuou': 1, 'disvirtu': 1, 'cycl': 1, 'vdc': 1, 'theori': 1, 'empir': 1, 'evid': 1, 'eliana': 1, 'lauretta': 1, '2015': 1})
11
['2015' 'cycl' 'disvirtu' 'eliana' 'empir' 'evid' 'financegrowth'
 'lauretta' 'theori' 'vdc' 'virtuou']
2015
0.09090909090909091
32
2.4250708581773353
2.4250708581773353
cycl
0.09090909090909091
7
3.8421368779639797
3.8421368779639797
disvirtu
0.09090909090909091
1
5.2284312390838705
5.2284312390838705
eliana
0.09090909090909091
15
3.1489896974040343
3.1489896974040343
empir
0.09090909090909091
18
2.977139440477375
2.97713944

0.07692307692307693
5
4.1298189504157605
4.1298189504157605
['retir', 'lien', 'luu', 'apr', '2015']
Counter({'retir': 1, 'lien': 1, 'luu': 1, 'apr': 1, '2015': 1})
5
['2015' 'apr' 'lien' 'luu' 'retir']
2015
0.2
32
2.4250708581773353
2.4250708581773353
apr
0.2
27
2.5893739094686117
2.5893739094686117
lien
0.2
7
3.8421368779639797
3.8421368779639797
luu
0.2
7
3.8421368779639797
3.8421368779639797
retir
0.2
1
5.2284312390838705
5.2284312390838705
['smart', 'sustain', 'urban', 'regener', 'institut', 'qualiti', 'financi', 'innov', 'simon', 'huston', 'nov', '2015']
Counter({'smart': 1, 'sustain': 1, 'urban': 1, 'regener': 1, 'institut': 1, 'qualiti': 1, 'financi': 1, 'innov': 1, 'simon': 1, 'huston': 1, 'nov': 1, '2015': 1})
12
['2015' 'financi' 'huston' 'innov' 'institut' 'nov' 'qualiti' 'regener'
 'simon' 'smart' 'sustain' 'urban']
2015
0.08333333333333333
32
2.4250708581773353
2.4250708581773353
financi
0.08333333333333333
48
2.029758121533189
2.029758121533189
huston
0.08333333333333333


5.2284312390838705
5.2284312390838705
lien
0.09090909090909091
7
3.8421368779639797
3.8421368779639797
luu
0.09090909090909091
7
3.8421368779639797
3.8421368779639797
problem
0.09090909090909091
1
5.2284312390838705
5.2284312390838705
state
0.09090909090909091
8
3.724353842307596
3.724353842307596
unit
0.09090909090909091
4
4.312140507209715
4.312140507209715
['manag', 'refinanc', 'risk', 'islam', 'bank', 'kenneth', 'baldwin', '17', 'aug', '2015']
Counter({'manag': 1, 'refinanc': 1, 'risk': 1, 'islam': 1, 'bank': 1, 'kenneth': 1, 'baldwin': 1, '17': 1, 'aug': 1, '2015': 1})
10
['17' '2015' 'aug' 'baldwin' 'bank' 'islam' 'kenneth' 'manag' 'refinanc'
 'risk']
17
0.1
2
4.822966130975706
4.822966130975706
2015
0.1
32
2.4250708581773353
2.4250708581773353
aug
0.1
22
2.786084203714666
2.786084203714666
baldwin
0.1
5
4.1298189504157605
4.1298189504157605
bank
0.1
44
2.114915929873496
2.114915929873496
islam
0.1
27
2.5893739094686117
2.5893739094686117
kenneth
0.1
4
4.312140507209715
4.3121405

0.06666666666666667
2
4.822966130975706
4.822966130975706
middl
0.06666666666666667
2
4.822966130975706
4.822966130975706
north
0.06666666666666667
1
5.2284312390838705
5.2284312390838705
polit
0.06666666666666667
6
3.975668270588502
3.975668270588502
stock
0.06666666666666667
15
3.1489896974040343
3.1489896974040343
uncertainti
0.06666666666666667
2
4.822966130975706
4.822966130975706
volatil
0.06666666666666667
8
3.724353842307596
3.724353842307596
wang
0.06666666666666667
4
4.312140507209715
4.312140507209715
['shari', 'ah', 'complianc', 'islam', 'bank', 'empir', 'studi', 'select', 'islam', 'bank', 'bangladesh', 'hafij', 'ullah', '15', 'mar', '2014']
Counter({'islam': 2, 'bank': 2, 'shari': 1, 'ah': 1, 'complianc': 1, 'empir': 1, 'studi': 1, 'select': 1, 'bangladesh': 1, 'hafij': 1, 'ullah': 1, '15': 1, 'mar': 1, '2014': 1})
16
['15' '2014' 'ah' 'bangladesh' 'bank' 'complianc' 'empir' 'hafij' 'islam'
 'mar' 'select' 'shari' 'studi' 'ullah']
15
0.0625
3
4.535284058523925
4.5352840585

4.535284058523925
cycl
0.07692307692307693
7
3.8421368779639797
3.8421368779639797
eliana
0.07692307692307693
15
3.1489896974040343
3.1489896974040343
financ
0.07692307692307693
17
3.031206661747651
3.031206661747651
growth
0.07692307692307693
12
3.3566290621822787
3.3566290621822787
lauretta
0.07692307692307693
15
3.1489896974040343
3.1489896974040343
polici
0.07692307692307693
4
4.312140507209715
4.312140507209715
propos
0.07692307692307693
2
4.822966130975706
4.822966130975706
relationship
0.07692307692307693
7
3.8421368779639797
3.8421368779639797
switch
0.07692307692307693
2
4.822966130975706
4.822966130975706
understand
0.07692307692307693
6
3.975668270588502
3.975668270588502
virtuou
0.07692307692307693
5
4.1298189504157605
4.1298189504157605
['present', 'intern', 'workshop', 'econom', 'scienc', 'heterogen', 'interact', 'agent', 'wehia', 'eliana', 'lauretta', '23', 'jun', '2013']
Counter({'present': 1, 'intern': 1, 'workshop': 1, 'econom': 1, 'scienc': 1, 'heterogen': 1, 'intera

0.07692307692307693
2
4.822966130975706
4.822966130975706
standard
0.07692307692307693
6
3.975668270588502
3.975668270588502
student
0.07692307692307693
3
4.535284058523925
4.535284058523925
uk
0.07692307692307693
20
2.8770559819203925
2.8770559819203925
univers
0.15384615384615385
8
3.724353842307596
3.724353842307596
['diseas', 'eldorado', 'simon', 'huston', 'jan', '2011']
Counter({'diseas': 1, 'eldorado': 1, 'simon': 1, 'huston': 1, 'jan': 1, '2011': 1})
6
['2011' 'diseas' 'eldorado' 'huston' 'jan' 'simon']
2011
0.16666666666666666
6
3.975668270588502
3.975668270588502
diseas
0.16666666666666666
2
4.822966130975706
4.822966130975706
eldorado
0.16666666666666666
1
5.2284312390838705
5.2284312390838705
huston
0.16666666666666666
28
2.5542825896573413
2.5542825896573413
jan
0.16666666666666666
34
2.366230358154402
2.366230358154402
simon
0.16666666666666666
25
2.6634818816223333
2.6634818816223333
['beni', 'culturali', 'la', 'creativit', 'artistica', 'come', 'attrattori', 'turistici', 

distribut
0.16666666666666666
3
4.535284058523925
4.535284058523925
incomebas
0.16666666666666666
2
4.822966130975706
4.822966130975706
li
0.16666666666666666
18
2.977139440477375
2.977139440477375
piotr
0.16666666666666666
18
2.977139440477375
2.977139440477375
terror
0.16666666666666666
5
4.1298189504157605
4.1298189504157605
['evolut', 'residenti', 'properti', 'price', 'premia', 'metropoli', 'reconstitut', 'contamin', 'simon', 'huston', '2010']
Counter({'evolut': 1, 'residenti': 1, 'properti': 1, 'price': 1, 'premia': 1, 'metropoli': 1, 'reconstitut': 1, 'contamin': 1, 'simon': 1, 'huston': 1, '2010': 1})
11
['2010' 'contamin' 'evolut' 'huston' 'metropoli' 'premia' 'price'
 'properti' 'reconstitut' 'residenti' 'simon']
2010
0.09090909090909091
4
4.312140507209715
4.312140507209715
contamin
0.09090909090909091
1
5.2284312390838705
5.2284312390838705
evolut
0.09090909090909091
2
4.822966130975706
4.822966130975706
huston
0.09090909090909091
28
2.5542825896573413
2.5542825896573413
met

0.1
7
3.8421368779639797
3.8421368779639797
applic
0.1
6
3.975668270588502
3.975668270588502
bank
0.1
44
2.114915929873496
2.114915929873496
predict
0.1
5
4.1298189504157605
4.1298189504157605
quantit
0.1
2
4.822966130975706
4.822966130975706
sailesh
0.1
30
2.487591215158669
2.487591215158669
tanna
0.1
31
2.455842516844089
2.455842516844089
target
0.1
3
4.535284058523925
4.535284058523925
techniqu
0.1
1
5.2284312390838705
5.2284312390838705
['theori', 'practic', 'new', 'approach', 'real', 'estat', 'risk', 'part', 'ii', 'kenneth', 'baldwin', '2005']
Counter({'theori': 1, 'practic': 1, 'new': 1, 'approach': 1, 'real': 1, 'estat': 1, 'risk': 1, 'part': 1, 'ii': 1, 'kenneth': 1, 'baldwin': 1, '2005': 1})
12
['2005' 'approach' 'baldwin' 'estat' 'ii' 'kenneth' 'new' 'part' 'practic'
 'real' 'risk' 'theori']
2005
0.08333333333333333
3
4.535284058523925
4.535284058523925
approach
0.08333333333333333
8
3.724353842307596
3.724353842307596
baldwin
0.08333333333333333
5
4.1298189504157605
4.129818

In [28]:
# tf_idf

In [42]:
for i in tf_idf:
    tf_idf[i] *= 0.3

In [60]:
len(tf_idf)

4609

In [44]:
vocab_balancing = np.zeros((len(processed_text), total_vocab_size)) #fillign with zeros to rest
for i in tf_idf:
    try:
        ind = total_vocab.index(i[1])
        vocab_balancing[i[0]][ind] = tf_idf[i]
#         print(vocab_balancing)
    except:
        pass

# vocab_balancing

In [45]:
def gen_vector(tokens):

    vocab_zeros = np.zeros((len(total_vocab)))
    
    counter = Counter(tokens)
    tok_count = len(tokens)
    
    for token in np.unique(tokens):
        
        tf = counter[token]/tok_count
        df = doc_freq(token)
        idf = math.log((len(processed_text)+1)/(df+1))

        try:
            ind = total_vocab.index(token)
            vocab_zeros[ind] = tf*idf
        except:
            pass
    return vocab_zeros

In [50]:
from scipy import spatial

def cosine_similarity(text):

    preprocessed_text = preprocess(text)
    tokens = word_tokenize(preprocessed_text)
    
    print("\nSearch input:", text)
    print("")
    print(tokens)
    
    cosine_list = []
    
    search_vector = gen_vector(tokens)
    for d in vocab_balancing:
        cal_cos = 1 - spatial.distance.cosine(search_vector, d)
        cosine_list.append(cal_cos)
    
#     print(cosine_list)
    cosine_list_sort = np.array(cosine_list).argsort()[::-1]
    
    print("")
    
    print(cosine_list_sort)
    return cosine_list_sort

In [51]:
def search_pub(text):
    indices = cosine_similarity(text)
    new_df = df.iloc[indices]
    new_df
    results = new_df.to_dict('records')
    return results

In [61]:
search_pub("lockdown and testing of the covid 19")


Search input: lockdown and testing of the covid 19

['lockdown', 'test', 'covid', '19']

[104 166  22 239  85  30  71 371 125 119 120 121 122 123 124 127 126 117
 128 129 130 131 132 133 134 135 118 115 116 105  96  97  98  99 100 101
 102 103 106 137 107 108 109 110 111 112 113 114 136 139 138 161 163 164
 165 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 162
 160  94 159 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
 155 156 157 158  95  92  93  23  25  26  27  28  29  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  24  21 184  20   1   2   3   4   5
   6   7   8   9  10  11  12  13  14  15  16  17  18  19  45  46  47  48
  72  73  74  75  76  77  78  79  80  81  82  83  84  86  87  88  89  90
  91  70  69  68  57  49  50  51  52  53  54  55  56  58  67  59  60  61
  62  63  64  65  66 183 185 370 313 304 305 306 307 308 309 310 311 312
 314 279 315 316 317 318 319 320 321 322 323 303 302 301 300 281 282 283
 284 285 286 287 288 289 290 291 2

[{'pub_link': 'https://pureportal.coventry.ac.uk/en/publications/do-lockdown-and-testing-help-in-curbing-covid-19-transmission',
  'pub_title': 'Do lockdown and testing help in curbing COVID-19 transmission?',
  'pub_date': '4 May 2020',
  'auth_name': 'Olubunmi Ajala',
  'auth_link': "['https://pureportal.coventry.ac.uk/en/persons/olubunmi-ajala']",
  'id': 104,
  'auth_name_extract': 'olubunmi ajala',
  'text': 'do lockdown and testing help in curbing covid-19 transmission? olubunmi ajala 4 May 2020'},
 {'pub_link': 'https://pureportal.coventry.ac.uk/en/publications/stress-testing-and-reverse-stress-testing-an-approach-for-a-resil',
  'pub_title': 'Stress Testing and Reverse Stress Testing An Approach for a Resilient Islamic Financial Industry',
  'pub_date': '2019',
  'auth_name': 'Samir Alamad',
  'auth_link': "['https://pureportal.coventry.ac.uk/en/persons/samir-alamad']",
  'id': 166,
  'auth_name_extract': 'samir alamad',
  'text': 'stress testing and reverse stress testing an a