In [1]:
import os, re, copy, json, pprint, argparse, warnings, xmltojson, shutil
import pandas as pd
from pathlib import Path
import bibtexparser
from collections import Counter
warnings.filterwarnings('ignore')

`PURPOSE`: List the authors of the cited papers, ranked by how often each paper is cited in the financial-anomalies literature.

Steps:
1. Get the titles from `references.bib`.
2. Match each title to the reference file to get its metadata.
3. Count the number of times each title is cited by the financial-anomalies literature collected in the file `financial_anomalies_cited_network_raw`.

In [2]:
# Parse the project bibliography. The encoding is given explicitly so that
# non-ASCII author names decode the same way on every platform instead of
# depending on the locale's default encoding.
with open('../financialanomalies/references.bib', encoding='utf-8') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)

def match_title(title_str, entries=None):
    """Find the bibliography entry whose title matches ``title_str``.

    Titles are compared after lower-casing and replacing every character
    that is not an ASCII letter or digit with a space.

    Parameters
    ----------
    title_str : str
        Title to look up.
    entries : iterable of dict, optional
        Bibliography entries to search. Defaults to ``bib_database.entries``.

    Returns
    -------
    tuple of (str, str, str) or None
        Lower-cased ``(title, journal, author)`` of the first matching entry,
        with ``''`` for a missing journal or author, or ``None`` when no entry
        matches.
    """
    if entries is None:
        entries = bib_database.entries

    # Normalise the query once; it does not change between entries.
    wanted = re.sub('[^a-zA-Z0-9]', ' ', title_str.lower())

    for item in entries:
        title = item.get('title')
        if title is None:
            # An entry without a title can never match; skip it rather than
            # failing on ``None.lower()``.
            continue
        title = title.lower()
        if wanted == re.sub('[^a-zA-Z0-9]', ' ', title):
            # Books and working papers may have no journal field.
            journal = (item.get('journal') or '').lower()
            author = (item.get('author') or '').lower()
            return title, journal, author

    return None
        
# Load the raw citation network. The code below iterates over it and reads the
# 'main_title' and 'cited_titles' keys of each item. JSON is UTF-8 by
# convention, so decode it explicitly rather than with the locale default.
with open("financial_anomalies_cited_network_raw", "r", encoding="utf-8") as fp:
    academic_networks = json.load(fp)

In [3]:
# Collect the normalised cited titles of every paper that has both a main
# title and a list of cited titles.
all_titles = []
for item in academic_networks:
    if item.get('main_title') is None or item.get('cited_titles') is None:
        continue
    all_titles.append([
        re.sub('[^a-zA-Z0-9]', ' ', sitem[0].lower().strip())
        for sitem in item.get('cited_titles')
        # Skip empty entries as well as entries whose title is missing.
        if sitem and sitem[0] is not None
    ])

# Flatten, count, and order as (citation_count, title) pairs, most cited
# first; ties are broken by title in reverse alphabetical order.
all_titles = sorted(
    (
        (count, title)
        for title, count in Counter(
            subitem for item in all_titles for subitem in item
        ).items()
    ),
    reverse=True,
)

# Print one record per cited title that is found in the bibliography.
meta_data = {}
for citation_count, title in all_titles:
    try:
        match = match_title(title)
    except AttributeError:
        # match_title may raise this when a bibliography entry lacks a field
        # it reads; treat that as "no usable metadata" and move on.
        continue
    if match is None:
        # Title is not in the bibliography.
        continue
    # Build a fresh dict each time so no field is carried over from a
    # previous title.
    meta_data = {
        'title': title,
        'authors': match[2],
        'citation_count': citation_count,
    }
    print(meta_data)
   

{'title': 'common risk factors in the returns on stocks and bonds', 'authors': 'fama, eugene f and french, kenneth r', 'citation_count': 100}
{'title': 'the cross section of expected stock returns', 'authors': 'fama, eugene f and french, kenneth r', 'citation_count': 97}
{'title': 'returns to buying winners and selling losers  implications for stock market efficiency', 'authors': 'jegadeesh, narasimhan and titman, sheridan', 'citation_count': 63}
{'title': 'on persistence in mutual fund performance', 'authors': 'carhart, mark m', 'citation_count': 54}
{'title': 'risk  return  and equilibrium  empirical tests', 'authors': 'fama, eugene f and macbeth, james d', 'citation_count': 52}
{'title': 'liquidity risk and expected stock returns', 'authors': 'pastor, lubos and stambaugh, robert f', 'citation_count': 32}
{'title': 'evidence of predictable behavior of security returns', 'authors': 'jegadeesh, narasimhan', 'citation_count': 30}
{'title': 'the cross section of volatility and expected r

In [4]:
!clear
!git add .
!git status
!git commit -m "frequency of cited titles"
!git push
!clear 


[H[2J