In [45]:
"""
Script to read data from json databases and save them as DataFrames

Created on: Tue May 5 23:02:40 CEST 2018
@author: Wiktor

TO DO:
- how to deal with studies with multiple authors?
"""

import pandas as pd
import json
from pprint import pprint
import os
import sys


# Choose the machine-specific locations of the two JSON databases: the first
# pair applies when Python reports the platform as "linux", the second pair
# is used on any other platform.
studies_path, authors_path = (
    (
        "/home/wiktor/git/MasterThesis/Literature/Competition_Studies_Database/studies.json",
        "/home/wiktor/git/MasterThesis/Literature/Competition_Studies_Database/authors.json",
    )
    if sys.platform == "linux"
    else (
        "/Users/Wiktor/Dropbox/Git/MasterThesis/Literature/Competition_Studies_Database/studies.json",
        "/Users/Wiktor/Dropbox/Git/MasterThesis/Literature/Competition_Studies_Database/authors.json",
    )
)


# Reading authors database
# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying on
# the platform's default encoding, which could mis-decode non-ASCII characters.
# The path is passed directly: os.path.join with a single argument is a no-op.
with open(authors_path, encoding="utf-8") as file:
    authors = json.load(file)

def get_author_institution(json_authors):
    """
    Flatten the parsed authors database into a pd.DataFrame of author/institution pairs.

    Parameters
    ----------
    json_authors : dict
        Parsed content of the authors .json file: a mapping from author key to
        a record with an 'affiliations' entry, which is a list of dicts (one
        dict per institution the author is affiliated with).

    Returns
    -------
    pd.DataFrame
        One row per (author, affiliation) pair. Each row holds the fields of
        the affiliation dict plus an 'author' column with the author key.
    """
    # Series of affiliation lists, indexed by author key
    affiliations_by_author = pd.DataFrame.from_dict(json_authors, orient='index')['affiliations']

    # An author can have several institutions, so emit one record per
    # affiliation and tag each record with the author it belongs to
    records = []
    for author_key, affiliation_list in affiliations_by_author.items():
        for affiliation in affiliation_list:
            record = dict(affiliation)
            record['author'] = author_key
            records.append(record)

    return pd.DataFrame(records)

# Reading studies database
# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying on
# the platform's default encoding, which could mis-decode non-ASCII characters.
# The path is passed directly: os.path.join with a single argument is a no-op.
with open(studies_path, encoding="utf-8") as file:
    studies = json.load(file)
    
    
def get_study_author(json_studies):
    """
    Flatten the parsed studies database into a pd.DataFrame of (author, study) pairs.

    Parameters
    ----------
    json_studies : dict
        Parsed content of the studies .json file: a mapping from study index to
        a record with an 'authors' entry, which is a list of dicts that each
        carry an 'index' key identifying the author.

    Returns
    -------
    pd.DataFrame
        One row per author of each study, with columns 'author_index' and
        'study_index'.
    """

    # Load articles and their authors as a list.
    # Read from the argument, not from the module-level `studies` variable, so
    # the function operates on whatever database it is given.
    articles = pd.DataFrame.from_dict(json_studies, orient='index')['authors']

    # Create index of articles
    articles_index = articles.index.values

    # Join all authors into a single list and to each author assign corresponding article
    studies_authors = [dict(author, **{'study': article}) for ls_authors, article in zip(articles, articles_index)
                                                          for author in ls_authors]

    # Get only author and study index (ignore the remaining author fields)
    studies_authors = pd.DataFrame(studies_authors)[['index', 'study']]

    # Rename to explicit column names; rename returns a new frame
    studies_authors = studies_authors.rename(columns={'index': 'author_index', 'study': 'study_index'})

    return studies_authors


In [22]:
# Build the flat author/institution and author/study tables from the loaded databases
authors_institutions = get_author_institution(authors)
studies_authors = get_study_author(studies)

# NOTE: the notebook displays only the value of the last expression in a cell;
# the earlier bare expressions below are evaluated but their results are not shown.
# NOTE(review): the sums below add a 0/1 flag over affiliation rows, so an author
# with two affiliations of the same type would be counted twice; confirm that
# this matches the intended "number of authors".

# Number of unique authors (126)
authors_institutions['author'].nunique()

# Number of authors affiliated with university (97)
sum(authors_institutions['university'])

# Number of authors affiliated with central bank (19)
sum(authors_institutions['central bank'])

# Number of authors affiliated with governmental institutions (12, per the summary table below)
sum(authors_institutions['governmental'])

# Number of authors affiliated with private institutions (13, the value displayed by this cell)
sum(authors_institutions['private'])

13

### Authors statistics

|                                         |     |
|-----------------------------------------|-----|
| # of authors                            | 126 |
| # of authors affiliated with CB         | 19  |
| # of authors affiliated with university | 97  |
| # of authors affiliated with private    | 13  |
| # of authors affiliated with government | 12  |

In [47]:
# Display the flattened table returned by get_author_institution
# (one row per author/affiliation pair).
authors_institutions

Unnamed: 0,author,central bank,governmental,institution,private,share,university
0,Adnan_Kasman,0,0,Dokuz Eylul University,0,1.00,1
1,Ahmad_Binti,0,0,University of Malaya,0,1.00,1
2,Ahmed_Ammar,0,0,University of Sfax,0,1.00,1
3,Alessandro_Giustiniani,0,1,International Monetaty Fund,0,1.00,0
4,Ali_Kutan,0,0,University of Southern Illinois,0,1.00,1
5,Alli_Nathan,0,0,University of Calgary,0,1.00,1
6,Ananthakrishnan_Prasad,0,1,International Monetary Fund,0,1.00,0
7,Anastasia_Fillipaki,1,0,Bank of Grece,0,0.50,0
8,Anastasia_Fillipaki,0,0,Athens University,0,0.50,1
9,Anthony_Rezitis,0,0,University of Patras,0,1.00,1
