In [1]:
import pandas as pd
import numpy as np
from os.path import join
import requests
import sqlite3

In [2]:
from peewee import SqliteDatabase, fn, JOIN
from playhouse.reflection import generate_models, print_model, print_table_sql

In [3]:
# Open the SQLite file and build model classes for its tables via
# playhouse.reflection.generate_models.
db = SqliteDatabase('star.db')
models = generate_models(db)

# Bind the three models used by the queries below to notebook-level names.
# Every lookup in `models` is resolved before any of the names is assigned.
Paper, DomainPaperToField, DomainPaperToMethod = (
    models[table_name]
    for table_name in ('papers', 'domain_paper_to_field', 'domain_paper_to_method')
)

In [4]:
# List each distinct value of the `field` column of DomainPaperToField.
unique_field_rows = DomainPaperToField.select(DomainPaperToField.field).distinct()
unique_fields = [record.field for record in unique_field_rows]
unique_fields

['Engineering',
 'Computer Science',
 'Materials Science',
 'Medicine',
 'Environmental Science',
 'Geography',
 'Biology',
 'Geology',
 'Chemistry',
 'Mathematics',
 'Physics',
 'Business',
 'Economics',
 'Education',
 'Philosophy',
 'Sociology',
 'Psychology',
 'Art',
 'History',
 'Political Science',
 'Agricultural And Food Sciences',
 'Linguistics',
 'Law']

In [5]:
# Number of DomainPaperToField rows per field, restricted to papers with
# is_preprint == 0 and is_domain_full == 1.
#
# Only the grouping column and the aggregate are selected.  Listing extra
# columns (ss_id, is_preprint, is_domain_full) that are neither aggregated nor
# part of the GROUP BY makes the query depend on SQLite's "bare column"
# behaviour and returns an arbitrary row's values, which were never used here.
field_count_rows = (
    DomainPaperToField
        .select(
            DomainPaperToField.field,
            fn.COUNT(DomainPaperToField.ss_id).alias('paper_count'),
        )
        .join(Paper, on=(DomainPaperToField.ss_id == Paper.ss_id))
        .where(Paper.is_preprint == 0, Paper.is_domain_full == 1)
        .group_by(DomainPaperToField.field)
)

# One record per group; the frame is saved to CSV and shown as the cell output.
field_count_df = pd.DataFrame(data=[
    {"field": row.field, "paper_count": row.paper_count}
    for row in field_count_rows
])
field_count_df.to_csv('papers_by_field-is_domain_full.csv')
field_count_df

Unnamed: 0,field,paper_count
0,Agricultural And Food Sciences,137
1,Art,206
2,Biology,2696
3,Business,451
4,Chemistry,868
5,Economics,483
6,Education,112
7,Environmental Science,1232
8,Geography,681
9,Geology,611


In [6]:
# Number of DomainPaperToField rows per field, restricted to papers with
# is_preprint == 0 and is_domain_partial == 1.
#
# Only the grouping column and the aggregate are selected.  Listing extra
# columns (ss_id, is_preprint, is_domain_partial) that are neither aggregated
# nor part of the GROUP BY makes the query depend on SQLite's "bare column"
# behaviour and returns an arbitrary row's values, which were never used here.
field_count_2_rows = (
    DomainPaperToField
        .select(
            DomainPaperToField.field,
            fn.COUNT(DomainPaperToField.ss_id).alias('paper_count'),
        )
        .join(Paper, on=(DomainPaperToField.ss_id == Paper.ss_id))
        .where(Paper.is_preprint == 0, Paper.is_domain_partial == 1)
        .group_by(DomainPaperToField.field)
)

# One record per group; the frame is saved to CSV and shown as the cell output.
field_count_2_df = pd.DataFrame(data=[
    {"field": row.field, "paper_count": row.paper_count}
    for row in field_count_2_rows
])
field_count_2_df.to_csv('papers_by_field-is_domain_partial.csv')
field_count_2_df

Unnamed: 0,field,paper_count
0,Agricultural And Food Sciences,145
1,Art,406
2,Biology,4319
3,Business,514
4,Chemistry,1026
5,Computer Science,22950
6,Economics,755
7,Education,127
8,Engineering,926
9,Environmental Science,1551


In [7]:
# Paper count per venue for rows with is_preprint == 0 and is_domain_full == 1.
venue_count_rows = (
    Paper
        .select(Paper.venue, fn.COUNT(Paper.ss_id).alias('paper_count'))
        .where((Paper.is_preprint == 0) & (Paper.is_domain_full == 1))
        .group_by(Paper.venue)
)

# Build the frame, turn empty strings into NaN so that rows without a venue
# are dropped, then order by descending paper count.
venue_count_df = (
    pd.DataFrame(data=[
        {"venue": row.venue, "paper_count": row.paper_count}
        for row in venue_count_rows
    ])
    .replace('', np.nan)
    .dropna(subset=['venue'])
    .sort_values(by='paper_count', ascending=False)
)
venue_count_df.to_csv('papers_by_venue-is_domain_full.csv')
venue_count_df

Unnamed: 0,venue,paper_count
3194,Scientific Reports,409
2799,PLoS ONE,289
2632,Nature Communications,272
2460,Measurement science and technology,86
1324,Frontiers in Genetics,79
...,...,...
1545,IEEE transactions on bio-medical engineering,1
1549,IEEE/PES Transmission and Distribution Confere...,1
1550,IEICE ESS FUNDAMENTALS REVIEW,1
1556,IISE Transactions on Healthcare Systems Engine...,1


In [8]:
# Paper count per venue for rows with is_preprint == 0 and
# is_domain_partial == 1.
venue_count_2_rows = (
    Paper
        .select(Paper.venue, fn.COUNT(Paper.ss_id).alias('paper_count'))
        .where((Paper.is_preprint == 0) & (Paper.is_domain_partial == 1))
        .group_by(Paper.venue)
)

# Build the frame, turn empty strings into NaN so that rows without a venue
# are dropped, then order by descending paper count.
venue_count_2_df = (
    pd.DataFrame(data=[
        {"venue": row.venue, "paper_count": row.paper_count}
        for row in venue_count_2_rows
    ])
    .replace('', np.nan)
    .dropna(subset=['venue'])
    .sort_values(by='paper_count', ascending=False)
)
venue_count_2_df.to_csv('papers_by_venue-is_domain_partial.csv')
venue_count_2_df

Unnamed: 0,venue,paper_count
1821,IEEE Transactions on Image Processing,593
1831,IEEE Transactions on Neural Networks and Learn...,587
3756,Scientific Reports,550
2223,Italian National Conference on Sensors,541
1833,IEEE Transactions on Pattern Analysis and Mach...,533
...,...,...
1856,IEICE Transactions on Fundamentals of Electron...,1
1857,"IET Radar, Sonar &amp; Navigation",1
1862,IJISCS (International Journal of Information S...,1
1863,IMA Journal of Applied Mathematics,1


In [11]:
# Row count per (field, method_acronym) pair, restricted to papers with
# is_preprint == 0 and is_domain_full == 1.  DomainPaperToField is joined to
# both DomainPaperToMethod and Paper on ss_id.
method_field_rows = (
    DomainPaperToField
        .select(
            DomainPaperToField.field,
            DomainPaperToMethod.method_acronym,
            fn.COUNT(DomainPaperToField.ss_id).alias('paper_count'),
        )
        .join(DomainPaperToMethod, on=(DomainPaperToField.ss_id == DomainPaperToMethod.ss_id))
        .join(Paper, on=(DomainPaperToField.ss_id == Paper.ss_id))
        .where((Paper.is_preprint == 0) & (Paper.is_domain_full == 1))
        .group_by(DomainPaperToField.field, DomainPaperToMethod.method_acronym)
)

# The method acronym is read from the joined model instance that peewee
# attaches to each row as `domain_paper_to_method`.
method_field_df = pd.DataFrame(data=[
    {
        "field": row.field,
        "method_acronym": row.domain_paper_to_method.method_acronym,
        "paper_count": row.paper_count,
    }
    for row in method_field_rows
])
method_field_df.to_csv('papers_by_field_and_method-is_domain_full.csv')
method_field_df

Unnamed: 0,field,method_acronym,paper_count
0,Agricultural And Food Sciences,F-MVU,1
1,Agricultural And Food Sciences,FA,12
2,Agricultural And Food Sciences,G-SVD,1
3,Agricultural And Food Sciences,GDA,1
4,Agricultural And Food Sciences,IDMAP,2
...,...,...,...
688,Sociology,SMA,4
689,Sociology,SNE,1
690,Sociology,T-SNE,5
691,Sociology,TF,1


In [12]:
# Row count per (field, method_acronym) pair, restricted to papers with
# is_preprint == 0 and is_domain_partial == 1.  DomainPaperToField is joined
# to both DomainPaperToMethod and Paper on ss_id.
method_field_2_rows = (
    DomainPaperToField
        .select(
            DomainPaperToField.field,
            DomainPaperToMethod.method_acronym,
            fn.COUNT(DomainPaperToField.ss_id).alias('paper_count'),
        )
        .join(DomainPaperToMethod, on=(DomainPaperToField.ss_id == DomainPaperToMethod.ss_id))
        .join(Paper, on=(DomainPaperToField.ss_id == Paper.ss_id))
        .where((Paper.is_preprint == 0) & (Paper.is_domain_partial == 1))
        .group_by(DomainPaperToField.field, DomainPaperToMethod.method_acronym)
)

# The method acronym is read from the joined model instance that peewee
# attaches to each row as `domain_paper_to_method`.
method_field_2_df = pd.DataFrame(data=[
    {
        "field": row.field,
        "method_acronym": row.domain_paper_to_method.method_acronym,
        "paper_count": row.paper_count,
    }
    for row in method_field_2_rows
])
method_field_2_df.to_csv('papers_by_field_and_method-is_domain_partial.csv')
method_field_2_df

Unnamed: 0,field,method_acronym,paper_count
0,Agricultural And Food Sciences,F-MVU,1
1,Agricultural And Food Sciences,FA,12
2,Agricultural And Food Sciences,G-SVD,1
3,Agricultural And Food Sciences,GDA,1
4,Agricultural And Food Sciences,IDMAP,2
...,...,...,...
970,Sociology,SMA,4
971,Sociology,SNE,2
972,Sociology,T-SNE,5
973,Sociology,TF,4


In [14]:
# Paper count per (venue, method_acronym) pair, restricted to papers with
# is_preprint == 0 and is_domain_full == 1.
method_venue_rows = (
    DomainPaperToMethod
        .select(
            DomainPaperToMethod.method_acronym,
            Paper.venue,
            fn.COUNT(Paper.ss_id).alias('paper_count'),
        )
        .join(Paper, on=(DomainPaperToMethod.ss_id == Paper.ss_id))
        .where((Paper.is_preprint == 0) & (Paper.is_domain_full == 1))
        .group_by(Paper.venue, DomainPaperToMethod.method_acronym)
)

# The venue is read from the joined model instance exposed on each row as
# `papers`.  Empty strings are turned into NaN so rows without a venue are
# dropped before the frame is written out.
method_venue_df = (
    pd.DataFrame(data=[
        {
            "venue": row.papers.venue,
            "method_acronym": row.method_acronym,
            "paper_count": row.paper_count,
        }
        for row in method_venue_rows
    ])
    .replace('', np.nan)
    .dropna(subset=['venue'])
)
method_venue_df.to_csv('papers_by_venue_and_method-is_domain_full.csv')
method_venue_df

Unnamed: 0,venue,method_acronym,paper_count
69,11TH INTERNATIONAL CONFERENCE ON MATHEMATICAL ...,AE,1
70,2013 IEEE 8th Conference on Industrial Electro...,LDA,1
71,2014 IEEE Far East Forum on Nondestructive Eva...,TF,1
72,2014 IEEE International Ultrasonics Symposium,LDA,1
73,2016 HONET-ICT,NMF,1
...,...,...,...
7097,www.amfiteatrueconomic.ro,LMNN,1
7098,İktisadi İdari ve Siyasal Araştırmalar Dergisi,NMF,1
7099,Вестник университета,PCA,1
7100,Полис. Политические исследования,LDA,1


In [15]:
# Paper count per (venue, method_acronym) pair, restricted to papers with
# is_preprint == 0 and is_domain_partial == 1.
method_venue_2_rows = (
    DomainPaperToMethod
        .select(
            DomainPaperToMethod.method_acronym,
            Paper.venue,
            fn.COUNT(Paper.ss_id).alias('paper_count'),
        )
        .join(Paper, on=(DomainPaperToMethod.ss_id == Paper.ss_id))
        .where((Paper.is_preprint == 0) & (Paper.is_domain_partial == 1))
        .group_by(Paper.venue, DomainPaperToMethod.method_acronym)
)

# The venue is read from the joined model instance exposed on each row as
# `papers`.  Empty strings are turned into NaN so rows without a venue are
# dropped before the frame is written out.
method_venue_2_df = (
    pd.DataFrame(data=[
        {
            "venue": row.papers.venue,
            "method_acronym": row.method_acronym,
            "paper_count": row.paper_count,
        }
        for row in method_venue_2_rows
    ])
    .replace('', np.nan)
    .dropna(subset=['venue'])
)
method_venue_2_df.to_csv('papers_by_venue_and_method-is_domain_partial.csv')
method_venue_2_df

Unnamed: 0,venue,method_acronym,paper_count
68,#MSM,TF,1
69,11TH INTERNATIONAL CONFERENCE ON MATHEMATICAL ...,AE,1
70,2013 IEEE 13th International Conference on Dat...,DM,1
71,2013 IEEE 8th Conference on Industrial Electro...,LDA,1
72,2013 IEEE International Conference on Image Pr...,NMF,1
...,...,...,...
10501,npj Systems Biology and Applications,UMAP,3
10502,npj Vaccines,T-SNE,1
10503,www.amfiteatrueconomic.ro,LMNN,1
10504,Вестник университета,PCA,1
