Define environment variables and handler

In [None]:
# Execute the shared environment script one directory up; IPython's %run
# makes the names it defines available in this notebook's namespace.
%run ../env_variables.py

Import required libraries

In [None]:
import pandas as pd

In [None]:
import helpers.queries as q
import helpers.scopus_helpers as sh
from helpers import handler as h

In [None]:
# Run configuration: the author key, the date passed to every retrieval
# call below, and the prefix shared by every CSV this notebook exports.
author = 'mergoni'
max_date = '2021-02-01'
file_name_prefix = f'{author}_scopus_'

Retrieve original results from Scopus API

In [None]:
# Baseline result set: the original query is wrapped in a one-element list
# because the helper takes a list of queries.
# NOTE(review): max_date presumably caps the records returned - confirm in
# helpers.scopus_helpers.
original_df = sh.retrieve_results_from_list_of_queries(
    list_of_queries=[q.mergoni_scopus_original_query],
    max_date=max_date,
)

Retrieve results after applying steps 1 and 2

In [None]:
# Result set for the language-bias helper query, passed as a one-element
# list and retrieved with the same max_date as the baseline.
lang_bias_helper_df = sh.retrieve_results_from_list_of_queries(
    list_of_queries=[q.mergoni_scopus_lang_bias_helper_query],
    max_date=max_date,
)

In [None]:
# Result set for the "weird" local-bias helper queries. The queries object is
# passed as-is (not wrapped in a list), unlike the single-query cells.
# NOTE(review): presumably it is already a list of queries - confirm in
# helpers.queries.
local_bias_helper_df__weird = sh.retrieve_results_from_list_of_queries(
    list_of_queries=q.mergoni_scopus_local_bias_helper_queries_weird,
    max_date=max_date,
)

In [None]:
# Result set for the "non-weird" local-bias helper queries. The queries
# object is passed as-is (not wrapped in a list), unlike the single-query
# cells.
# NOTE(review): presumably it is already a list of queries - confirm in
# helpers.queries.
local_bias_helper_df__non_weird = sh.retrieve_results_from_list_of_queries(
    list_of_queries=q.mergoni_scopus_local_bias_helper_queries_non_weird,
    max_date=max_date,
)

Analyze the difference between the original results and the new ones

In [None]:
# Rows of the language-bias helper results whose 'dc:identifier' does not
# occur in the original results, copied and re-indexed from 0.
lang_bias_helper_new_records_df = (
    lang_bias_helper_df
    .loc[lambda frame: ~frame['dc:identifier'].isin(original_df['dc:identifier'])]
    .copy()
    .reset_index(drop=True)
)

In [None]:
# Rows of the non-weird local-bias helper results whose 'dc:identifier' does
# not occur in the original results, copied and re-indexed from 0.
local_bias_helper__non_weird_new_records_df = (
    local_bias_helper_df__non_weird
    .loc[lambda frame: ~frame['dc:identifier'].isin(original_df['dc:identifier'])]
    .copy()
    .reset_index(drop=True)
)

In [None]:
# Rows of the weird local-bias helper results whose 'dc:identifier' does not
# occur in the original results, copied and re-indexed from 0.
local_bias_helper__weird_new_records_df = (
    local_bias_helper_df__weird
    .loc[lambda frame: ~frame['dc:identifier'].isin(original_df['dc:identifier'])]
    .copy()
    .reset_index(drop=True)
)

In [None]:
# Registry of every result set, keyed by the label used in the report.
# Insertion order is the order in which the report rows are produced.
df_dict = {
    # Full result sets as retrieved.
    'original': original_df,
    'lang_bias_helper': lang_bias_helper_df,
    'local_bias_helper__non_weird': local_bias_helper_df__non_weird,
    'local_bias_helper__weird': local_bias_helper_df__weird,
    # Subsets whose identifiers are absent from the original results.
    'lang_bias_helper_new_records': lang_bias_helper_new_records_df,
    'local_bias_helper__non_weird_new_records': local_bias_helper__non_weird_new_records_df,
    'local_bias_helper__weird_new_records': local_bias_helper__weird_new_records_df,
}

In [None]:
# Export every result set registered in df_dict, but only when the handler's
# save_to_csv flag is truthy. Each file name is file_name_prefix followed by
# the df_dict key, which yields exactly the seven names that were previously
# spelled out call by call; iterating the registry keeps the exports and the
# report in sync when a result set is added or renamed.
if h.save_to_csv:
    for export_name, export_df in df_dict.items():
        sh.export_to_csv(export_df, file_name_prefix + export_name)

In [None]:
# Accumulates {label: record count}; filled by the loop over df_dict.
report = {}

In [None]:
# For each registered result set record two counts: all rows, and the rows
# selected by the 'localization_in_title' column.
# NOTE(review): assumes 'localization_in_title' is a boolean column with no
# missing values - confirm against helpers.scopus_helpers.
for key, value in df_dict.items():
    localized_rows = value[value['localization_in_title']]
    report[key] = len(value)
    report[f'{key}_localized'] = len(localized_rows)

In [None]:
# Turn the {label: count} mapping into a table: one row per label with the
# value under 'count'; reset_index() moves the labels out of the index into
# a regular column.
report_df = (
    pd.DataFrame
    .from_dict(report, orient='index', columns=['count'])
    .reset_index()
)

In [None]:
# Export the summary table under the name file_name_prefix + 'report'.
# NOTE(review): unlike the per-result-set exports, this call is not gated on
# h.save_to_csv - confirm whether the report should always be written.
sh.export_to_csv(report_df, file_name_prefix + 'report')

In [None]:
# Copy the 'dc:title' values of the new language-bias helper records to the
# system clipboard, without the index and without a header row.
# NOTE(review): to_clipboard needs a clipboard backend on the host (e.g.
# xclip or xsel on Linux), so this cell may fail in a headless
# "Restart & Run All" - confirm it is meant to stay in the saved notebook.
lang_bias_helper_new_records_df['dc:title'].to_clipboard(index=False, header=False)