In [2]:
def install_missing_packages(package_names):
    """
    Install Missing Packages

    Try to import each named package and, when the import fails, install it with pip
    into the environment of the interpreter that is running this code.

    Parameters:
    - package_names (list): Names of the packages to check. Each name is used both as the
      import name and as the pip distribution name, so the two must coincide
      (a package whose import name differs from its pip name will be reinstalled on every call).

    Returns:
    - None

    Note: `importlib`, `subprocess` and `sys` are imported inside the function, so the caller
    does not need to import anything first. A failed pip run is reported with a printed
    message instead of an exception.

    Example Usage:
    install_missing_packages(['h2o', 'numpy', 'pandas'])
    """
    import importlib
    import subprocess
    import sys

    for package_name in package_names:
        try:
            importlib.import_module(package_name)
            print(f"{package_name} package is already installed")
        except ImportError:
            print(f"{package_name} package not found, installing with pip...")
            # Run pip through the current interpreter so the package lands in the
            # environment this code runs in, not in whichever `pip` is first on PATH.
            exit_code = subprocess.call([sys.executable, '-m', 'pip', 'install', package_name])
            if exit_code != 0:
                print(f"pip failed to install {package_name} (exit code {exit_code})")
            else:
                # Let the import system notice the newly installed files.
                importlib.invalidate_caches()


In [3]:
# bibtexparser is imported in a later cell; make sure it is installed before that import runs.
package_list = ["bibtexparser"]
install_missing_packages(package_list)

bibtexparser package not found, installing with pip...
Collecting bibtexparser
  Downloading bibtexparser-1.4.0.tar.gz (51 kB)
[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 51.9/51.9 kB 2.5 MB/s eta 0:00:00
[?25h  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: bibtexparser
  Building wheel for bibtexparser (setup.py): started
  Building wheel for bibtexparser (setup.py): finished with status 'done'
  Created wheel for bibtexparser: filename=bibtexparser-1.4.0-py3-none-any.whl size=42429 sha256=424cc75e91bdf0be7eda20a77aff576c6367f0ffe2cc718551f3148d0b260a96
  Stored in directory: /Users/yil1/Library/Caches/pip/wheels/6d/48/ea/211993480bbd28915707cff265dc40aa95db736838b6d014a8
Successfully built bibtexparser
Installing collected packages: bibtexparser
Successfully installed bibtexparser-1.4.0


In [4]:
import os
import re
import bibtexparser
from bibtexparser.bwriter import BibTexWriter
import webbrowser
 
def generate_citation_key(author, year, title):
    """
    Generate a citation key by combining the last name of the first author, the first significant word from the title,
    and the year. The generated key is in lowercase.

    Args:
        author (str): The name of the author in 'Last, First' format. Everything before the first comma
            is taken as the last name; a name without a comma is used as a whole.
        year (str): The year of publication.
        title (str): The title of the work. May be empty.

    Returns:
        citation_key (str): The generated citation key in the format 'lastname_firstword_year'.
            'firstword' is the first title word that is not an article ('the', 'a', 'an'). If the title
            consists only of articles, its first word is used; if it contains no words at all,
            the placeholder 'untitled' is used.
    """
    exclude_words = {'the', 'a', 'an'}
    title_words = [word.lower() for word in re.findall(r'\b(\w+)\b', title)]

    # Prefer the first non-article word, but never fail on an empty or article-only title.
    first_word = next((word for word in title_words if word not in exclude_words), None)
    if first_word is None:
        first_word = title_words[0] if title_words else 'untitled'

    # Split on the bare comma and strip, so both 'Last, First' and 'Last,First' work.
    first_author_last_name = author.split(',')[0].strip().lower()
    return f"{first_author_last_name}_{first_word}_{year}"
 
def extract_info(filepath, info_option=1, counts_to_extract=200, start_count=0):
    """
    Extracts information from a .bib file, sorts entries by citation count and year, and returns formatted strings
    based on the info_option.

    The selected entries are passed to format_data, so calling this function has the same
    side effects as format_data in addition to returning the strings.

    Args:
        filepath (str): The complete file path of the .bib file.
        info_option (int, optional): Determines the format of the output strings.
            1: citation count and title only.
            2: citation key, title, author, and year.
            3: citation key, title, author, year, and abstract.
            Default is 1.
        counts_to_extract (int, optional): Maximum number of entries to extract, counted from
            start_count. None means no limit. Default is 200.
        start_count (int, optional): Index to start extraction from (0-based). Default is 0.

    Returns:
        A list of formatted strings representing the extracted entries.
    """
    with open(filepath, encoding='utf-8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)

    def _year_sort_value(year):
        # Years such as '2023a' or 'in press' must not break sorting: use the first
        # run of digits, and treat a year without any digits as 0.
        digits = re.search(r'\d+', str(year))
        return int(digits.group()) if digits else 0

    data = []
    for entry in bib_database.entries:
        title = entry.get('title', '')
        authors = entry.get('author', '').split(' and ')[0]  # Only consider the first author
        year = entry.get('year', '0000')  # Default year set to '0000' if not available
        cite = generate_citation_key(authors, year, title)  # Generate citation key
        abstract = entry.get('abstract', '')  # Extract abstract
        note = entry.get('note', '')
        # The citation count is read from a "Cited by N" fragment in the note field.
        match = re.search(r'cited By (\d+)', note, re.IGNORECASE)
        citation_count = match.group(1) if match else '0'  # Default citation count set to '0' if not available

        data.append({
            'cite': cite,
            'title': title,
            'authors': authors,
            'year': year,
            'abstract': abstract,
            'citation_count': citation_count
        })

    # Sort data by citation count, year in reverse order, then by authors and cite
    data.sort(key=lambda x: (-int(x['citation_count']), -_year_sort_value(x['year']), x['authors'], x['cite']))

    # counts_to_extract is a number of entries, not an end index, so the window
    # has to be measured from start_count.
    end_count = None if counts_to_extract is None else start_count + counts_to_extract
    return format_data(data[start_count:end_count], info_option)
 
def format_data(data, info_option):
    """
    Formats the extracted entries from a .bib file and writes the output to an HTML file. Also opens the HTML file in a web browser.

    The HTML file is written to "paper/output.html" under the current working directory
    (the "paper" directory is created if it does not exist) and is overwritten on every call.
    Entry text is HTML-escaped in the file, and line breaks inside an entry are rendered as <br>.
    The returned strings are left unescaped.

    Args:
        data (list): List of dictionaries representing the extracted entries. Each dictionary contains keys 'cite', 'title', 'authors', 'year', 'abstract', and 'citation_count'.
        info_option (int): Determines the format of the output strings.
            1: citation count and title only.
            2: citation key, title, author, and year.
            3: citation key, title, author, year, and abstract.

    Returns:
        A list of formatted strings representing the extracted entries.

    Raises:
        ValueError: If info_option is not 1, 2 or 3.
    """
    import html
    from pathlib import Path

    # Reject an unknown option before touching the file system or the browser.
    if info_option not in (1, 2, 3):
        raise ValueError(f"info_option must be 1, 2 or 3, got {info_option!r}")

    data_str = []
    for entry in data:
        if info_option == 1:
            entry_str = f"{entry['citation_count']}: {entry['title']}"
        elif info_option == 2:
            entry_str = f"{entry['cite']}: {entry['title']} ({entry['authors']}, {entry['year']})"
        else:
            entry_str = f"{entry['cite']}: {entry['title']} ({entry['authors']}, {entry['year']})\n{entry['abstract']}"
        data_str.append(entry_str)

    # Titles and abstracts are arbitrary text: escape them so '<' and '&' display literally,
    # and turn the newline used by option 3 into a visible line break.
    body = "<br>".join(html.escape(entry_str).replace("\n", "<br>") for entry_str in data_str)
    # Declare the encoding so non-ASCII characters are not misread when the file is opened from disk.
    html_content = (
        '<!DOCTYPE html>\n<html>\n<head><meta charset="utf-8"></head>\n'
        f"<body>\n{body}\n</body>\n</html>\n"
    )

    output_dir = os.path.join(os.getcwd(), "paper")
    os.makedirs(output_dir, exist_ok=True)
    html_file_path = os.path.join(output_dir, "output.html")
    with open(html_file_path, 'w', encoding='utf-8') as html_file:
        html_file.write(html_content)

    # as_uri() percent-encodes spaces and other special characters in the path.
    webbrowser.open_new_tab(Path(html_file_path).resolve().as_uri())
    return data_str
 

In [5]:
# Get the current working directory; later cells build paths relative to it
# (the .bib file is looked up under its "paper" subfolder).
current_dir = os.getcwd()
print(current_dir)

/Users/yil1/p2p-model-bondora/code paper 2


In [11]:
# Define the file name of the BibTeX file to read
file_name = 'reference.bib'

# Build the complete file path: <current_dir>/paper/<file_name>
file_path = os.path.join(current_dir, "paper", file_name)
print(file_path)

/Users/yil1/p2p-model-bondora/code paper 2/paper/reference.bib


In [12]:
# Extract information from the file and print the returned list of strings.
# Option 1: Just titles and citation counts
# counts_to_extract=10 keeps only the first 10 entries of the sorted list.
# Note: extract_info passes its result through format_data, which also writes
# paper/output.html under the current working directory and opens it in a browser tab.
print(extract_info(file_path, info_option=1, counts_to_extract=10))

['0: Application of credit-scoring methods in a decision support system of investment for peer-to-peer lending', '0: Concealing borrowers’ failure history in online {P2P} lending: {A} natural experiment', '0: Interdependence between online peer-to-peer lending and cryptocurrency markets and its effects on financial inclusion', '0: Actions speak louder than words: {Imputing} users’ reputation from transaction history', '0: Intelligent credit scoring using deep learning methods', '0: Psychological distancing and language intensity in {Peer}-to-{Peer} lending', '0: Pricing strategies in {BigTech} lending: {Evidence} from {China}', '0: Does green credit financing mode with cap-and-trade scheme really benefit all members?', '0: Big data, artificial intelligence and machine learning: {A} transformative symbiosis in favour of financial technology', '0: The perfect bail-in: {Financing} without banks using peer-to-peer lending']


In [13]:
# Option 2: extract and print up to 80 entries (from the start of the sorted list)
# with citation keys, titles, first authors, and years
print(extract_info(file_path, info_option=2, counts_to_extract=80))

['babaei_application_2023: Application of credit-scoring methods in a decision support system of investment for peer-to-peer lending (Babaei, Golnoosh, 2023)', 'bai_concealing_2023: Concealing borrowers’ failure history in online {P2P} lending: {A} natural experiment (Bai, Jiaru, 2023)', 'chung_interdependence_2023: Interdependence between online peer-to-peer lending and cryptocurrency markets and its effects on financial inclusion (Chung, Sunghun, 2023)', 'deng_actions_2023: Actions speak louder than words: {Imputing} users’ reputation from transaction history (Deng, Jiaying, 2023)', 'gicić_intelligent_2023: Intelligent credit scoring using deep learning methods (Gicić, Adaleta, 2023)', 'huang_psychological_2023: Psychological distancing and language intensity in {Peer}-to-{Peer} lending (Huang, Jin, 2023)', 'lu_pricing_2023: Pricing strategies in {BigTech} lending: {Evidence} from {China} (Lu, Lei, 2023)', 'ma_does_2023: Does green credit financing mode with cap-and-trade scheme real

In [14]:
# Option 2 again, limited to the top 10 entries with citation keys, titles, first authors, and years.
# start_count=0 is the default and is spelled out here only for clarity.
print(extract_info(file_path, info_option=2, counts_to_extract=10, start_count=0))

['babaei_application_2023: Application of credit-scoring methods in a decision support system of investment for peer-to-peer lending (Babaei, Golnoosh, 2023)', 'bai_concealing_2023: Concealing borrowers’ failure history in online {P2P} lending: {A} natural experiment (Bai, Jiaru, 2023)', 'chung_interdependence_2023: Interdependence between online peer-to-peer lending and cryptocurrency markets and its effects on financial inclusion (Chung, Sunghun, 2023)', 'deng_actions_2023: Actions speak louder than words: {Imputing} users’ reputation from transaction history (Deng, Jiaying, 2023)', 'gicić_intelligent_2023: Intelligent credit scoring using deep learning methods (Gicić, Adaleta, 2023)', 'huang_psychological_2023: Psychological distancing and language intensity in {Peer}-to-{Peer} lending (Huang, Jin, 2023)', 'lu_pricing_2023: Pricing strategies in {BigTech} lending: {Evidence} from {China} (Lu, Lei, 2023)', 'ma_does_2023: Does green credit financing mode with cap-and-trade scheme real

In [15]:
# Option 3: extract and print the top 10 entries with citation keys, titles, first authors, years,
# and abstracts (the abstract follows on a new line inside each string)
print(extract_info(file_path, info_option=3, counts_to_extract=10, start_count=0))

['babaei_application_2023: Application of credit-scoring methods in a decision support system of investment for peer-to-peer lending (Babaei, Golnoosh, 2023)\nAbstract Peer-to-peer lending, as a novel lending model, has challenged investors to make effective investment decisions. Issued loans are grouped into default and nondefault. Therefore, different classification methods can be utilized to predict the status of loans in the future. Our study aim is to propose an investment decision model based on the nondefault loans predicted using three different classifiers, including random forest (RF) that is a multitude of decision trees, support vector machine, and naïve Bayes. In fact, we combine each of these classifiers with the portfolio optimization problem to understand which combination leads to the best portfolio concerning the risk and return. In order to find the best combined model, numerical studies are conducted based on real-world data. In addition, the performances of these c