In [1]:
import os

import requests

In [2]:
from scholarly import scholarly


def get_citations(author_name):
    """Look up an author's h-index and i10-index through the ``scholarly`` package.

    The first result yielded by ``scholarly.search_author(author_name)`` is
    filled with the ``basics`` and ``indices`` sections, and its ``hindex`` /
    ``i10index`` entries are returned (each falling back to 0 when absent).

    Args:
        author_name: Name passed to ``scholarly.search_author``.

    Returns:
        A ``(h_index, i10_index)`` tuple. ``(0, 0)`` is returned, after
        printing a message, when the search yields no author or when any
        other exception occurs during the lookup.
    """
    candidates = scholarly.search_author(author_name)
    try:
        # Only the first search hit is considered.
        profile = scholarly.fill(next(candidates), sections=["basics", "indices"])
        return profile.get("hindex", 0), profile.get("i10index", 0)
    except StopIteration:
        # The search produced no results at all.
        print("Автор не найден")
    except Exception as exc:
        print(f"Произошла ошибка: {exc}")
    return 0, 0

In [3]:
import xml.etree.ElementTree as ET

from tqdm import tqdm


def fetch_arxiv_papers(query, max_results=10):
    """Fetch the newest arXiv papers for ``query`` and attach author citation metrics.

    The arXiv API is asked for entries matching ``all:"<query>"`` sorted by
    submission date, newest first. For every entry the authors' h-index and
    i10-index are obtained with ``get_citations`` and averaged per paper.

    Args:
        query: Search phrase; it is wrapped in double quotes and searched in
            all fields.
        max_results: Maximum number of entries requested from the API.

    Returns:
        A list of dicts with the keys ``title``, ``summary``, ``published``,
        ``link``, ``authors``, ``h_index`` and ``i10_index``, or ``None`` when
        the API answers with a non-200 status code. Text fields are ``None``
        when the corresponding element is missing from an entry.

    Raises:
        requests.RequestException: If the request fails or times out.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    atom = "{http://www.w3.org/2005/Atom}"

    # Let requests build the query string so that characters such as "&" or
    # "#" inside ``query`` are URL-encoded instead of corrupting the request.
    response = requests.get(
        "http://export.arxiv.org/api/query",
        params={
            "search_query": f'all:"{query}"',
            "start": 0,
            "max_results": max_results,
            "sortBy": "submittedDate",
            "sortOrder": "descending",
        },
        timeout=30,  # seconds; without it a stalled connection blocks forever
    )
    if response.status_code != 200:
        print(f"Ошибка при запросе: {response.status_code}")
        return None

    root = ET.fromstring(response.content)

    # Author lookups are slow network calls; remember each author's result so
    # that someone appearing on several papers is only queried once per call.
    metrics_by_author = {}

    papers = []
    for entry in tqdm(root.findall(f"{atom}entry")):
        # findtext() returns None for a missing element instead of raising.
        names = (node.findtext(f"{atom}name") for node in entry.findall(f"{atom}author"))
        authors = [name for name in names if name]

        link = entry.findtext(f"{atom}id")
        paper_info = {
            "title": entry.findtext(f"{atom}title"),
            "summary": entry.findtext(f"{atom}summary"),
            "published": entry.findtext(f"{atom}published"),
            "link": link.strip() if link is not None else None,
            "authors": authors,
        }

        h_indices = []
        i10_indices = []
        for author in authors:
            if author not in metrics_by_author:
                metrics_by_author[author] = get_citations(author)
            h_index, i10_index = metrics_by_author[author]
            h_indices.append(h_index)
            i10_indices.append(i10_index)

        # Papers without any author get 0 rather than dividing by zero.
        paper_info["h_index"] = sum(h_indices) / len(h_indices) if h_indices else 0
        paper_info["i10_index"] = sum(i10_indices) / len(i10_indices) if i10_indices else 0

        papers.append(paper_info)

    return papers

In [4]:
# Fetch up to 20 of the newest arXiv papers for "recommender systems". Every
# author is looked up through get_citations, so this cell is slow: the run
# recorded below took close to nine minutes.
papers_data = fetch_arxiv_papers("recommender systems", max_results=20)


  0%|          | 0/20 [00:00<?, ?it/s]

Автор не найден


 15%|█▌        | 3/20 [00:42<04:18, 15.20s/it]

Автор не найден


 20%|██        | 4/20 [01:04<04:41, 17.58s/it]

Автор не найден


 25%|██▌       | 5/20 [01:07<03:07, 12.49s/it]

Автор не найден


 30%|███       | 6/20 [01:22<03:06, 13.30s/it]

Автор не найден


 35%|███▌      | 7/20 [02:10<05:20, 24.63s/it]

Автор не найден


 40%|████      | 8/20 [02:27<04:26, 22.20s/it]

Автор не найден


 45%|████▌     | 9/20 [02:47<03:56, 21.53s/it]

Автор не найден


 55%|█████▌    | 11/20 [03:24<03:00, 20.02s/it]

Автор не найден
Автор не найден


 65%|██████▌   | 13/20 [03:59<02:12, 18.90s/it]

Автор не найден
Автор не найден


 80%|████████  | 16/20 [05:40<01:53, 28.40s/it]

Автор не найден


 90%|█████████ | 18/20 [07:37<01:29, 44.64s/it]

Автор не найден


 95%|█████████▌| 19/20 [07:46<00:33, 33.99s/it]

Автор не найден


100%|██████████| 20/20 [08:46<00:00, 26.32s/it]

Автор не найден





In [5]:
# Preview the first two fetched records.
papers_data[:2]

[{'title': 'Can Large Language Models Assess Serendipity in Recommender Systems?',
  'summary': "  Serendipity-oriented recommender systems aim to counteract\nover-specialization in user preferences. However, evaluating a user's\nserendipitous response towards a recommended item can be challenging because of\nits emotional nature. In this study, we address this issue by leveraging the\nrich knowledge of large language models (LLMs), which can perform a variety of\ntasks. First, this study explored the alignment between serendipitous\nevaluations made by LLMs and those made by humans. In this investigation, a\nbinary classification task was given to the LLMs to predict whether a user\nwould find the recommended item serendipitously. The predictive performances of\nthree LLMs on a benchmark dataset in which humans assigned the ground truth of\nserendipitous items were measured. The experimental findings reveal that\nLLM-based assessment methods did not have a very high agreement rate wit

In [6]:
# NOTE(review): this displays the entire list, repeating the two-record
# preview from the previous cell; consider dropping it or showing a slice.
papers_data

[{'title': 'Can Large Language Models Assess Serendipity in Recommender Systems?',
  'summary': "  Serendipity-oriented recommender systems aim to counteract\nover-specialization in user preferences. However, evaluating a user's\nserendipitous response towards a recommended item can be challenging because of\nits emotional nature. In this study, we address this issue by leveraging the\nrich knowledge of large language models (LLMs), which can perform a variety of\ntasks. First, this study explored the alignment between serendipitous\nevaluations made by LLMs and those made by humans. In this investigation, a\nbinary classification task was given to the LLMs to predict whether a user\nwould find the recommended item serendipitously. The predictive performances of\nthree LLMs on a benchmark dataset in which humans assigned the ground truth of\nserendipitous items were measured. The experimental findings reveal that\nLLM-based assessment methods did not have a very high agreement rate wit

In [7]:
import psycopg2

In [9]:

# Connection parameters for the database. Each value can be overridden through
# the standard libpq environment variable; the literals are the
# local-development defaults this notebook was written against.
conn_params = {
    "host": os.environ.get("PGHOST", "localhost"),
    "database": os.environ.get("PGDATABASE", "postgres"),
    "user": os.environ.get("PGUSER", "postgres"),
    "password": os.environ.get("PGPASSWORD", "mysecretpassword"),
}

# Bind both handles up front so the cleanup in ``finally`` is safe even when
# connect() or cursor() raises (and never touches a stale handle left over
# from an earlier cell).
conn = None
cursor = None
try:
    # Connect to the database.
    conn = psycopg2.connect(**conn_params)
    cursor = conn.cursor()

    # Create the articles table; ``link`` is unique so re-inserting the same
    # paper can be detected.
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS articles (
            id SERIAL PRIMARY KEY,
            title TEXT NOT NULL,
            summary TEXT,
            published TIMESTAMP WITH TIME ZONE,
            link TEXT NOT NULL UNIQUE,
            h_index FLOAT,
            i10_index FLOAT
        );
    """)

    # Create the authors table; one row per (author name, article) pair.
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS authors (
            id SERIAL PRIMARY KEY,
            name TEXT NOT NULL,
            article_id INTEGER,
            FOREIGN KEY (article_id) REFERENCES articles(id),
            UNIQUE (name, article_id)
        );
    """)

    # Persist both CREATE TABLE statements.
    conn.commit()
    print("Таблица успешно создана.")

except Exception as error:  # psycopg2.DatabaseError is already an Exception
    print(f"Ошибка при создании таблицы: {error}")
finally:
    # Release the cursor and the connection, whichever were actually opened.
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()
        print("Соединение с базой данных закрыто.")


Таблица успешно создана.
Соединение с базой данных закрыто.


In [10]:
# Connection settings used by the database cells below. Values come from the
# standard libpq environment variables when they are set, so real credentials
# do not have to be written into the notebook; the literals are the
# local-development defaults.
conn_params = {
    "host": os.environ.get("PGHOST", "localhost"),
    "database": os.environ.get("PGDATABASE", "postgres"),
    "user": os.environ.get("PGUSER", "postgres"),
    "password": os.environ.get("PGPASSWORD", "mysecretpassword"),
}

In [11]:
def articles2db(conn_params, data):
    """Insert articles and their authors into the ``articles`` and ``authors`` tables.

    All rows are written in a single transaction that is committed after the
    last article, or rolled back if anything fails. Errors are printed rather
    than raised. An article whose ``link`` already exists is skipped together
    with its authors.

    Args:
        conn_params: Keyword arguments forwarded to ``psycopg2.connect``.
        data: One article dict or an iterable of article dicts, each with the
            keys ``title``, ``summary``, ``published``, ``link``, ``h_index``,
            ``i10_index`` and ``authors``. ``None`` is treated as "no
            articles".
    """
    if data is None:
        # fetch_arxiv_papers returns None on an HTTP error; nothing to store.
        data = []
    elif isinstance(data, dict):
        data = [data]

    # Bound before ``try`` so the cleanup below never hits an undefined name.
    conn = None
    cursor = None

    try:
        conn = psycopg2.connect(**conn_params)
        cursor = conn.cursor()

        for item in data:
            cursor.execute("""
                INSERT INTO articles (title, summary, published, link, h_index, i10_index)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (link) DO NOTHING
                RETURNING id;
            """, (item["title"], item["summary"], item["published"], item["link"], item["h_index"], item["i10_index"]))
            # No row is affected when ON CONFLICT skipped the insert.
            article_id = cursor.fetchone()[0] if cursor.rowcount > 0 else None
            if article_id is None:
                continue

            for author in item["authors"]:
                cursor.execute("""
                    INSERT INTO authors (name, article_id) VALUES (%s, %s)
                    ON CONFLICT (name, article_id) DO NOTHING;
                """, (author, article_id))

        conn.commit()
    except Exception as error:  # psycopg2.DatabaseError is already an Exception
        print(f"Ошибка при сохранении статей: {error}")
        if conn is not None:
            conn.rollback()
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
            print("Соединение с базой данных закрыто.")

In [12]:
# Store the fetched papers and their authors in the database.
articles2db(conn_params, papers_data)

Соединение с базой данных закрыто.


In [13]:
import psycopg2

# Open a connection; the try/finally guarantees it is closed even if the
# query below fails.
conn = psycopg2.connect(**conn_params)
try:
    # The cursor context manager closes the cursor on exit.
    with conn.cursor() as cursor:
        # List the tables that live in the public schema.
        cursor.execute("""
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public'
ORDER BY table_name;
""")

        # Fetch and print the table names.
        tables = cursor.fetchall()
        for table in tables:
            print(table[0])
finally:
    # Close the connection.
    conn.close()


articles
authors


In [14]:
def print_first_five_rows_of_each_table(conn_params, limit=5):
    """Print the first rows of every table in the ``public`` schema.

    Args:
        conn_params: Keyword arguments forwarded to ``psycopg2.connect``.
        limit: Maximum number of rows printed per table. Defaults to 5, which
            is what the function name and the printed header promise (the
            query previously fetched up to 50 rows regardless).

    A failure while reading one table is printed and does not stop the
    remaining tables from being read.
    """
    # Local import: ``psycopg2.sql`` composes identifiers with proper quoting.
    from psycopg2 import sql

    conn = psycopg2.connect(**conn_params)
    try:
        with conn.cursor() as cursor:
            # Collect the names of all tables in the public schema, in a
            # stable order.
            cursor.execute("""
                SELECT table_name
                FROM information_schema.tables
                WHERE table_schema = 'public'
                ORDER BY table_name
            """)
            table_names = [row[0] for row in cursor.fetchall()]

            for table_name in table_names:
                print(f"Первые {limit} строк из таблицы {table_name}:")
                try:
                    # Quote the table name as an identifier instead of pasting
                    # it into the SQL text; the limit is a bound parameter.
                    cursor.execute(
                        sql.SQL("SELECT * FROM {} LIMIT %s;").format(sql.Identifier(table_name)),
                        (limit,),
                    )
                    for row in cursor.fetchall():
                        print(row)
                except psycopg2.Error as e:
                    print(f"Ошибка при выборке из таблицы {table_name}: {e}")
                    # A failed statement aborts the transaction; roll back so
                    # the queries for the remaining tables can still run.
                    conn.rollback()
                print("\n")  # blank line between tables for readability
    finally:
        conn.close()


# Print rows from every table in the public schema.
print_first_five_rows_of_each_table(conn_params)

Первые 5 строк из таблицы articles:
(1, 'Can Large Language Models Assess Serendipity in Recommender Systems?', "  Serendipity-oriented recommender systems aim to counteract\nover-specialization in user preferences. However, evaluating a user's\nserendipitous response towards a recommended item can be challenging because of\nits emotional nature. In this study, we address this issue by leveraging the\nrich knowledge of large language models (LLMs), which can perform a variety of\ntasks. First, this study explored the alignment between serendipitous\nevaluations made by LLMs and those made by humans. In this investigation, a\nbinary classification task was given to the LLMs to predict whether a user\nwould find the recommended item serendipitously. The predictive performances of\nthree LLMs on a benchmark dataset in which humans assigned the ground truth of\nserendipitous items were measured. The experimental findings reveal that\nLLM-based assessment methods did not have a very high ag