In [1]:
import os
from types import SimpleNamespace

# Notebook stand-in for the argparse namespace used by the command-line entry
# point: the same three attributes, filled in by hand for interactive runs.
args = SimpleNamespace(
    zotero_library_id="5820275",
    # Prefer the environment so a real key never has to be saved in the
    # notebook; without ZOTERO_API_KEY set this falls back to the "---" placeholder.
    zotero_api_key=os.environ.get("ZOTERO_API_KEY", "---"),
    file_path="./test_out.txt",
)


In [2]:
import argparse
import pandas as pd
import json
import time
from datetime import datetime
import re
from pyzotero import zotero
from slack_sdk.webhook import WebhookClient

def validate_inputs(file_path, api_key, library_id):
    """Check that every required input is present.

    Raises:
        ValueError: for the first falsy value, checked in the order
            file path, API key, library ID.
    """
    required = (
        (file_path, "Input file path is required."),
        (api_key, "Zotero API key is required."),
        (library_id, "Zotero library ID is required."),
    )
    for value, message in required:
        if not value:
            raise ValueError(message)

def post_to_slack(webhook_url, publications):
    """Post one Slack message per publication through an incoming webhook.

    Args:
        webhook_url: Webhook URL handed to ``WebhookClient``.
        publications: Iterable of Zotero item dicts; the title, creators,
            publication title, date and URL are read from each item's
            ``'data'`` mapping, with placeholders for missing fields.

    Posting stops at the first message that Slack does not acknowledge with
    HTTP 200 and body ``"ok"``. The failure is printed, not raised.
    """
    webhook = WebhookClient(webhook_url)
    for pub in publications:
        data = pub['data']
        # Creators stored as a single field carry 'name' instead of 'lastName'.
        authors = ', '.join(
            creator.get('lastName') or creator.get('name') or 'Unknown'
            for creator in data.get('creators', [])
        )
        details = (
            # The newline after the title keeps it on its own line; without it
            # the title ran straight into "Author(s):".
            f"*{data.get('title', 'No Title')}*\n"
            f"Author(s): {authors}\n"
            f"Published in: {data.get('publicationTitle', 'Unknown')} ({data.get('date', 'No Date')})\n"
            f"URL: {data.get('url', 'No URL')}\n"
        )
        response = webhook.send(text=details)
        if response.status_code != 200 or response.body != "ok":
            print(f"Failed to post to Slack: {response.body}")
            break

def fetch_new_publications(zot, last_date):
    """Fetch top-level Zotero items added after ``last_date``.

    Args:
        zot: Zotero client; only ``zot.top(...)`` is called on it.
        last_date: Cut-off as milliseconds since the Unix epoch (any value
            accepted by ``int()``).

    Returns:
        The items from ``zot.top`` (limit 100, sorted by ``dateAdded``
        descending) whose ``data['dateAdded']`` is strictly later than the
        cut-off, in the order received. Items whose date cannot be parsed are
        reported and skipped; an unparsable cut-off yields an empty list.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    items = zot.top(limit=100, sort='dateAdded', direction='desc')

    try:
        # Build an aware UTC datetime so the comparison below does not depend
        # on the local timezone of the machine running the notebook.
        last_date_dt = datetime.fromtimestamp(int(last_date) / 1000, tz=timezone.utc)
    except Exception as e:
        print(f"Date parsing error: {e}")
        return []

    new_items = []
    for item in items:
        try:
            item_date = datetime.fromisoformat(item['data']['dateAdded'].replace("Z", "+00:00"))
        except Exception as e:
            # One malformed item should not hide the remaining ones.
            print(f"Date parsing error: {e}")
            continue
        if item_date.tzinfo is None:
            # No offset in the string: treat it as UTC, as the original did.
            item_date = item_date.replace(tzinfo=timezone.utc)
        if item_date > last_date_dt:
            new_items.append(item)
    return new_items


In [None]:

def main():
    """Post newly added Zotero items to each Slack webhook listed in a CSV file.

    Command-line arguments: ``--file_path`` (CSV with ``webhook`` and
    ``lastDate`` columns), ``--zotero_api_key`` and ``--zotero_library_id``.
    For every row, items added after the row's ``lastDate`` are fetched and
    posted. The CSV is then rewritten with each row's ``lastDate`` moved to the
    newest ``dateAdded`` among the fetched items. A row keeps its old
    ``lastDate`` when nothing new was found or when fetching/posting raised.
    Failures are printed rather than raised.
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description="Post new Zotero publications to Slack.")
    parser.add_argument("--file_path", required=True, help="Path to the input CSV file containing webhooks and last date.")
    parser.add_argument("--zotero_api_key", required=True, help="Zotero API key.")
    parser.add_argument("--zotero_library_id", required=True, help="Zotero library ID.")
    args = parser.parse_args()

    # Validate inputs
    try:
        validate_inputs(args.file_path, args.zotero_api_key, args.zotero_library_id)
    except Exception as e:
        print(f"Input validation failed: {e}")
        return

    # Initialize Zotero API
    zot = zotero.Zotero(args.zotero_library_id, 'group', args.zotero_api_key)

    # Read input file
    try:
        webhooks_df = pd.read_csv(args.file_path)
    except Exception as e:
        print(f"Failed to read input file: {e}")
        return

    updated_dates = []

    for _, row in webhooks_df.iterrows():
        last_date = row["lastDate"]
        webhook_url = row["webhook"]

        # Fetch new publications
        try:
            new_pubs = fetch_new_publications(zot, last_date)
            print(f"Found {len(new_pubs)} new publications for webhook {webhook_url}.")
        except Exception as e:
            print(f"Failed to fetch publications: {e}")
            updated_dates.append(last_date)  # Keep the previous date if fetching fails
            continue

        # Post to Slack
        try:
            post_to_slack(webhook_url, new_pubs)
        except Exception as e:
            print(f"Failed to post to Slack: {e}")
            updated_dates.append(last_date)  # Keep the previous date if posting fails
            continue

        # Update last date
        if new_pubs:
            # Take the newest dateAdded across all fetched items. The list is
            # requested newest-first, so its last element is the OLDEST new
            # item; storing that would re-post the newer ones on the next run.
            newest_added = max(
                datetime.fromisoformat(pub['data']['dateAdded'].replace("Z", "+00:00"))
                for pub in new_pubs
            )
            updated_dates.append(int(newest_added.timestamp() * 1000))
        else:
            updated_dates.append(last_date)

    # Ensure updated_dates matches the DataFrame length
    if len(updated_dates) != len(webhooks_df):
        print("Error: Mismatch between updated_dates and DataFrame length.")
        return

    # Update the DataFrame and save back to the CSV file
    webhooks_df["lastDate"] = updated_dates
    try:
        webhooks_df.to_csv(args.file_path, index=False)
        print(f"Updated file saved to {args.file_path}.")
    except Exception as e:
        print(f"Failed to save updated file: {e}")

# Script entry point: call main() when this code is executed directly.
# NOTE(review): inside a notebook kernel `__name__` is also "__main__", so
# running this cell would call main() and argparse would parse the kernel's own
# argv. Confirm this cell is only meant for the exported script.
if __name__ == "__main__":
    main()

In [3]:
# Interactive equivalent of main()'s setup: build the group-library client and
# load the webhook table from the path held in the hand-made `args` namespace.
zot = zotero.Zotero(args.zotero_library_id, 'group', args.zotero_api_key)
webhooks_df = pd.read_csv(args.file_path)

In [4]:
# Walk the webhook table; nothing is done per row, so after the loop
# `last_date` and `webhook_url` simply hold the values of the final row
# (and stay undefined if the table is empty).
for _, row in webhooks_df.iterrows():
    last_date = row["lastDate"]
    webhook_url = row["webhook"]

In [None]:
# Scratch check: show the current local (timezone-naive) time.
print(datetime.now())

In [None]:
def fetch_new_publications(zot, last_date):
    """Fetch top-level Zotero items added after ``last_date``.

    Args:
        zot: Zotero client; only ``zot.top(...)`` is called on it.
        last_date: Cut-off as milliseconds since the Unix epoch (any value
            accepted by ``int()``).

    Returns:
        The items from ``zot.top`` (limit 100, sorted by ``dateAdded``
        descending) whose ``data['dateAdded']`` is strictly later than the
        cut-off, in the order received. Items whose date cannot be parsed are
        reported and skipped; an unparsable cut-off yields an empty list.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    items = zot.top(limit=100, sort='dateAdded', direction='desc')

    try:
        # Build an aware UTC datetime so the comparison below does not depend
        # on the local timezone of the machine running the notebook.
        last_date_dt = datetime.fromtimestamp(int(last_date) / 1000, tz=timezone.utc)
    except Exception as e:
        print(f"Date parsing error: {e}")
        return []

    new_items = []
    for item in items:
        try:
            item_date = datetime.fromisoformat(item['data']['dateAdded'].replace("Z", "+00:00"))
        except Exception as e:
            # One malformed item should not hide the remaining ones.
            print(f"Date parsing error: {e}")
            continue
        if item_date.tzinfo is None:
            # No offset in the string: treat it as UTC, as the original did.
            item_date = item_date.replace(tzinfo=timezone.utc)
        if item_date > last_date_dt:
            new_items.append(item)
    return new_items


In [27]:
# Override the cut-off with a fixed test value. fetch_new_publications reads it
# as epoch milliseconds (int(last_date) / 1000), i.e. 1000 seconds after the epoch.
last_date = "1000000"

In [None]:
# Fetch everything added after `last_date` and report the count; `webhook_url`
# is whatever an earlier cell left bound and is only used in the message.
new_pubs = fetch_new_publications(zot, last_date)
print(f"Found {len(new_pubs)} new publications for webhook {webhook_url}.")

In [49]:
# Re-run the fetch (same call as the cell above, without the report line).
new_pubs = fetch_new_publications(zot, last_date)

In [None]:
# Print, for every fetched item, the three fields that could name its venue:
# journal abbreviation, publication title and publisher ('Unknown' if absent).
for pub in new_pubs:
    data = pub['data']
    print(data.get('journalAbbreviation', 'Unknown'))
    print(data.get('publicationTitle', 'Unknown'))
    # The publisher was previously fetched but the publication title was
    # printed a second time instead of it.
    print(data.get('publisher', 'Unknown'))
    

In [None]:
# Look up links -> alternate -> href on the third fetched item. Raises if fewer
# than three items were fetched or if 'links' / 'alternate' is missing (the
# chained .get() would then be called on None).
new_pubs[2].get('links').get('alternate').get('href')

In [61]:
# Webhook client for manual send experiments, bound to the `webhook_url`
# left over from an earlier cell.
webhook = WebhookClient(webhook_url)

In [68]:
import os

from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

# Prefer the environment so a real bot token never has to be saved in the
# notebook; without SLACK_BOT_TOKEN set this falls back to the placeholder text.
slack_token = os.environ.get("SLACK_BOT_TOKEN", "something starting with xoxb")
client = WebClient(token=slack_token)

# Single users_list call; the raw response is kept for the inspection cells below.
response = client.users_list()

In [None]:
# Dump the raw users_list response for inspection.
print(response)

In [107]:
# Keep the full member records of every user that is not flagged as deleted.
members2 = [
    record
    for record in response.get('members')
    if not record.get('deleted')
]

In [None]:
# Quick sanity check: show the id of the second member record.
print(response.get('members')[1].get("id"))


# Collect display_name_normalized for all members that are not deleted.
# A member without a 'profile' entry contributes None instead of raising.
members = [
    member.get("profile", {}).get("display_name_normalized")
    for member in response.get('members')
    if not member.get('deleted')
]



In [114]:
def replace_names_in_notes(notes, slack_users_df, min_score=50):
    """Replace ``@name`` tokens in ``notes`` with Slack user mentions.

    Every ``@word`` token (regex ``@\\w+``) is stripped of its ``@``,
    lower-cased and compared via ``fuzz.ratio`` with each user's display name
    (spaces removed, lower-cased). The best-scoring user at or above
    ``min_score`` wins (the first one on ties) and the token becomes
    ``<@USER_ID>``. Tokens without a good-enough match are left unchanged.

    Args:
        notes: Text to scan.
        slack_users_df: DataFrame with ``display_name_normalized`` and ``id``
            columns; may be empty. Rows whose display name is not a string
            are ignored.
        min_score: Minimum ``fuzz.ratio`` score for a match. Defaults to 50.

    Returns:
        ``notes`` with every matched token substituted.
    """
    # Normalise the candidate names once instead of once per token.
    candidates = [
        (user["display_name_normalized"].replace(" ", "").lower(), user["id"])
        for user in slack_users_df.to_dict("records")
        if isinstance(user.get("display_name_normalized"), str)
    ]

    def find_best_match(name):
        name_cleaned = name.lstrip("@").lower()
        best_match = None
        highest_score = 0
        for normalized_cleaned, user_id in candidates:
            score = fuzz.ratio(name_cleaned, normalized_cleaned)
            # Strict '>' keeps the first candidate when scores tie.
            if score > highest_score and score >= min_score:
                highest_score = score
                best_match = user_id
        return best_match

    def replacer(match):
        name = match.group(0)
        matched_id = find_best_match(name)
        if matched_id:
            return f"<@{matched_id}>"
        return name

    return re.sub(r"@\w+", replacer, notes)

def get_slack_users(slack_token):
    """Fetch Slack users as a DataFrame of display names and user IDs.

    Args:
        slack_token: Token passed to ``WebClient``.

    Returns:
        DataFrame with columns ``display_name_normalized`` and ``id``, one row
        per member that is not deleted and has both values. The two columns
        are present even when no member qualifies.
    """
    client = WebClient(token=slack_token)
    # NOTE(review): users_list is called once with no cursor. Confirm that the
    # workspace's member list fits in a single response.
    response = client.users_list()

    members = []
    for member in response.get('members', []):
        if member.get('deleted'):
            continue
        display_name = member.get("profile", {}).get("display_name_normalized")
        user_id = member.get("id")
        if display_name and user_id:
            members.append({"display_name_normalized": display_name, "id": user_id})

    # Fixing the columns keeps the schema stable for callers when `members` is empty.
    return pd.DataFrame(members, columns=["display_name_normalized", "id"])

In [115]:
# Build the display-name / id lookup table using the token from the cell above.
slack_users = get_slack_users(slack_token)


In [None]:
# Display the lookup table. NOTE(review): the rendered output contains real
# display names and user IDs; clear it before sharing the notebook.
slack_users

In [95]:
from fuzzywuzzy import fuzz

In [None]:
# Try the replacement on a sample note containing a single "@Attila" mention.
replace_names_in_notes("esting to share a paper via zotero. Picked one that I am not sure about how meaningful it is - Deep learning enrichment… @Attila what do you think ;-)",slack_users)


In [None]:
# Rewrite the mentions in `notes_str` in place.
# NOTE(review): `notes_str` is not defined in any visible cell, so this fails on
# a fresh kernel; it also overwrites its own input, so re-running the cell
# processes already-substituted text.
notes_str = replace_names_in_notes(notes_str, slack_users)


In [None]:
# Send a single mrkdwn section block whose text is a mention, with "fallback"
# as the plain-text alternative.
# NOTE(review): replace_names_in_notes emits mentions as <@USER_ID>; here the
# mention uses the display name "Attila". Confirm whether Slack resolves that.
webhook.send(
            text="fallback",
            blocks=[
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": "<@Attila>"
                    }
                }
            ]
        )