In [2]:
# Importing dependencies
import requests
import pandas as pd
import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.stats import entropy
import spacy
from langdetect import detect, DetectorFactory
from langdetect import detect_langs
import fastlang
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_distances
from collections import Counter

In [58]:
isbn = 9780060935467
url = f"https://openlibrary.org/search.json?q=isbn:{isbn}&fields=title,isbn,author_name,subject"
tags = requests.get(url).json()['docs'][0]['subject']
selected_tags = [tags[i] for i in [3, 4, 5, 6, 7, 8, 12, 13, 15, 16]]
print(selected_tags)

In [60]:
# Load pre-trained model for embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# Desired diversity area and overarching discipline
desired_diversity = 'gender representation'
overarching_discipline = 'african studies'

# Function to categorize tags
def categorize_tags(tag_embeddings, diversity_embedding, discipline_embedding, tags):
    # Generate embeddings for tags, diversity area, and discipline
    tag_embeddings = model.encode(selected_tags)
    diversity_embedding = model.encode([desired_diversity])
    discipline_embedding = model.encode([overarching_discipline])
    categorized_tags = {}
    for i, tag in enumerate(tags):
        # Calculate cosine similarity to both diversity and discipline
        diversity_sim = cosine_similarity([tag_embeddings[i]], diversity_embedding)[0][0]
        discipline_sim = cosine_similarity([tag_embeddings[i]], discipline_embedding)[0][0]

        # Categorize based on higher similarity
        if diversity_sim > discipline_sim:
            categorized_tags[tag] = 'Diversity'
        elif discipline_sim > diversity_sim:
            categorized_tags[tag] = 'Discipline'
        else:
            categorized_tags[tag] = 'Neither'

    return categorized_tags

# Categorize tags
categorized_tags = categorize_tags(tag_embeddings, diversity_embedding, discipline_embedding, selected_tags)
print(categorized_tags)



{'racial segregation': 'Discipline', 'mob mentality': 'Diversity', 'Southern Gothic': 'Discipline', 'southern life': 'Discipline', 'racial injustice': 'Discipline', 'class': 'Diversity', 'laws': 'Discipline', 'loss of innocence': 'Diversity', 'domestic fiction': 'Diversity', 'legal stories': 'Discipline'}


In [79]:
cat_alpha, cat_beta, cat_gamma = [], [], []
for tag in selected_tags:
    if categorized_tags[tag] == 'Discipline':
        cat_alpha.append(tag)
    elif categorized_tags[tag] == 'Diversity':
        cat_beta.append(tag)
    else:
        cat_gamma.append(tag)
        
print("alpha (overarching discipline):", cat_alpha, 
      "\n beta (desired diversity):", cat_beta,
     "\n gamma (neither):", cat_gamma)

alpha (overarching discipline): ['racial segregation', 'Southern Gothic', 'southern life', 'racial injustice', 'laws', 'legal stories'] 
 beta (desired diversity): ['mob mentality', 'class', 'loss of innocence', 'domestic fiction'] 
 gamma (neither): []


In [86]:
# Load pre-trained model for topic embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# Define Rao's entropy formula
def raos_entropy(cat_alpha, cat_beta):
    # Generate embeddings for topics
    alpha_embeddings = model.encode(np.unique(cat_alpha))
    beta_embeddings = model.encode(np.unique(cat_beta))

    # Full list of syllabus topics
    syllabus_topics = cat_alpha + cat_beta

    # Calculate proportions (p_i and p_j)
    topic_counts = Counter(syllabus_topics)
    total_topics = len(syllabus_topics)
    p_alpha = np.array([topic_counts[topic] / total_topics for topic in cat_alpha])
    p_beta = np.array([topic_counts[topic] / total_topics for topic in cat_beta])
    entropy = 0.0
    # Calculate pairwise cosine distances between topics
    distance_matrix = cosine_distances(alpha_embeddings, beta_embeddings)
    
    # Sum over all topic pairs
    for i in range(len(cat_alpha)):
        for j in range(len(cat_beta)):
            entropy += p_alpha[i] * p_beta[j] * distance_matrix[i, j]
    
    return entropy

# Calculate diversity using Rao's entropy
entropy = raos_entropy(cat_alpha, cat_beta)
print(f"Rao's Entropy (Diversity): {entropy}")

Rao's Entropy (Diversity): 0.17498199820518495


In [78]:
z = cat_alpha.extend(['abc'])
print(z)

None


In [16]:
cat_a, cat_b, cat_c = ['a'], ['b'], ['c']

In [12]:
# # Define some APA syllabus labels for grouping
# apa_labels = {
#     "Gentrification": "Social Identities",
#     "Housing policy": "Social Identities",
#     "Latin Americans": "World",
#     "Puerto Ricans": "World",
#     # Add more mappings as needed
# }

# def group_tags(tags):
#     """Group book tags based on APA syllabus labels."""
#     grouped_tags = {label: [] for label in apa_labels.values()}
#     for tag in tags:
#         category = apa_labels.get(tag, None)
#         if category:
#             grouped_tags[category].append(tag)
#     return grouped_tags

# # Example grouping
# grouped_tags = group_tags(tags)
# print(f"Grouped Tags: {grouped_tags}")


'the+lord+of+the+tings'

In [16]:
# Define APA syllabus labels based on the categories from the site
apa_labels = {
    "African and African-American Philosophy": [
        "African philosophy", "African-American philosophy", "Black studies"
    ],
    "Feminist Philosophy": [
        "Feminism", "Women's studies", "Gender studies", "Queer theory"
    ],
    "Latin American and Hispanic Philosophy": [
        "Latin American philosophy", "Hispanic studies", "Chicano studies"
    ],
    "Asian and Asian-American Philosophy": [
        "Asian philosophy", "Eastern philosophy", "Chinese philosophy", "Indian philosophy", "Japanese philosophy"
    ],
    "Social and Political Philosophy": [
        "Gentrification", "Housing policy", "Social justice", "Political science", "Democracy", "Marxism"
    ],
    "Ethics": [
        "Ethics", "Moral philosophy", "Bioethics", "Applied ethics", "Virtue ethics"
    ],
    "Philosophy of Religion": [
        "Religion", "Theology", "Religious studies", "Spirituality"
    ],
    "Metaphysics and Epistemology": [
        "Metaphysics", "Epistemology", "Ontology", "Knowledge theory"
    ],
    # Add more categories as needed
}

def group_tags(tags):
    """Group book tags based on APA syllabus labels."""
    grouped_tags = {category: [] for category in apa_labels.keys()}
    other_tags = []
    for tag in tags:
        found = False
        for category, keywords in apa_labels.items():
            if tag.lower() in [keyword.lower() for keyword in keywords]:
                grouped_tags[category].append(tag)
                found = True
                break
        if not found:
            other_tags.append(tag)
    grouped_tags["Other"] = other_tags
    return grouped_tags

# Example grouping
grouped_tags = group_tags(tags)
print(f"Grouped Tags: {grouped_tags}")

def rao_entropy(tag_groups):
    """Calculate Rao's entropy based on tag groups."""
    tag_counts = [len(tags) for tags in tag_groups.values() if tags]
    probabilities = np.array(tag_counts) / sum(tag_counts)
    distances = squareform(pdist(np.eye(len(probabilities)), metric='euclidean'))
    return np.dot(probabilities, np.dot(distances, probabilities))

# Example calculation
diversity_score = rao_entropy(grouped_tags)
print(f"Rao's Entropy: {diversity_score}")

def ia_select(books, lambda_value=0.5):
    """Balance utility and diversity to recommend new books."""
    # Simulate utility and diversity scores for each book
    utilities = np.random.rand(len(books))
    diversities = np.random.rand(len(books))
    total_scores = utilities + lambda_value * diversities
    # Recommend top-k books based on total score
    recommended_books = [book for _, book in sorted(zip(total_scores, books), reverse=True)]
    return recommended_books[:5]  # Recommend top-5 books

# Example recommendation
books = ["The Lord of the Rings", "1984", "The Autobiography of Malcolm X",
         "Harry Potter"]
recommendations = ia_select(books)
print(f"Recommended Books: {recommendations}")


Grouped Tags: {'African and African-American Philosophy': [], 'Feminist Philosophy': [], 'Latin American and Hispanic Philosophy': [], 'Asian and Asian-American Philosophy': [], 'Social and Political Philosophy': [], 'Ethics': [], 'Philosophy of Religion': [], 'Metaphysics and Epistemology': [], 'Other': ['The Lord of the Rings', 'Fiction', 'Ficción', 'English Fantasy fiction', 'Ficción fantástica inglesa', 'Fantasy fiction', 'Open Library Staff Picks', 'Middle Earth (Imaginary place)', 'Fiction, fantasy, epic', 'Middle earth (imaginary place), fiction', 'Baggins, frodo (fictitious character), fiction', 'Gandalf (fictitious character), fiction', 'British and irish fiction (fictional works by one author)', 'English literature', 'Frodo Baggins (Fictitious character)', 'Baggins, bilbo (fictitious character), fiction', 'Fiction, fantasy, general', 'English language', 'Fiction, media tie-in', 'Gift books', 'Quests (Expeditions)', 'Wizards', 'Terre du Milieu (Lieu imaginaire)', 'Romans, nouv

In [17]:
def full_pipeline(syllabus):
    """Full pipeline to process syllabus and recommend diverse books."""
    # Step 1: Fetch tags for each book
    all_tags = []
    for book in syllabus:
        tags = get_book_tags(book)
        grouped = group_tags(tags)
        all_tags.append(grouped)
    
    # Step 2: Calculate diversity score
    diversity_scores = [rao_entropy(tags) for tags in all_tags]
    
    # Step 3: Recommend new books based on IA-Select
    recommendations = ia_select(syllabus)
    
    return diversity_scores, recommendations

# Example usage
syllabus = ["The Lord of the Rings", "Another Book", "Yet Another Book"]
diversity_scores, recommendations = full_pipeline(syllabus)
print(f"Diversity Scores: {diversity_scores}")
print(f"Recommendations: {recommendations}")


ValueError: shapes (1,1) and (0,) not aligned: 1 (dim 1) != 0 (dim 0)