In [None]:
%pip install pandas requests

In [None]:
import pandas as pd
import requests
import os 
import time
import json

# Read the book list from books.csv in the working directory.
df = pd.read_csv('books.csv')

# Inspect df: print the (rows, columns) shape, then end the cell with the
# preview so it is actually rendered (a bare expression in the middle of a
# cell is evaluated and silently discarded).
print(df.shape)
df.head()

(18044, 3)

In [3]:
# Give the title and genre columns shorter names; every other column keeps
# its original label.
column_renames = {"book_title": "title", "book_genres": "genres"}
df = df.rename(columns=column_renames)

In [4]:
# Keep only the rows where both title and author are present.
has_title_and_author = df[["title", "author"]].notna().all(axis=1)
df = df[has_title_and_author]

In [29]:
# Write the current df to 'books.csv' without the index column.
# NOTE(review): this is the same path the notebook reads its input from, so the
# input file is overwritten with whatever df holds at this point — confirm
# that is intended, or write to a separate file name.
df.to_csv('books.csv', index=False)

In [5]:
# Select 10 random rows to test if PPLX code works. random_state pins the
# sample so the same 10 books are drawn on every run.
# The genres column is dropped from the sample (presumably because the model
# is asked to supply genres itself — TODO confirm).
df10 = df.sample(n=10, random_state=42).drop(columns="genres")

# Preview the final test frame; as the last expression it is rendered.
df10.head()

In [30]:
# Save the 10-row test sample to 'books10.csv' without the index column.
df10.to_csv('books10.csv', index=False)

In [19]:
# System message sent unchanged with every request. It lists the output
# columns and the coding scheme the model should apply to each book.
# NOTE(review): the prompt asks the model to "return a pandas data frame"; the
# response printed further down in this notebook is Python source that builds a
# DataFrame rather than plain values. Requesting JSON may be easier to parse —
# confirm the desired output format before scaling up.
system_prompt = """
You are a helpful literary assistant. Your job is to search the internet and assess several aspects for books that are provided to you.

You return a pandas data frame where each row refers to one book, and the columns are the following:
'title', 'author', 'female_main_character', 'author_gender', 'pub_year', 'genres',
'feature_1pos', 'feature_1neg', 'feature_2pos', 'feature_2neg', 'feature_3pos', 'feature_3neg',
'feature_4pos', 'feature_4neg', 'feature_5pos', 'feature_5neg', 'feature_6pos', 'feature_6neg',
'feature_7pos', 'feature_7neg', 'feature_8pos', 'feature_8neg', 'feature_9pos', 'feature_9neg',
'feature_10pos', 'feature_10neg', 'feature_11pos', 'feature_11neg', 'feature_12pos', 'feature_12neg'

For each book, fill in the title and author and assess if it features a female main character. The character does not need to be the sole protagonist but must play a central role in the plot, with more significance than a secondary or supporting character.

If there is no female main character, fill the column 'female_main_character' for that row with a 0 and the later columns with NA and move on to the next book.

If there is a female main character, fill it with a 1 and assess the following questions, filling in the according values into the columns:

- What is the gender of the author? ('author_gender'; {0: male, 1: female, -1: not determinable or non-binary})
- In what year was the book published? ('pub_year'; {numeric value})
- What is/are the genre(s) of the book? ('genres'; {list of a maximum of 5 genres in the order of their relevance})

For the following, use:
    {0: doesn’t occur, 1: does occur} for POS features
    {0: doesn’t occur, -1: does occur} for NEG features

1-POS: female character is saved by a male character ('feature_1pos')
1-NEG: female character saves a male character ('feature_1neg')

2-POS: female character is protected by a male character ('feature_2pos')
2-NEG: female character protects a male character ('feature_2neg')

3-POS: female character’s problem is solved through help or luck ('feature_3pos')
3-NEG: female character solves problem through skill ('feature_3neg')

4-POS: female character is victimized/harmed by male character ('feature_4pos')
4-NEG: female character is perpetrator/harms a male character ('feature_4neg')

5-POS: female character follows orders ('feature_5pos')
5-NEG: female character gives orders ('feature_5neg')

6-POS: female character is admired for her beauty ('feature_6pos')
6-NEG: female character is admired for her intelligence ('feature_6neg')

7-POS: female character is homemaker ('feature_7pos')
7-NEG: female character is breadwinner ('feature_7neg')

8-POS: female character is skilled in domestic tasks ('feature_8pos')
8-NEG: female character is not skilled or uninterested in domestic tasks ('feature_8neg')

9-POS: female character has a lower rank occupation (e.g., nurse, assistant, maid, …) ('feature_9pos')
9-NEG: female character has a higher rank occupation (doctor, manager, ruler, …) ('feature_9neg')

10-POS: female character is physically strong/capable ('feature_10pos')
10-NEG: female character is physically weak/incapable ('feature_10neg')

11-POS: female character has low self-esteem/is emotionally fragile ('feature_11pos')
11-NEG: female character has high self-esteem/is emotionally strong ('feature_11neg')

12-POS: female character is scared of taking risks/challenges ('feature_12pos')
12-NEG: female character likes taking risks/challenges ('feature_12neg')
"""

# Per-book user message; the {title} and {author} placeholders are filled in
# with str.format for each row before the request is sent.
user_prompt_template = "Please analyze the following book: {title} by {author}."

In [None]:
# Read the API key from the environment so it is never stored in the notebook.
# Raises KeyError if PERPLEXITY_API_KEY is not set.
YOUR_API_KEY = os.environ["PERPLEXITY_API_KEY"]

# Auth and content-type headers sent with every request.
headers = {
    "Authorization": f"Bearer {YOUR_API_KEY}",
    "Content-Type": "application/json"
}

# Chat-completions endpoint the requests are posted to.
url = "https://api.perplexity.ai/chat/completions"

# Seconds to wait on a single request. Without a timeout, requests can block
# forever on a stalled connection and freeze the whole cell.
REQUEST_TIMEOUT = 60

# One entry per row of df10, in row order: either the model's answer text or
# an "Error: ..." string, so the list always stays aligned with df10.
responses = []

for index, row in df10.iterrows():
    # Extract title and author from each row
    title = row["title"]
    author = row["author"]

    # Format user prompt with current title and author
    user_prompt = user_prompt_template.format(title=title, author=author)

    # Request body; the commented keys are optional parameters kept for reference.
    payload = {
        "model": "sonar",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        # "max_tokens": 123,  # Limits the length of the response
        # "temperature": 0.2,  # Controls randomness in the response
        # "top_p": 0.9,
        # "search_domain_filter": ["<any>"],
        # "return_images": False,
        # "return_related_questions": False,
        # "search_recency_filter": "<string>",
        # "top_k": 0,
        # "stream": False,
        # "presence_penalty": 0,
        # "frequency_penalty": 1,
        # "response_format": {},
        # "web_search_options": {"search_context_size": "high"}
    }

    # Send the request. A network failure or timeout is recorded for this book
    # instead of aborting the loop and losing the answers collected so far.
    try:
        response = requests.post(url, json=payload, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        responses.append(f"Error: {exc}")
        continue

    if response.status_code != 200:
        # Handle HTTP errors by appending an error message
        responses.append(f"Error: {response.status_code}")
        continue

    # A 200 response with a non-JSON body raises a ValueError subclass.
    try:
        response_json = response.json()
    except ValueError:
        responses.append("Error: response body is not valid JSON")
        continue

    # Extract the answer text. `or` also covers a present-but-empty "choices"
    # list (which would make [0] raise IndexError) and a null "message".
    choices = response_json.get('choices') or [{}]
    message = choices[0].get('message') or {}
    answer = message.get('content', 'No answer found')
    responses.append(answer)  # Store the answer

# # Add the responses to your original dataframe
# df10['female_main_character'] = responses

# # Print the updated dataframe to check the results
# print(df10)

In [28]:
# Print every stored answer in row order. The loop variable is named `answer`
# so it does not overwrite the `response` object left in the namespace by the
# request cell.
for answer in responses:
    print(answer)

```python
import pandas as pd

# Create a DataFrame
data = {
    'title': ['The Mysterious Disappearance of Leon (I Mean Noel)'],
    'author': ['Ellen Raskin'],
    'female_main_character': [1],
    'author_gender': [1],
    'pub_year': [1971],
    'genres': [['Children\'s Mystery', 'Middle Grade']],
    
    'feature_1pos': [0],
    'feature_1neg': [0],
    'feature_2pos': [0],
    'feature_2neg': [0],
    'feature_3pos': [1], # The mystery is solved through help and deciphering messages.
    'feature_3neg': [1], # Caroline uses her skills to solve the puzzle and find her husband.
    'feature_4pos': [0],
    'feature_4neg': [0],
    'feature_5pos': [1], # Caroline follows Augie's guidance in solving the mystery.
    'feature_5neg': [1], # Caroline also leads her adopted children in their search.
    'feature_6pos': [0],
    'feature_6neg': [1], # Caroline is admired for her intelligence and perseverance.
    'feature_7pos': [0],
    'feature_7neg': [1], # Caroline is a breadwinner d

In [18]:
# Show the raw answer for the fifth sampled book (position 4).
# Read it from `responses` directly: df10 only gets a 'female_main_character'
# column if the commented-out assignment after the request loop is re-enabled,
# so indexing that column raises KeyError on a fresh Restart & Run All.
print(responses[4])

In Jeffrey Archer's book **"Not a Penny More, Not a Penny Less,"** there are few female characters, and none of them play a central or main role in the plot. The book primarily focuses on a group of male characters—such as an Oxford professor, a Harley Street physician, an art dealer, and a British aristocrat—who are victims of a stock scam and plot to retrieve their money[1][4][5].

One of the few female characters mentioned is **Anne**, who serves as the love interest of James, one of the four male protagonists. However, she is not developed as a main character and plays a limited role in the story[4].

Therefore, the book does not feature a female main character. The narrative revolves around the male protagonists and their efforts to outwit the swindler, Harvey Metcalfe.


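To-dos:
* Parse each response into a yes/no (1/0) `female_main_character` (fmc) column
* Extract the author gender (`author_gender`)
* Extract the feature columns (`feature_1pos` … `feature_12neg`)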