In [1]:
import pandas as pd

# List of French Writers abritrarily defined and chosen in the 17th, 18th and 19th century
authors = [
    # 17th century
    "Honoré d'Urfé", "Madeleine de Scudéry", "Paul Scarron", "Jean de La Fontaine",
    "Madame de Lafayette", "Charles Sorel", "Gauthier de Costes de La Calprenède",
    "Vincent Voiture", "Jean-Pierre Camus", "Marie-Catherine d'Aulnoy",
    
    # 18th century
    "Montesquieu", "Voltaire", "Jean-Jacques Rousseau", "Denis Diderot", "Marivaux",
    "Claude Crébillon", "Louis-Sébastien Mercier", "Bernardin de Saint-Pierre",
    "Choderlos de Laclos", "Restif de la Bretonne", "Madame de Genlis",
    "Gabriel Sénac de Meilhan", "Abbé Prévost", "François Gayot de Pitaval",
    "Jean Potocki", "Nicolas Edme Restif de La Bretonne", "Jean-François Marmontel",
    "Pierre Choderlos de Laclos", "Jean-Baptiste Louvet de Couvray", "Jean-Baptiste de Boyer d'Argens",
    
    # 19th century
    "Honoré de Balzac", "Victor Hugo", "Alexandre Dumas", "Gustave Flaubert", "Émile Zola",
    "Stendhal", "Alfred de Musset", "George Sand", "Jules Verne", "Alphonse Daudet",
    "Théophile Gautier", "Charles Baudelaire", "Théodore de Banville", "Edmond de Goncourt",
    "Joris-Karl Huysmans", "Octave Mirbeau", "Félicien Champsaur", "Gustave Aimard",
    "Prosper Mérimée", "Paul Féval", "Eugène Sue", "Félicité de La Mennais", "Charles Nodier",
    "Barbey d'Aurevilly", "Léon Bloy", "Georges Ohnet", "Paul de Kock",
    "Jules Barbey d'Aurevilly", "Gaston Leroux", "Édouard Rod"
]

# Create a DataFrame
df = pd.DataFrame(authors, columns=["Authors"])

# Ajouter une colonne pour la période
df["Period"] = (
    ["17th century"] * 10 +
    ["18th century"] * 20 +
    ["19th century"] * 30
)

# Afficher le DataFrame
print(df)


                                Authors        Period
0                         Honoré d'Urfé  17th century
1                  Madeleine de Scudéry  17th century
2                          Paul Scarron  17th century
3                   Jean de La Fontaine  17th century
4                   Madame de Lafayette  17th century
5                         Charles Sorel  17th century
6   Gauthier de Costes de La Calprenède  17th century
7                       Vincent Voiture  17th century
8                     Jean-Pierre Camus  17th century
9              Marie-Catherine d'Aulnoy  17th century
10                          Montesquieu  18th century
11                             Voltaire  18th century
12                Jean-Jacques Rousseau  18th century
13                        Denis Diderot  18th century
14                             Marivaux  18th century
15                     Claude Crébillon  18th century
16              Louis-Sébastien Mercier  18th century
17            Bernardin de S

In [2]:
import requests
import random

random.seed(123)

url_api = "https://openlibrary.org/search.json?"

# Function to get the title of the books and the themes
def get_random_book(author):
    url  = url_api + f'q=author:{author}' + '&fields=title,first_publish_year,subject'
    req = requests.get(url)
    # Check if the request worked
    if req.status_code == 200:
        books = req.json().get("docs", [])
        # Select only books for which themes are defined in the API and for which the number of themes are above 5 to avoid irrelevant book descriptions and for which the publication dates back before 1900
        books_with_themes = [book for book in books if book.get("subject") and len(book["subject"]) >= 5 and book.get("first_publish_year") and book.get("first_publish_year") <= 1900] # book.get("first_publish_year") to avoid NoneType errors
        num_books = len(books_with_themes)
        if num_books > 0:
            # Si l'auteur a au moins 3 livres, on en sélectionne entre 1 et 3 au hasard
            
            if num_books >= 3:
                selected_books = random.sample(books_with_themes, random.randint(3, num_books))  # Select 3 random books
            elif num_books == 2:
                selected_books = books_with_themes
            elif num_books == 1:
                selected_books = books_with_themes
            book_info = []
            for info in selected_books:
                title = info.get("title", "N/A")
                themes = info.get("subject", "N/A")
                year = info.get("first_publish_year", "N/A")
                book_info.append((author, title, year, themes))
            return book_info
        else:
            return [(author, '', '','')]
    else:
        return f"The request for {author} failed"
    




In [3]:
list_books = []
for author in df["Authors"]:
    list_books.append(get_random_book(author))

print(list_books)


[[("Honoré d'Urfé", '', '', '')], [('Madeleine de Scudéry', 'Artamène', 1691, ['Court and courtiers', 'Fiction', 'Social conditions', 'Social life and customs', 'Women', 'Fiction, general', 'France, fiction'])], [('Paul Scarron', 'Svr la conference de Rvel en mars', 1649, ['Fronde', 'Poetry', 'History', 'France. 1649 March 11', 'France'])], [('Jean de La Fontaine', 'Fables', 1678, ['French Fables', 'Translations into English', 'Fables', 'Oversize books', 'Illustrations', 'Adaptations', 'Translations into Malagasy', 'Specimens', "Aesop's fables", 'English Fables', 'French language', 'Translations into French Creole', 'Readers', 'Poetry', 'French language materials', 'Translations into Yiddish', 'Juvenile literature', 'Translations from French', 'Translations into Arabic', 'Translations into Esperanto', 'Translations into Occitan', 'Toy and movable books', 'Dialects', 'Latin language', 'Yiddish Fables', 'Translations into Italian', 'Translations', 'Translations into Vietnamese', 'Transla

In [4]:
# As we have a : list[list[tuples]], we convert it to list[tuples] to transform it to a df afterward
flattened_books = []
for author_data in list_books:
    for book in author_data:
        flattened_books.append(book)

# Convertir en DataFrame
df_books = pd.DataFrame(flattened_books, columns=['Author', 'Title', 'Year', 'Themes'])


df_books


Unnamed: 0,Author,Title,Year,Themes
0,Honoré d'Urfé,,,
1,Madeleine de Scudéry,Artamène,1691,"[Court and courtiers, Fiction, Social conditio..."
2,Paul Scarron,Svr la conference de Rvel en mars,1649,"[Fronde, Poetry, History, France. 1649 March 1..."
3,Jean de La Fontaine,Fables,1678,"[French Fables, Translations into English, Fab..."
4,Jean de La Fontaine,Contes et nouvelles en vers,1685,"[Translations into English, Translations into ..."
...,...,...,...,...
185,Georges Ohnet,,,
186,Paul de Kock,Oeuvres,1864,"[Description and travel, Travel, History, Libr..."
187,Jules Barbey d'Aurevilly,,,
188,Gaston Leroux,,,


In [5]:
# delete lines of the DF for which no book was found
df_books = df_books[df_books['Title'] != '']
df_books

Unnamed: 0,Author,Title,Year,Themes
1,Madeleine de Scudéry,Artamène,1691,"[Court and courtiers, Fiction, Social conditio..."
2,Paul Scarron,Svr la conference de Rvel en mars,1649,"[Fronde, Poetry, History, France. 1649 March 1..."
3,Jean de La Fontaine,Fables,1678,"[French Fables, Translations into English, Fab..."
4,Jean de La Fontaine,Contes et nouvelles en vers,1685,"[Translations into English, Translations into ..."
6,Charles Sorel,De la connoissance des bons livres,1671,"[French language, French literature, History, ..."
...,...,...,...,...
180,Charles Nodier,Mélanges tirés d'une petite bibliothèque,1829,"[Literature, Bibliography, History and critici..."
181,Barbey d'Aurevilly,Les poètes,1889,"[French Poets, French poetry, Histoire et crit..."
182,Barbey d'Aurevilly,Les philosophes et les écrivains religieux,1860,"[French Philosophy, French Religious literatur..."
183,Barbey d'Aurevilly,Les vieilles actrices,1884,"[Actresses, Actresses, French, Authors, French..."


In [6]:
# Transform the list of themes in a string
df_books['Themes'] = df_books['Themes'].apply(lambda x: ', '.join(x))
df_books

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_books['Themes'] = df_books['Themes'].apply(lambda x: ', '.join(x))


Unnamed: 0,Author,Title,Year,Themes
1,Madeleine de Scudéry,Artamène,1691,"Court and courtiers, Fiction, Social condition..."
2,Paul Scarron,Svr la conference de Rvel en mars,1649,"Fronde, Poetry, History, France. 1649 March 11..."
3,Jean de La Fontaine,Fables,1678,"French Fables, Translations into English, Fabl..."
4,Jean de La Fontaine,Contes et nouvelles en vers,1685,"Translations into English, Translations into O..."
6,Charles Sorel,De la connoissance des bons livres,1671,"French language, French literature, History, H..."
...,...,...,...,...
180,Charles Nodier,Mélanges tirés d'une petite bibliothèque,1829,"Literature, Bibliography, History and criticis..."
181,Barbey d'Aurevilly,Les poètes,1889,"French Poets, French poetry, Histoire et criti..."
182,Barbey d'Aurevilly,Les philosophes et les écrivains religieux,1860,"French Philosophy, French Religious literature..."
183,Barbey d'Aurevilly,Les vieilles actrices,1884,"Actresses, Actresses, French, Authors, French,..."


In [7]:
df_books.to_csv('books1.csv', index=False)

In [8]:
df_books = pd.read_csv('C:\\Users\\arnau\\Documents\\Mes documents\\Arnaud\\GitHub\\libroguessr\\Data\\books.csv')
df_books

Unnamed: 0,Author,Title,Year,Themes
0,Madeleine de Scudéry,Artamène,1691,"Court and courtiers, Fiction, Social condition..."
1,Paul Scarron,Svr la conference de Rvel en mars,1649,"Fronde, Poetry, History, France. 1649 March 11..."
2,Jean de La Fontaine,Fables,1678,"French Fables, Translations into English, Fabl..."
3,Jean de La Fontaine,Contes et nouvelles en vers,1685,"Translations into English, Translations into O..."
4,Charles Sorel,De la connoissance des bons livres,1671,"French language, French literature, History, H..."
...,...,...,...,...
161,Charles Nodier,Mélanges tirés d'une petite bibliothèque,1829,"Literature, Bibliography, History and criticis..."
162,Barbey d'Aurevilly,Les bas-bleus,1878,"History and criticism, French literature, Litt..."
163,Barbey d'Aurevilly,Les vieilles actrices,1884,"Actresses, Actresses, French, Authors, French,..."
164,Barbey d'Aurevilly,Les philosophes et les écrivains religieux,1860,"French Philosophy, French Religious literature..."


In [27]:
df_books.sample(20)

Unnamed: 0,Author,Title,Year,Themes
67,Alexandre Dumas,La dame aux camélias [play],1855,"Fiction, Courtesans, Théâtre du Gymnase-Armand..."
110,George Sand,La charca del diablo,1846,"widowers, family, rural life, marriage customs..."
145,Edmond de Goncourt,La femme au dix-huitieme siecle,1862,"Social and moral questions, Women, Salons, Soc..."
108,George Sand,The haunted pool,1890,"Country life, Parent and child, Fiction, Man-w..."
151,Félicien Champsaur,Masques modernes,1889,"Social life and customs, Popular culture, Thea..."
104,Stendhal,Promenades dans Rome,1829,"Biography, Description and travel, French, Fre..."
37,Restif de la Bretonne,La vie de mom père,1884,"English fiction, French Translations, French f..."
34,Bernardin de Saint-Pierre,Studies of nature,1796,"Nature, Natural history, Religious aspects of ..."
74,Alexandre Dumas,Le roman du masque de fer,1600,"History, Fiction, Accessible book, Protected D..."
80,Alexandre Dumas,Mes mémoires,1860,"Biography, French Authors, Biography & Autobio..."


In [44]:
# The goal is now to clean the themes to only keep relevant themes, and to translate them into French
# To translate themes, we use the API Lingva

url = "https://lingva.ml/api/v1/"
source = 'auto/' # auto as some themes may be written in another language than English
dest = 'fr/'

for i in range(len(df_books['Themes'])):
    url_api_transl = url + source + dest + df_books['Themes'][i]
    response = requests.get(url_api_transl)
    if response.status_code == 200:
        translated_themes = response.json().get("translation", "")
        df_books.loc[i,'Themes'] = translated_themes
    else:
        pass


In [45]:
df_books.sample(20)


Unnamed: 0,Author,Title,Year,Themes
145,Edmond de Goncourt,La femme au dix-huitieme siecle,1862,"Questions sociales et morales, Femmes, Salons,..."
161,Charles Nodier,Mélanges tirés d'une petite bibliothèque,1829,"Literature, Bibliography, History and criticis..."
38,Restif de la Bretonne,Le pornographe,1769,"Littérature érotique, Statut juridique, lois, ..."
67,Alexandre Dumas,La dame aux camélias [play],1855,"Fiction, Courtesans, Théâtre du Gymnase-Armand..."
6,Marie-Catherine d'Aulnoy,The Prince of Carency,1719,"Fiction, Princes, Fiction, biographique, Franc..."
128,Jules Verne,Le Tour du Monde en Quatre-Vingts Jours,1872,"Voyages autour du monde, Traductions en gujara..."
116,Jules Verne,Deux ans de vacances,1900,"Fiction, Voyages et séjours, Îles, Langue fran..."
59,Victor Hugo,Le Rhin,1800,"Description et voyages, Rhin et sa vallée, Voy..."
47,François Gayot de Pitaval,Continuation des Causes célebres et intéressantes,1765,"Premiers travaux jusqu'en 1800, Procès, Crimin..."
96,Émile Zola,Rome,1896,"Catholiques, Pèlerins et pèlerinages chrétiens..."


In [36]:
#TODO : corriger les thèmes non traduits
#TODO : corriger les noms en anglais
#TODO : enlever les mots en trop dans les titres : ex: 67 : Dumas

'Cour et courtisans, Fiction, Conditions sociales, Vie sociale et mœurs, Femmes, Fiction, général, France, fiction'