In [33]:
import pandas as pd

# French writers of the 17th, 18th and 19th centuries (an arbitrary, hand-picked selection).
# Authors are grouped by period so that the "Period" label of every row is derived from the
# structure itself instead of from hard-coded counts that have to be kept in sync by hand.
authors_by_period = {
    "17th century": [
        "Honoré d'Urfé", "Madeleine de Scudéry", "Paul Scarron", "Jean de La Fontaine",
        "Madame de Lafayette", "Charles Sorel", "Gauthier de Costes de La Calprenède",
        "Vincent Voiture", "Jean-Pierre Camus", "Marie-Catherine d'Aulnoy",
    ],
    # NOTE(review): "Restif de la Bretonne" / "Nicolas Edme Restif de La Bretonne" and
    # "Choderlos de Laclos" / "Pierre Choderlos de Laclos" look like the same person listed
    # under two spellings — confirm whether both entries are intended.
    "18th century": [
        "Montesquieu", "Voltaire", "Jean-Jacques Rousseau", "Denis Diderot", "Marivaux",
        "Claude Crébillon", "Louis-Sébastien Mercier", "Bernardin de Saint-Pierre",
        "Choderlos de Laclos", "Restif de la Bretonne", "Madame de Genlis",
        "Gabriel Sénac de Meilhan", "Abbé Prévost", "François Gayot de Pitaval",
        "Jean Potocki", "Nicolas Edme Restif de La Bretonne", "Jean-François Marmontel",
        "Pierre Choderlos de Laclos", "Jean-Baptiste Louvet de Couvray", "Jean-Baptiste de Boyer d'Argens",
    ],
    # NOTE(review): "Barbey d'Aurevilly" / "Jules Barbey d'Aurevilly" also look like one
    # author listed twice — confirm.
    "19th century": [
        "Honoré de Balzac", "Victor Hugo", "Alexandre Dumas", "Gustave Flaubert", "Émile Zola",
        "Stendhal", "Alfred de Musset", "George Sand", "Jules Verne", "Alphonse Daudet",
        "Théophile Gautier", "Charles Baudelaire", "Théodore de Banville", "Edmond de Goncourt",
        "Joris-Karl Huysmans", "Octave Mirbeau", "Félicien Champsaur", "Gustave Aimard",
        "Prosper Mérimée", "Paul Féval", "Eugène Sue", "Félicité de La Mennais", "Charles Nodier",
        "Barbey d'Aurevilly", "Léon Bloy", "Georges Ohnet", "Paul de Kock",
        "Jules Barbey d'Aurevilly", "Gaston Leroux", "Édouard Rod",
    ],
}

# Flat list of every author, in period order (same content and order as before).
authors = [name for names in authors_by_period.values() for name in names]

# One row per author, labelled with the period it is listed under.
df = pd.DataFrame(
    [(name, period) for period, names in authors_by_period.items() for name in names],
    columns=["Authors", "Period"],
)

# Display the DataFrame
print(df)


                                Authors        Period
0                         Honoré d'Urfé  17th century
1                  Madeleine de Scudéry  17th century
2                          Paul Scarron  17th century
3                   Jean de La Fontaine  17th century
4                   Madame de Lafayette  17th century
5                         Charles Sorel  17th century
6   Gauthier de Costes de La Calprenède  17th century
7                       Vincent Voiture  17th century
8                     Jean-Pierre Camus  17th century
9              Marie-Catherine d'Aulnoy  17th century
10                          Montesquieu  18th century
11                             Voltaire  18th century
12                Jean-Jacques Rousseau  18th century
13                        Denis Diderot  18th century
14                             Marivaux  18th century
15                     Claude Crébillon  18th century
16              Louis-Sébastien Mercier  18th century
17            Bernardin de S

In [39]:
import requests

url_api = "https://openlibrary.org/search.json?"

# Function to get the title of the books and the themes
def get_random_book(author, max_books=5, min_themes=5, max_year=1900):
    """Fetch up to ``max_books`` books by ``author`` from the Open Library search API.

    Despite the historical name, nothing is random: the first matching books are
    kept, in the order the API returns them.

    A book is kept only when it lists at least ``min_themes`` subjects (to avoid
    irrelevant descriptions) and has a known first publication year that is not
    later than ``max_year``.

    Parameters
    ----------
    author : str
        Author name, sent as ``author:<name>`` in the search query.
    max_books : int, default 5
        Maximum number of books returned for the author.
    min_themes : int, default 5
        Minimum number of subjects a book must have to be kept.
    max_year : int, default 1900
        Latest accepted first publication year.

    Returns
    -------
    list of tuple
        ``(author, title, year, themes)`` tuples, where ``themes`` is the list of
        subjects. When no book qualifies, or when the request does not answer
        with HTTP 200, a single placeholder ``(author, '', '', '')`` is returned
        so that callers can always iterate over a list of 4-tuples. A failed
        request additionally emits a warning.
    """
    import warnings  # local import: keeps this cell self-contained

    # Let requests build and percent-encode the query string; author names contain
    # spaces, apostrophes and accented characters.
    query = {
        "q": f"author:{author}",
        "fields": "title,first_publish_year,subject",
    }
    # A timeout prevents one stalled connection from blocking the whole loop.
    req = requests.get(url_api, params=query, timeout=30)

    # Check if the request worked
    if req.status_code != 200:
        warnings.warn(
            f"The request for {author} failed (HTTP {req.status_code})",
            stacklevel=2,
        )
        return [(author, '', '', '')]

    books = req.json().get("docs", [])
    books_with_themes = [
        book for book in books
        if len(book.get("subject") or []) >= min_themes
        and book.get("first_publish_year")  # skip missing years (avoids comparing None)
        and book["first_publish_year"] <= max_year
    ]

    # Slicing already copes with authors that have fewer than max_books books.
    selected_books = books_with_themes[:max_books]
    if not selected_books:
        return [(author, '', '', '')]

    return [
        (
            author,
            info.get("title", "N/A"),
            info.get("first_publish_year", "N/A"),
            info.get("subject", "N/A"),
        )
        for info in selected_books
    ]
    




In [40]:
# Query the API once per author; each entry is the list returned by get_random_book.
list_books = [get_random_book(author_name) for author_name in df["Authors"]]

print(list_books)


[[("Honoré d'Urfé", '', '', '')], [('Madeleine de Scudéry', 'Artamène', 1691, ['Court and courtiers', 'Fiction', 'Social conditions', 'Social life and customs', 'Women', 'Fiction, general', 'France, fiction'])], [('Paul Scarron', 'Svr la conference de Rvel en mars', 1649, ['Fronde', 'Poetry', 'History', 'France. 1649 March 11', 'France'])], [('Jean de La Fontaine', 'Fables', 1678, ['French Fables', 'Translations into English', 'Fables', 'Oversize books', 'Illustrations', 'Adaptations', 'Translations into Malagasy', 'Specimens', "Aesop's fables", 'English Fables', 'French language', 'Translations into French Creole', 'Readers', 'Poetry', 'French language materials', 'Translations into Yiddish', 'Juvenile literature', 'Translations from French', 'Translations into Arabic', 'Translations into Esperanto', 'Translations into Occitan', 'Toy and movable books', 'Dialects', 'Latin language', 'Yiddish Fables', 'Translations into Italian', 'Translations', 'Translations into Vietnamese', 'Transla

In [41]:
# list_books is a list[list[tuple]] (one inner list per author); flatten it into a
# list[tuple] so that it can be turned into a DataFrame.
flattened_books = [book for author_data in list_books for book in author_data]

# Convert to a DataFrame, one row per (author, book) tuple
df_books = pd.DataFrame(
    data=flattened_books,
    columns=['Author', 'Title', 'Year', 'Themes'],
)

df_books


Unnamed: 0,Author,Title,Year,Themes
0,Honoré d'Urfé,,,
1,Madeleine de Scudéry,Artamène,1691,"[Court and courtiers, Fiction, Social conditio..."
2,Paul Scarron,Svr la conference de Rvel en mars,1649,"[Fronde, Poetry, History, France. 1649 March 1..."
3,Jean de La Fontaine,Fables,1678,"[French Fables, Translations into English, Fab..."
4,Jean de La Fontaine,Contes et nouvelles en vers,1685,"[Translations into English, Translations into ..."
...,...,...,...,...
144,Georges Ohnet,,,
145,Paul de Kock,Oeuvres,1864,"[Description and travel, Travel, History, Libr..."
146,Jules Barbey d'Aurevilly,,,
147,Gaston Leroux,,,


In [42]:
# Drop the rows for which no book was found (empty title), then renumber the rows from 0
df_books = df_books.loc[df_books['Title'].ne('')].reset_index(drop=True)

df_books

Unnamed: 0,Author,Title,Year,Themes
0,Madeleine de Scudéry,Artamène,1691,"[Court and courtiers, Fiction, Social conditio..."
1,Paul Scarron,Svr la conference de Rvel en mars,1649,"[Fronde, Poetry, History, France. 1649 March 1..."
2,Jean de La Fontaine,Fables,1678,"[French Fables, Translations into English, Fab..."
3,Jean de La Fontaine,Contes et nouvelles en vers,1685,"[Translations into English, Translations into ..."
4,Charles Sorel,De la connoissance des bons livres,1671,"[French language, French literature, History, ..."
...,...,...,...,...
129,Barbey d'Aurevilly,Les bas-bleus,1878,"[History and criticism, French literature, Lit..."
130,Barbey d'Aurevilly,Les philosophes et les écrivains religieux,1860,"[French Philosophy, French Religious literatur..."
131,Barbey d'Aurevilly,Les poètes,1889,"[French Poets, French poetry, Histoire et crit..."
132,Barbey d'Aurevilly,Les vieilles actrices,1884,"[Actresses, Actresses, French, Authors, French..."


In [43]:
# Turn each row's list of themes into a single comma-separated string.
# Values that are already strings are left untouched, so this cell can be re-run (or run
# on data reloaded from CSV) without joining the individual characters of the text.
df_books['Themes'] = df_books['Themes'].apply(
    lambda themes: themes if isinstance(themes, str) else ', '.join(themes)
)
df_books

Unnamed: 0,Author,Title,Year,Themes
0,Madeleine de Scudéry,Artamène,1691,"Court and courtiers, Fiction, Social condition..."
1,Paul Scarron,Svr la conference de Rvel en mars,1649,"Fronde, Poetry, History, France. 1649 March 11..."
2,Jean de La Fontaine,Fables,1678,"French Fables, Translations into English, Fabl..."
3,Jean de La Fontaine,Contes et nouvelles en vers,1685,"Translations into English, Translations into O..."
4,Charles Sorel,De la connoissance des bons livres,1671,"French language, French literature, History, H..."
...,...,...,...,...
129,Barbey d'Aurevilly,Les bas-bleus,1878,"History and criticism, French literature, Litt..."
130,Barbey d'Aurevilly,Les philosophes et les écrivains religieux,1860,"French Philosophy, French Religious literature..."
131,Barbey d'Aurevilly,Les poètes,1889,"French Poets, French poetry, Histoire et criti..."
132,Barbey d'Aurevilly,Les vieilles actrices,1884,"Actresses, Actresses, French, Authors, French,..."


In [65]:
!pip install fuzzywuzzy


Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [None]:
!pip install python-Levenshtein
#erreur sans ce package /opt/conda/lib/python3.12/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')



Collecting python-Levenshtein
  Downloading python_Levenshtein-0.26.1-py3-none-any.whl.metadata (3.7 kB)
Collecting Levenshtein==0.26.1 (from python-Levenshtein)
  Downloading levenshtein-0.26.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.26.1->python-Levenshtein)
  Downloading rapidfuzz-3.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading python_Levenshtein-0.26.1-py3-none-any.whl (9.4 kB)
Downloading levenshtein-0.26.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (162 kB)
Downloading rapidfuzz-3.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m45.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, Levenshtein, python-Levenshtein
Successfully installed Levenshtein-0.26.1 python-Levenshtein-0.26.1 rapidfuzz-3.10

In [None]:
# TODO: start by translating the titles into French — even after this step some similar titles remain
# Remove similar titles
from fuzzywuzzy import fuzz

threshold = 85  # Similarity threshold (fuzz.ratio score, 0-100)

# A row is dropped when it has the same author as the row directly above it and its title
# scores above `threshold` against that row's title. Every comparison uses the rows as they
# are before any removal, so the rows to keep are computed once and the frame is filtered in
# a single pass instead of being dropped/re-indexed inside the loop.
author_values = df_books['Author'].tolist()
title_values = df_books['Title'].tolist()
keep_row = [True] * len(df_books)
for i in range(1, len(df_books)):
    if author_values[i] == author_values[i - 1]:  # same author as the previous row
        similarity = fuzz.ratio(title_values[i], title_values[i - 1])  # title similarity
        if similarity > threshold:  # titles too similar: drop the later row
            keep_row[i] = False

df_books = df_books[keep_row].reset_index(drop=True)

df_books


Unnamed: 0,Author,Title,Year,Themes
0,Madeleine de Scudéry,Artamène,1691,"Court and courtiers, Fiction, Social condition..."
1,Paul Scarron,Svr la conference de Rvel en mars,1649,"Fronde, Poetry, History, France. 1649 March 11..."
2,Jean de La Fontaine,Fables,1678,"French Fables, Translations into English, Fabl..."
3,Jean de La Fontaine,Contes et nouvelles en vers,1685,"Translations into English, Translations into O..."
4,Charles Sorel,De la connoissance des bons livres,1671,"French language, French literature, History, H..."
...,...,...,...,...
125,Barbey d'Aurevilly,Les bas-bleus,1878,"History and criticism, French literature, Litt..."
126,Barbey d'Aurevilly,Les philosophes et les écrivains religieux,1860,"French Philosophy, French Religious literature..."
127,Barbey d'Aurevilly,Les poètes,1889,"French Poets, French poetry, Histoire et criti..."
128,Barbey d'Aurevilly,Les vieilles actrices,1884,"Actresses, Actresses, French, Authors, French,..."


In [73]:
# Write the current book table to text.csv, without the row index.
df_books.to_csv(path_or_buf='text.csv', index=False)

In [44]:
# Write the current book table to books0.csv, without the row index.
df_books.to_csv(path_or_buf='books0.csv', index=False)

In [71]:
# Reload the book table from the books0.csv file stored at the path below.
# NOTE(review): this is an absolute, machine-specific path — confirm it points at the file
# written by the to_csv('books0.csv') cell, and consider a path relative to the project.
df_books = pd.read_csv(filepath_or_buffer='/home/onyxia/work/libroguessr/Data/books0.csv')
df_books

Unnamed: 0,Author,Title,Year,Themes
0,Madeleine de Scudéry,Artamène,1691,"Court and courtiers, Fiction, Social condition..."
1,Paul Scarron,Svr la conference de Rvel en mars,1649,"Fronde, Poetry, History, France. 1649 March 11..."
2,Jean de La Fontaine,Fables,1678,"French Fables, Translations into English, Fabl..."
3,Jean de La Fontaine,Contes et nouvelles en vers,1685,"Translations into English, Translations into O..."
4,Charles Sorel,De la connoissance des bons livres,1671,"French language, French literature, History, H..."
...,...,...,...,...
129,Barbey d'Aurevilly,Les bas-bleus,1878,"History and criticism, French literature, Litt..."
130,Barbey d'Aurevilly,Les philosophes et les écrivains religieux,1860,"French Philosophy, French Religious literature..."
131,Barbey d'Aurevilly,Les poètes,1889,"French Poets, French poetry, Histoire et criti..."
132,Barbey d'Aurevilly,Les vieilles actrices,1884,"Actresses, Actresses, French, Authors, French,..."


In [48]:
# Show up to 20 random rows; the fixed seed makes the displayed sample reproducible and
# the min() avoids an error when the table holds fewer than 20 rows.
df_books.sample(n=min(20, len(df_books)), random_state=0)

Unnamed: 0,Author,Title,Year,Themes
58,Honoré de Balzac,Illusions perdues,1837,"Social life and customs, Translations into Chi..."
88,George Sand,Indiana,1832,"Fiction, Man-woman relationships, Marriage, Wo..."
79,Émile Zola,Au bonheur des dames,1883,"Department stores, Fiction, French fiction, Li..."
50,Jean-Baptiste Louvet de Couvray,Accusation intentée dans la convention nation...,1792,"Trials, litigation, Trials (Political crimes a..."
42,Abbé Prévost,Histoire générale des voyages ou Nouvelle coll...,1746,"Voyages and travels, Voyages, Description and ..."
118,Félicien Champsaur,Masques modernes,1889,"Social life and customs, Popular culture, Thea..."
7,Montesquieu,De l'esprit des lois,1748,"Derecho, Filosofía, Jurisprudence, great_books..."
72,Gustave Flaubert,Salammbô,1863,"Fiction, History, Continental european fiction..."
32,Bernardin de Saint-Pierre,Voyage à l'île de France,1775,"Early works to 1800, Description and travel, F..."
71,Gustave Flaubert,L'Éducation sentimentale,1898,"Fiction, Young men, History, Married women, Un..."


In [49]:
# The goal is now to clean the themes to only keep relevant themes, and to translate them into French
# To translate themes, we use the API Lingva
from urllib.parse import quote

url = "https://lingva.ml/api/v1/"
source = 'auto/' # auto as some themes may be written in another language than English
dest = 'fr/'

untranslated_rows = []  # row labels whose themes could not be translated
for i in range(len(df_books['Themes'])):
    # The text is sent as a URL path segment, so every reserved character ("/", "?", "#",
    # "%", ...) must be percent-encoded or the request targets a different path.
    url_api_transl = url + source + dest + quote(df_books.loc[i, 'Themes'], safe='')
    # A timeout prevents one stalled connection from blocking the whole loop.
    response = requests.get(url_api_transl, timeout=30)
    translated_themes = ""
    if response.status_code == 200:
        translated_themes = response.json().get("translation", "")
    if translated_themes:
        df_books.loc[i, 'Themes'] = translated_themes
    else:
        # Keep the original themes rather than overwriting them with an empty string,
        # and remember the row so the failure is visible.
        untranslated_rows.append(i)

if untranslated_rows:
    print(f"Themes left untranslated for {len(untranslated_rows)} row(s): {untranslated_rows}")


In [30]:
# Show up to 20 random rows; the fixed seed makes the displayed sample reproducible and
# the min() avoids an error when the table holds fewer than 20 rows.
df_books.sample(n=min(20, len(df_books)), random_state=0)


Unnamed: 0,Author,Title,Year,Themes
10,Voltaire,Candide,1746,"Conduite de vie, Lecteurs, Littérature françai..."
166,Eugène Sue,Juif errant,1844,"Jésuites, Juif errant, Fiction, Illustrations,..."
148,Charles Baudelaire,Nouvelles histoires extraordinaires (Berenice ...,1857,"nouvelles, aristocratie, contes d'horreur amér..."
8,Montesquieu,De l'esprit des lois,1748,"Droit, Philosophie, Jurisprudence, grands_livr..."
3,Jean de La Fontaine,Contes et nouvelles en vers,1685,"Traductions en anglais, Traductions en occitan..."
81,Alexandre Dumas,Les Trois Mousquetaires,1844,"Histoire, Fiction, Épéistes, La France dans la..."
15,Jean-Jacques Rousseau,Emile or Education,1762,"Éducation, Ouvrages de jeunesse jusqu'à 1800, ..."
152,Edmond de Goncourt,La femme au dix-huitieme siecle,1862,"Questions sociales et morales, Femmes, Salons,..."
60,Honoré de Balzac,Cousin Pons,1875,"Vie sociale et mœurs, Fiction en espagnol, Fic..."
48,Jean-Baptiste Louvet de Couvray,"Discours de Jean-Baptiste Louvet, sur la guerre",1792,"Politique et gouvernement, Premiers travaux ju..."


In [51]:
!pip install langdetect

Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... [?25ldone
[?25h  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993222 sha256=08c4b253689c442c6442151bdbb46a012ffb439f937b9bd4e5d1912d328f39ef
  Stored in directory: /home/onyxia/.cache/pip/wheels/c1/67/88/e844b5b022812e15a52e4eaa38a1e709e99f06f6639d7e3ba7
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9


In [53]:
# TODO: fix the themes that were not translated
# TODO: fix the names that are in English
# TODO: remove the superfluous words in the titles, e.g. row 67 (Dumas)

# Count the titles whose detected language is not French.
from langdetect import DetectorFactory, detect

# langdetect is non-deterministic by default; fixing the seed makes the count reproducible.
DetectorFactory.seed = 0

count = sum(detect(title) != "fr" for title in df_books['Title'])

count

46