In [36]:
from sklearn import datasets # pour importer le dataset iris
import pandas as pd # pour la transformation en DataFrame
import re

In [17]:
# Load the Iris dataset and inspect what the returned container exposes

iris = datasets.load_iris()
print(list(iris.keys()))      # every field available in the container
print(iris.feature_names)     # names of the measurement columns
print(iris.target_names)      # textual names of the classes

['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module']
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
['setosa' 'versicolor' 'virginica']


In [18]:
# Build the Iris DataFrame: one column per measurement, plus a 'label' column
# in which each numeric class code is replaced by its textual class name
iris_df = pd.DataFrame(iris['data'], columns=iris['feature_names']).assign(
    label=pd.Series(iris['target']).map(dict(enumerate(iris['target_names'])))
)

# Result
iris_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [25]:
# Scratch: list the column names of the DataFrame

iris_df.columns.tolist()

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)',
 'label']

In [82]:
def text_type_select(value, feature, select=0):
    """Format one value/feature pair as a short text fragment.

    Two layouts are available:
        0. "<value> <feature>"
        1. "<value> <unit> of <name>", where ``feature`` is a ``(name, unit)`` pair

    Args:
        value: The feature value; it is converted with ``str``.
        feature: The feature name for ``select=0``, or a two-item
            ``(name, unit)`` sequence for ``select=1``.
        select (int, optional): Which layout to produce. Defaults to 0.

    Returns:
        str: The formatted fragment.

    Raises:
        ValueError: If ``select`` is neither 0 nor 1.
    """
    if select == 0:
        return f"{value!s} {feature!s}"
    if select == 1:
        name, unit = feature
        return f"{value!s} {unit!s} of {name!s}"
    # An unknown layout used to fall through and return None, which only
    # surfaced later as a TypeError when the result was concatenated.
    raise ValueError(f"select must be 0 or 1, got {select!r}")


def df_row_to_text(df:pd.DataFrame, row_num, label_num=-1, subject_name = "subject", has_unit=False) :
    """Describe one DataFrame row as an English sentence.

    The sentence has the shape
    "The <subject_name> with <feature 1>, ..., <feature n-1> and <feature n> is a <label>".

    Args:
        df (pd.DataFrame): The source DataFrame.
        row_num (int): Positional index of the row to describe.
        label_num (int, optional): Positional index of the label column.
            Defaults to -1 (the last column).
        subject_name (str, optional): Noun used for the row in the sentence.
            Defaults to "subject".
        has_unit (bool, optional): When True, column names shaped like
            "<name> (<unit>)" are rendered as "<value> <unit> of <name>";
            columns that do not follow that shape are rendered as
            "<value> <column name>". Defaults to False.

    Returns:
        str: The generated sentence.
    """
    # Split the row into the label and the remaining feature values
    values = list(df.iloc[row_num])
    feature_names = list(df.columns)
    label_value = values.pop(label_num)
    feature_names.pop(label_num)

    # Captures ("<name>", "<unit>") from a column name such as "petal width (cm)"
    unit_pattern = re.compile(r'([\w ]+)\s+\((\w+)\)$')

    fragments = []
    for value, feature in zip(values, feature_names):
        # A column without a "(unit)" suffix yields no match; it falls back to
        # the plain layout instead of failing on None.groups().
        match = unit_pattern.search(str(feature)) if has_unit else None
        if match is None:
            fragments.append(text_type_select(value, feature, select=0))
        else:
            fragments.append(text_type_select(value, match.groups(), select=1))

    # Assemble the sentence; the feature list is joined as "a, b and c"
    text = "The " + subject_name
    if fragments:
        *leading, last = fragments
        # With a single feature there is nothing to put before "and"
        listing = ", ".join(leading) + " and " + last if leading else last
        text += " with " + listing
    # NOTE: the article is always "a", even when the label starts with a vowel
    text += " is a " + str(label_value)

    return text

# Describe the same row twice: first with the raw column names, then with each
# column name split into "<unit> of <name>"
print(
    df_row_to_text(iris_df, 145, subject_name="flower", has_unit=False),
    df_row_to_text(iris_df, 145, subject_name="flower", has_unit=True),
    sep="\n",
)

The flower with 6.7 sepal length (cm), 3.0 sepal width (cm), 5.2 petal length (cm) and 2.3 petal width (cm) is a virginica
The flower with 6.7 cm of sepal length, 3.0 cm of sepal width, 5.2 cm of petal length and 2.3 cm of petal width is a virginica
