In [12]:
import os

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine

In [20]:
def quantize(data, n_bits):
    """Quantize each column of ``data`` onto integer levels ``0 .. 2**n_bits - 1``.

    Each column (axis 0) is min-max scaled independently onto
    ``[0, 2**n_bits - 1]`` and then rounded to the nearest integer level.
    A column whose values are all identical is mapped to level 0.

    Args:
        data: Numeric array-like; min and max are taken along axis 0.
        n_bits: Number of bits per value, giving ``2**n_bits`` levels.

    Returns:
        Integer ndarray with the same shape as ``data``.
    """
    # e.g. 2 bits = 4 levels (00, 01, 10, 11)
    n_levels = 2**n_bits

    data = np.asarray(data)
    min_vals = np.min(data, axis=0)
    max_vals = np.max(data, axis=0)

    # A constant column has a zero span; dividing by it would yield NaN
    # (0/0) and the later integer cast of NaN is undefined. Substituting 1
    # keeps the numerator (all zeros) unchanged, so the column maps to 0.
    span = max_vals - min_vals
    safe_span = np.where(span == 0, 1, span)

    # Scale to [0, n_levels - 1]
    normalized = (data - min_vals) / safe_span * (n_levels - 1)

    # Round to integer levels; the clip guards against float overshoot
    quantized = np.round(normalized).astype(int)
    return np.clip(quantized, 0, n_levels - 1)

def export_data(data, filename):
    """Write ``data`` to ``filename`` as a CSV without header or index.

    Args:
        data: Anything accepted by ``pd.DataFrame`` (e.g. a 2-D array).
        filename: Destination path, or a file-like object accepted by
            ``DataFrame.to_csv``. For path targets, missing parent
            directories are created first.
    """
    # to_csv fails if the target directory does not exist (e.g. "data/" on
    # a fresh checkout), so create it. Skip this for file-like targets.
    if isinstance(filename, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filename))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    df = pd.DataFrame(data)
    df.to_csv(filename, header=False, index=False)
    print(f"Données exportées dans {filename}")

def process_dataset_from_sklearn(dataset_name, n_bits):
    """Load a scikit-learn dataset, quantize its features and export it as CSV.

    Args:
        dataset_name: Either "iris" or "wine".
        n_bits: Bit width passed to ``quantize`` for the feature columns.

    Raises:
        ValueError: If ``dataset_name`` is neither "iris" nor "wine".

    The output goes to ``data/<dataset_name>_<n_bits>bits.csv``; each row
    holds the quantized features followed by the target value.
    """
    # Reject unknown names up front, before anything is loaded or printed
    if dataset_name not in ("iris", "wine"):
        raise ValueError("Dataset must be 'iris' or 'wine'")

    loader = load_iris if dataset_name == "iris" else load_wine
    bunch = loader()
    output_file = f"data/{dataset_name}_{n_bits}bits.csv"

    features = bunch.data
    labels = bunch.target

    print(f"Dataset : {dataset_name}")
    print(f"Number of features: {features.shape[1]}")
    print(f"Number of classes: {len(np.unique(labels))}")
    print("---")

    # Append the target as the last column, then write everything out
    features_quantized = quantize(features, n_bits)
    export_data(np.column_stack((features_quantized, labels)), output_file)



In [22]:

# Quantize the iris features to 2 bits (4 levels) and export them with the target column.
process_dataset_from_sklearn("iris", 2)

Dataset : iris
Number of features: 4
Number of classes: 3
---
Données exportées dans data/iris_2bits.csv


In [24]:
# Quantize the wine features to 4 bits (16 levels) and export them with the target column.
process_dataset_from_sklearn("wine", 4)

Dataset : wine
Number of features: 13
Number of classes: 3
---
Données exportées dans data/wine_4bits.csv
