In [7]:
# Import de la bibliothèque essentielle pour la manipulation de données
import pandas as pd

# On définit des noms de colonnes clairs
column_names = ['sentiment', 'id', 'date', 'query', 'user', 'text']

# Chargement des données dans un "DataFrame"
df = pd.read_csv("./input/training.1600000.processed.noemoticon.csv", names=column_names, encoding="latin-1")

# On remplace la valeur 4 par 1 dans la colonne 'sentiment' pour la rendre binaire (0 ou 1)
df['sentiment'] = df['sentiment'].replace(4, 1)

# Affichage des 5 premières lignes pour un premier aperçu
print("Aperçu des 5 premières lignes du tableau de données :")
print(df.head())

Aperçu des 5 premières lignes du tableau de données :
   sentiment          id                          date     query  \
0          0  1467810369  Mon Apr 06 22:19:45 PDT 2009  NO_QUERY   
1          0  1467810672  Mon Apr 06 22:19:49 PDT 2009  NO_QUERY   
2          0  1467810917  Mon Apr 06 22:19:53 PDT 2009  NO_QUERY   
3          0  1467811184  Mon Apr 06 22:19:57 PDT 2009  NO_QUERY   
4          0  1467811193  Mon Apr 06 22:19:57 PDT 2009  NO_QUERY   

              user                                               text  
0  _TheSpecialOne_  @switchfoot http://twitpic.com/2y1zl - Awww, t...  
1    scotthamilton  is upset that he can't update his Facebook by ...  
2         mattycus  @Kenichan I dived many times for the ball. Man...  
3          ElleCTF    my whole body feels itchy and like its on fire   
4           Karoli  @nationwideclass no, it's not behaving at all....  


In [8]:
import mlflow
import mlflow.sklearn
import mlflow.transformers

# Configuration de l'URI de suivi pour MLflow
mlflow.set_tracking_uri("file:/mlflow")

# Définition du nom de l'expérience
experiment_name = "Analyse de Sentiments - Twitter"
mlflow.set_experiment(experiment_name)

print(f"MLflow configuré pour l'expérience: '{experiment_name}'")

MLflow configuré pour l'expérience: 'Analyse de Sentiments - Twitter'


In [9]:
df.describe()

Unnamed: 0,sentiment,id
count,1600000.0,1600000.0
mean,0.5,1998818000.0
std,0.5,193576100.0
min,0.0,1467810000.0
25%,0.0,1956916000.0
50%,0.5,2002102000.0
75%,1.0,2177059000.0
max,1.0,2329206000.0


In [11]:
# Reduce the DataFrame to 16000 elements, stratified by column 0
n_samples = 16000

# Sample 8000 rows where sentiment is 0 (negative sentiment)
df_neg = df[df['sentiment'] == 0].sample(n=n_samples // 2, random_state=42)

# Sample 8000 rows where sentiment is 1 (positive sentiment)
df_pos = df[df['sentiment'] == 1].sample(n=n_samples // 2, random_state=42)

# Concatenate the two dataframes
df_truncated_stratified = pd.concat([df_neg, df_pos])

# Shuffle the truncated DataFrame
df_truncated_stratified = df_truncated_stratified.sample(frac=1, random_state=42).reset_index(drop=True)

# Select only the columns we need (sentiment and text)
df_prepared = df_truncated_stratified[['sentiment', 'text']]


# Display the shape of the truncated and stratified DataFrame
print("Shape of truncated and stratified DataFrame:")
print(df_prepared.shape)

# Display the first few rows of the truncated and stratified DataFrame
print("\nAperçu du DataFrame tronqué et stratifié:")
display(df_prepared.head())

# Check the value counts of the sentiment column to confirm stratification
print("\nValue counts of sentiment in truncated and stratified DataFrame:")
print(df_prepared['sentiment'].value_counts())

Shape of truncated and stratified DataFrame:
(16000, 2)

Aperçu du DataFrame tronqué et stratifié:


Unnamed: 0,sentiment,text
0,1,@pbadstibner I have good balance..used to do m...
1,0,@gtissa Still having issue and it's GDI!!! The...
2,0,@Chrismorris528 Sigh. In 3 hours. It sucks to ...
3,0,@HelloEli exacly
4,1,In fairness. He smells good.



Value counts of sentiment in truncated and stratified DataFrame:
sentiment
1    8000
0    8000
Name: count, dtype: int64


In [None]:
df = df_prepared
# 1. Obtenir les dimensions du tableau (nombre de lignes, nombre de colonnes)
print(f"Dimensions du tableau : {df.shape}")
print("-" * 30)

# 2. Obtenir un résumé des informations (types de données, valeurs non nulles)
print("Informations sur le DataFrame :")
df.info()
print("-" * 30)

# 3. Compter le nombre de tweets pour chaque sentiment
print("Distribution des sentiments :")
print(df['sentiment'].value_counts())

Dimensions du tableau : (1600000, 6)
------------------------------
Informations sur le DataFrame :
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1600000 entries, 0 to 1599999
Data columns (total 6 columns):
 #   Column     Non-Null Count    Dtype 
---  ------     --------------    ----- 
 0   sentiment  1600000 non-null  int64 
 1   id         1600000 non-null  int64 
 2   date       1600000 non-null  object
 3   query      1600000 non-null  object
 4   user       1600000 non-null  object
 5   text       1600000 non-null  object
dtypes: int64(2), object(4)
memory usage: 73.2+ MB
------------------------------
Distribution des sentiments :
sentiment
0    800000
1    800000
Name: count, dtype: int64


In [13]:
import re
import string

def clean_text(text):
    text = re.sub(r'https://\S+|www\.\S+', '', text)
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'#', '', text)
    text = text.lower()
    text = re.sub(f'[{re.escape(string.punctuation)}]', '', text) 
    return text

df['cleaned_text'] = df['text'].apply(clean_text)

print("Original text:  ", df['text'].iloc[0])
print("Cleaned text:   ", df['cleaned_text'].iloc[0])

Original text:   @switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer.  You shoulda got David Carr of Third Day to do it. ;D
Cleaned text:     httptwitpiccom2y1zl  awww thats a bummer  you shoulda got david carr of third day to do it d


In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

nltk.download('punkt')
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

df['tokens'] = df['cleaned_text'].apply(lambda text: [word for word in word_tokenize(text) if word not in stop_words])

print("Cleaned text:", df['cleaned_text'].iloc[0])
print("Final tokens:", df['tokens'].iloc[0])

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Moi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Moi\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

df['final_text'] = df['tokens'].apply(lambda tokens: ' '.join(tokens))

vectorizer = TfidfVectorizer(max_features=5000)

X = vectorizer.fit_transform(df['final_text'])

y = df['sentiment']

print("Shape of our feature matrix X:", X.shape)
print("Shape of our target vector y:", y.shape)

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

In [None]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression

with mlflow.start_run(run_name="Logistic Regression (TF-IDF)") as run:
    print("Démarrage de l'expérimentation avec MLflow pour Logistic Regression...")
    
    model_lr = LogisticRegression(max_iter=1000)
    print("Training the model...")
    model_lr.fit(X_train, y_train)
    print("Model training is complete!")

    print("Making predictions on the test data...")
    y_pred = model_lr.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, target_names=['Non-négatif (0)', 'Négatif (1)'], output_dict=True)
    
    print(f"\nOverall Accuracy: {accuracy:.2f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=['Non-négatif (0)', 'Négatif (1)']))

    mlflow.log_param("model_type", "LogisticRegression")
    mlflow.log_param("vectorizer", "TfidfVectorizer")
    mlflow.log_param("tfidf_max_features", 5000)
    mlflow.log_metric("accuracy", accuracy)

    cleaned_report = {}
    for key, value in report.items():
        if isinstance(value, dict):
            cleaned_key = key.replace(' (0)', '_0').replace(' (1)', '_1').replace('-', '_')
            cleaned_report[cleaned_key] = value
        else:
            cleaned_report[key] = value
    mlflow.log_metrics({f"precision_{k}": v['precision'] for k, v in cleaned_report.items() if isinstance(v, dict)})
    mlflow.log_metrics({f"recall_{k}": v['recall'] for k, v in cleaned_report.items() if isinstance(v, dict)})
    mlflow.log_metrics({f"f1_score_{k}": v['f1-score'] for k, v in cleaned_report.items() if isinstance(v, dict)})
    mlflow.log_text(classification_report(y_test, y_pred, target_names=['Non-négatif (0)', 'Négatif (1)']), "classification_report.txt")
    
    from mlflow.models.signature import infer_signature
    signature = infer_signature(X_train, y_pred)
    input_example = X_train[:5]
    mlflow.sklearn.log_model(model_lr, "logistic_regression_model", signature=signature, input_example=input_example)
    
    print("\nModèle et métriques enregistrés dans MLflow.")

In [None]:
# Import necessary libraries for the GloVe model
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from sklearn.model_selection import train_test_split
import mlflow
import mlflow.keras
from sklearn.metrics import classification_report, accuracy_score
import os

with mlflow.start_run(run_name="GloVe_LSTM") as run:
    print("Starting GloVe LSTM experiment with MLflow...")

    # --- 1. Data Preparation for GloVe ---
    glove_file_path = r'C:\formations\ingénieur ia\projet 7\dev_bis\sentimentanalysis\input\glove.6B\glove.6B.100d.txt'
    embedding_dim = 100
    max_features = 10000
    max_len = 100

    tokenizer_glove = Tokenizer(num_words=max_features, split=' ')
    tokenizer_glove.fit_on_texts(df['tokens'].values)
    X_glove = tokenizer_glove.texts_to_sequences(df['tokens'].values)
    X_glove = pad_sequences(X_glove, maxlen=max_len)
    y_glove = df['sentiment'].values

    X_train_glove, X_test_glove, y_train_glove, y_test_glove = train_test_split(X_glove, y_glove, test_size=0.2, random_state=42)

    print(f"Training data shape: {X_train_glove.shape}")
    print(f"Testing data shape: {X_test_glove.shape}")

    # --- 2. Load GloVe Embeddings ---
    embeddings_index = {}
    try:
        with open(glove_file_path, encoding='utf-8') as f:
            for line in f:
                values = line.split()
                word = values[0]
                coefs = np.asarray(values[1:], dtype='float32')
                embeddings_index[word] = coefs
    except FileNotFoundError:
        print(f"GloVe file not found at {glove_file_path}. Please check the path.")
        mlflow.end_run()

    # --- 3. Create Embedding Matrix ---
    word_index = tokenizer_glove.word_index
    embedding_matrix = np.zeros((len(word_index) + 1, embedding_dim))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    # --- 4. Build the LSTM Model ---
    model_glove = Sequential()
    model_glove.add(Embedding(len(word_index) + 1,
                              embedding_dim,
                              weights=[embedding_matrix],
                              input_length=max_len,
                              trainable=False))
    model_glove.add(SpatialDropout1D(0.2))
    model_glove.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    model_glove.add(Dense(1, activation='sigmoid'))

    model_glove.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model_glove.summary())

    # --- 5. Train the Model ---
    epochs = 3
    batch_size = 64

    history = model_glove.fit(X_train_glove, y_train_glove,
                              epochs=epochs,
                              batch_size=batch_size,
                              validation_data=(X_test_glove, y_test_glove),
                              verbose=1)

    # --- 6. Evaluate and Log with MLflow ---
    y_pred_glove_proba = model_glove.predict(X_test_glove)
    y_pred_glove = (y_pred_glove_proba > 0.5).astype("int32")

    accuracy_glove = accuracy_score(y_test_glove, y_pred_glove)
    report_glove = classification_report(y_test_glove, y_pred_glove, target_names=['Non-négatif (0)', 'Négatif (1)'], output_dict=True)

    print(f"\nOverall Accuracy (GloVe): {accuracy_glove:.2f}")
    print("\nClassification Report (GloVe):")
    print(classification_report(y_test_glove, y_pred_glove, target_names=['Non-négatif (0)', 'Négatif (1)']))

    mlflow.log_param("model_type", "GloVe_LSTM")
    mlflow.log_param("embedding_dim", embedding_dim)
    mlflow.log_param("max_features", max_features)
    mlflow.log_param("max_len", max_len)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch_size", batch_size)

    mlflow.log_metric("accuracy", accuracy_glove)
    
    cleaned_report_glove = {}
    for key, value in report_glove.items():
        if isinstance(value, dict):
            cleaned_key = key.replace(' (0)', '_0').replace(' (1)', '_1').replace('-', '_')
            cleaned_report_glove[cleaned_key] = value
        else:
            cleaned_report_glove[key] = value

    mlflow.log_metrics({f"precision_{k}": v['precision'] for k, v in cleaned_report_glove.items() if isinstance(v, dict)})
    mlflow.log_metrics({f"recall_{k}": v['recall'] for k, v in cleaned_report_glove.items() if isinstance(v, dict)})
    mlflow.log_metrics({f"f1_score_{k}": v['f1-score'] for k, v in cleaned_report_glove.items() if isinstance(v, dict)})
    
    mlflow.log_text(classification_report(y_test_glove, y_pred_glove, target_names=['Non-négatif (0)', 'Négatif (1)']), "classification_report.txt")

    mlflow.keras.log_model(model_glove, "glove_lstm_model")

    print("\nGloVe LSTM model and metrics logged to MLflow.")

In [None]:
# We'll use the same powerful libraries as before
import os

# Disable the progress bar from huggingface_hub to avoid LookupError in some notebook environments
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"

from transformers import AutoTokenizer, AutoModelForSequenceClassification

# The correct, English-based model name
model_name = "distilbert-base-uncased"

# 1. Load the Tokenizer for DistilBERT
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. Load the DistilBERT model for sequence classification (with 2 labels)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

print("DistilBERT model and its tokenizer are loaded and ready!")

In [None]:
# A sample tweet from our dataset
sample_text = "i love the new flight attendant service"

# Use the tokenizer to process the text
encoding = tokenizer(sample_text, 
                     padding='max_length', 
                     truncation=True, 
                     max_length=64, 
                     return_tensors='pt')

print("Original Sentence:\n", sample_text)
print("\nToken IDs (input_ids):\n", encoding['input_ids'])
print("\nAttention Mask:\n", encoding['attention_mask'])

In [None]:
import torch
from torch.utils.data import Dataset

class SentimentDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = int(self.labels[idx])

        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

sample_df = df.sample(n=16000, random_state=42)

MAX_LEN = 64
sentiment_dataset = SentimentDataset(
    texts=sample_df.text.to_numpy(),
    labels=sample_df.sentiment.to_numpy(),
    tokenizer=tokenizer,
    max_len=MAX_LEN
)

In [None]:
from torch.utils.data import DataLoader, random_split
import numpy as np

train_size = int(0.8 * len(sentiment_dataset))
test_size = len(sentiment_dataset) - train_size
train_dataset, test_dataset = random_split(sentiment_dataset, [train_size, test_size])

BATCH_SIZE = 16

train_data_loader = DataLoader(
  train_dataset,
  batch_size=BATCH_SIZE,
  shuffle=True
)

test_data_loader = DataLoader(
  test_dataset,
  batch_size=BATCH_SIZE
)

print(f"Number of batches in the training loader: {len(train_data_loader)}")
print(f"Number of batches in the testing loader: {len(test_data_loader)}")

In [None]:
from torch.optim import AdamW
from tqdm.auto import tqdm
import torch
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

history = {
    'train_loss': [],
    'accuracy': []
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = AdamW(model.parameters(), lr=2e-5)

EPOCHS = 6

with mlflow.start_run(run_name="DistilBERT") as run:
    mlflow.log_params({"model_name": model_name, "epochs": EPOCHS, "learning_rate": 2e-5, "batch_size": BATCH_SIZE, "max_len": MAX_LEN})
    
    for epoch in range(EPOCHS):
        print(f'--- Epoch {epoch + 1} / {EPOCHS} ---')
        
        model.train()
        total_train_loss = 0

        for batch in tqdm(train_data_loader, desc="Training"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            
            loss.backward()
            total_train_loss += loss.item()
            
            optimizer.step()

        avg_train_loss = total_train_loss / len(train_data_loader)
        history['train_loss'].append(avg_train_loss)

        mlflow.log_metric("train_loss", avg_train_loss, step=epoch)

        print("Evaluating...")
        model.eval()
    
    predictions, true_labels = [], []
    
    with torch.no_grad():
        for batch in tqdm(test_data_loader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            
            logits = outputs.logits
            batch_predictions = torch.argmax(logits, dim=1)
            
            predictions.extend(batch_predictions.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    val_accuracy = accuracy_score(true_labels, predictions)
    history['accuracy'].append(val_accuracy)
    
    mlflow.log_metric("accuracy", val_accuracy, step=epoch)

    report = classification_report(true_labels, predictions, target_names=['Non-négatif (0)', 'Négatif (1)'], output_dict=True)
    print(f"\nRésultats pour l'Epoch {epoch + 1}:")
    print(report)

    mlflow.log_param("model_type", "DistilBERT")
    mlflow.log_metric("accuracy", accuracy)

    cleaned_report = {}
    for key, value in report.items():
        if isinstance(value, dict):
            cleaned_key = key.replace(' (0)', '_0').replace(' (1)', '_1').replace('-', '_')
            cleaned_report[cleaned_key] = value
        else:
            cleaned_report[key] = value

    mlflow.log_metrics({f"precision_{k}": v['precision'] for k, v in cleaned_report.items() if isinstance(v, dict)})
    mlflow.log_metrics({f"recall_{k}": v['recall'] for k, v in cleaned_report.items() if isinstance(v, dict)})
    mlflow.log_metrics({f"f1_score_{k}": v['f1-score'] for k, v in cleaned_report.items() if isinstance(v, dict)})

    mlflow.log_text(classification_report(true_labels, predictions, target_names=['Non-négatif (0)', 'Négatif (1)']), "classification_report_distilbert.txt")

    print(f"Logged DistilBERT metrics to MLflow (run_id={run.info.run_id}) - accuracy={accuracy:.4f}")

    if epoch == EPOCHS - 1:
        model_artifact = {"model": model, "tokenizer": tokenizer}
        mlflow.transformers.log_model(
            transformers_model=model_artifact,
            name="distilbert_sentiment_model",
            task="text-classification"
        )

print("\nEntraînement terminé et modèle sauvegardé avec MLflow !")

In [None]:
import matplotlib.pyplot as plt

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

ax1.plot(history['train_loss'], label='Perte d entraînement')
ax1.set_title('Évolution de la Perte')
ax1.set_xlabel('Épochs')
ax1.set_ylabel('Perte')
ax1.legend()

ax2.plot(history['accuracy'], label='Précision de validation', color='orange')
ax2.set_title('Évolution de la Précision')
ax2.set_xlabel('Épochs')
ax2.set_ylabel('Précision')
ax2.legend()

plt.show()

In [None]:
import os

output_dir = "./sentiment_distilbert_model/"

os.makedirs(output_dir, exist_ok=True)

print(f"Sauvegarde du modèle dans le répertoire {output_dir}")

model.save_pretrained(output_dir)

tokenizer.save_pretrained(output_dir)

print("\nModèle et tokenizer sauvegardés avec succès !")

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

output_dir = "./sentiment_distilbert_model/"

tokenizer = AutoTokenizer.from_pretrained(output_dir)

model = AutoModelForSequenceClassification.from_pretrained(output_dir)

model.eval()

new_tweet = "The flight was surprisingly on time and the crew was wonderful."

inputs = tokenizer(new_tweet, return_tensors="pt", truncation=True, padding=True)

with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits
prediction = torch.argmax(logits, dim=1).item()

sentiments = ['Négatif (0)', 'Positif (1)']
print(f"Le tweet : '{new_tweet}'")
print(f"Result: {prediction}")
print(f"Prédiction : {sentiments[prediction]}")

## 5. Chargement et Service du Modèle avec MLflow

Maintenant que nous avons entraîné et sauvegardé nos modèles avec MLflow, voyons comment les réutiliser. Nous allons charger le modèle DistilBERT (qui est le plus performant) depuis le tracking server de MLflow et l'utiliser pour faire une prédiction. Ensuite, nous verrons comment le déployer en tant que service local.

In [None]:
import mlflow
import pandas as pd

runs = mlflow.search_runs(experiment_names=[experiment_name], order_by=["start_time DESC"], max_results=1)
last_run_id = runs.iloc[0]['run_id']

model_uri = f"runs:/{{last_run_id}}/distilbert_sentiment_model"

loaded_model = mlflow.pyfunc.load_model(model_uri)

new_tweet = "This is the worst flight experience I have ever had."
prediction_data = pd.DataFrame([new_tweet], columns=["text"])

prediction = loaded_model.predict(prediction_data)

sentiments = ['Négatif (0)', 'Positif (1)']
print(f"Tweet: '{{new_tweet}}'")
print(f"Prédiction : {sentiments[prediction[0]]}")

### Démarrer un serveur de prédiction local

MLflow facilite également le déploiement d'un modèle en tant que service REST API. Pour démarrer un serveur local qui sert notre modèle DistilBERT, vous pouvez exécuter la commande suivante dans votre terminal (en vous assurant que l'environnement où MLflow est installé est activé) :

```bash
# La variable MLFLOW_TRACKING_URI doit pointer vers votre serveur de suivi
export MLFLOW_TRACKING_URI=file:/home/samuel/mlflow_data

# Servez le modèle à partir de son URI
mlflow models serve -m "runs:/<RUN_ID>/distilbert_sentiment_model" -p 1234
```

Remplacez `<RUN_ID>` par l'ID de l'exécution que vous souhaitez servir (par exemple, celui que nous avons récupéré dans la cellule précédente). Une fois le serveur démarré, vous pouvez lui envoyer des requêtes POST pour obtenir des prédictions, par exemple avec `curl` :

```bash
curl -X POST -H "Content-Type:application/json" --data '{"dataframe_split": {"columns":["text"], "data":[[ "I love MLflow!"]]}}' http://127.0.0.1:1234/invocations
```

Cela conclut notre tour d'horizon de l'intégration de MLflow pour le suivi, la gestion et le déploiement de modèles.