<a href="https://colab.research.google.com/github/jpbeaud/language/blob/main/incidents.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import chardet
# Train and evaluate a text classifier that predicts the resolution group of
# an incident from its free-text description.


def _first_matching_column(columns, keywords):
    """Return the first column whose name contains any of *keywords*.

    Args:
        columns: Iterable of column labels, scanned in order.
        keywords: Substrings to look for in each column name.

    Returns:
        The first matching column label, unchanged.

    Raises:
        ValueError: If no column name contains one of the keywords.
    """
    for col in columns:
        if any(keyword in str(col) for keyword in keywords):
            return col
    raise ValueError(
        f"No column containing any of {keywords!r} found in {list(columns)!r}"
    )


# Detect the file encoding from its raw bytes.
with open("incident.txt", "rb") as f:
    result = chardet.detect(f.read())
# chardet reports None when it cannot guess; fall back to UTF-8 explicitly.
encoding = result['encoding'] or "utf-8"

# Load the delimited file; sep=None lets the python engine sniff the delimiter.
df = pd.read_csv("incident.txt", encoding=encoding, sep=None, engine='python')

# Locate the description column and the resolution-group column by name.
desc_col = _first_matching_column(df.columns, ("Description",))
group_col = _first_matching_column(df.columns, ("résolution", "resolution"))

# Keep only the two columns of interest and drop rows missing either one.
df = df[[desc_col, group_col]].dropna()

# Encode the group names as integer class labels.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df[group_col])

# Split the RAW text first so the vectorizer never sees the test rows.
# astype(str) guards against non-string cells that survived dropna().
texts = df[desc_col].astype(str)
text_train, text_test, y_train, y_test = train_test_split(
    texts, y, test_size=0.2, random_state=42
)

# Fit TF-IDF on the training descriptions only (avoids leaking vocabulary and
# IDF statistics from the test set), then project the test set with it.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Full-corpus matrix, kept under its original name for any later code that
# still refers to X.
X = vectorizer.transform(texts)

# Train the model.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out rows. Only classes present in y_test are reported;
# the label list is computed once and sorted so that `labels` and
# `target_names` are guaranteed to line up.
y_pred = model.predict(X_test)
present_labels = sorted(set(y_test))
print(classification_report(
    y_test, y_pred,
    labels=present_labels,
    target_names=[
        str(name) for name in label_encoder.inverse_transform(present_labels)
    ],
))

