In [1]:
import os

import numpy as np
import pandas as pd
from openai import OpenAI
from sklearn.linear_model import Ridge
from sklearn.multioutput import MultiOutputRegressor
from tqdm import tqdm

# === 1. CONFIGURATION ===
# The API key is read from the environment so the secret never lives in the
# notebook source or its saved outputs. Export OPENAI_API_KEY before starting
# the kernel. NOTE(review): a real key was previously committed on this line;
# it must be revoked in the OpenAI dashboard.
API_KEY = os.environ.get("OPENAI_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY environment variable is not set; "
        "export it before running this notebook."
    )

# Embedding model used for both the training texts and the new anomalies.
openai_model = "text-embedding-3-large"

client = OpenAI(api_key=API_KEY)

# === 2. LOAD AND PREPARE TRAINING DATA ===
# The three score columns the regressor learns to predict.
target_cols = ["Fiabilit√© Int√©grit√©", "Disponibilt√©", "Process Safety"]

df_train = pd.read_excel("test01.xlsx")

# Build the single free-text input by joining the three descriptive fields
# with " | " (each field is stringified first, so missing cells become "nan").
system_part = df_train["Systeme"].astype(str)
description_part = df_train["Description"].astype(str)
equipment_part = df_train["Description de l'√©quipement"].astype(str)
df_train["text"] = system_part + " | " + description_part + " | " + equipment_part

# Keep only the rows where every target score is present.
df_train = df_train.dropna(subset=target_cols)

# Inputs (list of strings) and targets (2-D array, one column per score).
X_train_text = list(df_train["text"])
y_train = df_train[target_cols].to_numpy()


# === 3. EMBEDDING FUNCTION ===
# Output vector length of each known OpenAI embedding model. Used to size the
# zero-vector fallback so that it stacks cleanly with real embeddings.
_EMBEDDING_DIMS = {
    "text-embedding-3-large": 3072,
    "text-embedding-3-small": 1536,
    "text-embedding-ada-002": 1536,
}


def get_embedding(text: str, model: str = openai_model) -> list:
    """Return the embedding vector of ``text`` computed by ``model``.

    Parameters
    ----------
    text : str
        The text to embed; sent to the API as a single-element batch.
    model : str
        Name of the OpenAI embedding model (defaults to ``openai_model``).

    Returns
    -------
    list
        The embedding as a list of floats. If the API call raises, the error
        is printed and an all-zero vector is returned instead, so that a
        single failure does not abort a long embedding run.
    """
    try:
        response = client.embeddings.create(input=[text], model=model)
        return response.data[0].embedding
    except Exception as e:
        print(f"Error embedding text: {text[:50]}... -> {e}")
        # The fallback must have the same length as a real embedding of this
        # model; otherwise np.array(...) over the results becomes ragged.
        # Unknown models keep the historical 1536-element fallback.
        return [0.0] * _EMBEDDING_DIMS.get(model, 1536)


# === 4. EMBED TRAINING TEXTS ===
# One embedding request per training text; tqdm shows progress.
print("üîÑ Embedding training data...")
train_vectors = []
for train_text in tqdm(X_train_text):
    train_vectors.append(get_embedding(train_text))
X_train_embedded = np.array(train_vectors)


# === 5. TRAIN MODEL ===
# A separate Ridge regressor is fitted for each target score column.
print("üß† Training regression model...")
model = MultiOutputRegressor(Ridge())
model.fit(X_train_embedded, y_train)




üîÑ Embedding training data...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5998/5998 [30:28<00:00,  3.28it/s]  


üß† Training regression model...


0,1,2
,estimator,Ridge()
,n_jobs,

0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [6]:
# === 6. LOAD AND EMBED NEW ANOMALIES ===
df_new = pd.read_excel("test01wfi.xlsx")

# Same " | "-joined text layout as the training data, so the embeddings of
# new rows are comparable with those the model was fitted on.
new_system = df_new["Systeme"].astype(str)
new_description = df_new["Description"].astype(str)
new_equipment = df_new["Description de l'√©quipement"].astype(str)
df_new["text"] = new_system + " | " + new_description + " | " + new_equipment

X_new_text = list(df_new["text"])

print("üîÑ Embedding new anomaly data...")



üîÑ Embedding new anomaly data...


In [None]:
# Embed every new anomaly text (one API request per row).
new_vectors = []
for new_text in tqdm(X_new_text):
    new_vectors.append(get_embedding(new_text))
X_new_embedded = np.array(new_vectors)


# === 7. PREDICT AND FORMAT RESULTS ===
print("üßÆ Predicting scores...")
score_columns = ["Fiabilit√© Int√©grit√©", "Disponibilt√©", "Process Safety"]
preds = model.predict(X_new_embedded)
preds_df = pd.DataFrame(preds, columns=score_columns)

# Turn the continuous predictions into integer scores limited to 1..5:
# round first, then clamp, then cast.
rounded_scores = preds_df.round()
bounded_scores = rounded_scores.clip(1, 5)
preds_df = bounded_scores.astype(int)

# Attach the predicted scores to the original input rows, side by side.
df_result = pd.concat([df_new, preds_df], axis="columns")

# === 8. SAVE TO FILE ===
output_file = "scored_anomalies_openai.xlsx"
df_result.to_excel(output_file, index=False)
print(f"‚úÖ Done! Results saved to {output_file}")
display(df_result.head(20))

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:06<00:00,  2.89it/s]

üßÆ Predicting scores...
‚úÖ Done! Results saved to scored_anomalies_openai.xlsx





Unnamed: 0,Num_equipement,Systeme,Description,Description de l'√©quipement,text,Fiabilit√© Int√©grit√©,Disponibilt√©,Process Safety
0,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,Soufflage avec de l'air des radiateur de refro...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | Souffla...,2,4,1
1,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,Bruit moto-ventilateur transformateur principa...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | Bruit m...,3,4,1
2,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,manque √©clairage transformateur principal,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | manque ...,2,3,3
3,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,Arr√™t 7 jours U3 AVR 2021: Entretien pr√©ventif...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | Arr√™t 7...,2,3,2
4,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,D√©gagement des portes grillage haut suite risq...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | D√©gagem...,2,3,4
5,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,pr√©voir changer le silicagel du transformateur...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | pr√©voir...,2,3,2
6,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,netoyage et souflage des radiateurs du transfo...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | netoyag...,2,4,1
7,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,Pr√©voir la remise en √©tat porte transformateur...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | Pr√©voir...,2,3,2
8,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,Pr√©sence trace d'huile au niveau sortie borne ...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | Pr√©senc...,2,4,2
9,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,pr√©voir le contr√¥le des indicateur temp√©rature...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | pr√©voir...,2,3,1


In [8]:

# Preview the first 20 rows of the scored anomalies.
display(df_result.head(n=20))

Unnamed: 0,Num_equipement,Systeme,Description,Description de l'√©quipement,text,Fiabilit√© Int√©grit√©,Disponibilt√©,Process Safety
0,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,Soufflage avec de l'air des radiateur de refro...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | Souffla...,2,4,1
1,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,Bruit moto-ventilateur transformateur principa...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | Bruit m...,3,4,1
2,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,manque √©clairage transformateur principal,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | manque ...,2,3,3
3,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,Arr√™t 7 jours U3 AVR 2021: Entretien pr√©ventif...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | Arr√™t 7...,2,3,2
4,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,D√©gagement des portes grillage haut suite risq...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | D√©gagem...,2,3,4
5,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,pr√©voir changer le silicagel du transformateur...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | pr√©voir...,2,3,2
6,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,netoyage et souflage des radiateurs du transfo...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | netoyag...,2,4,1
7,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,Pr√©voir la remise en √©tat porte transformateur...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | Pr√©voir...,2,3,2
8,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,Pr√©sence trace d'huile au niveau sortie borne ...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | Pr√©senc...,2,4,2
9,3f68d058-b105-46f0-b137-6ce7695b85d1,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd,pr√©voir le contr√¥le des indicateur temp√©rature...,TRANSFO PRINCIPAL,e4f67f5f-be46-4ce0-ad14-68575d3cf0cd | pr√©voir...,2,3,1
