In [2]:
import joblib

# Load the trained components
model = joblib.load('incident_predictor_model.pkl')
preprocessor = joblib.load('incident_preprocessor.pkl')
label_encoders = joblib.load('incident_label_encoders.pkl')

In [8]:
import pandas as pd

df_new = pd.read_excel("2019 Incident data.xlsx")

df_new.head()

Unnamed: 0,CBU / BS,Reporting Site,Date Occured,Title,Sub Type,Description,Type of Injury,Kind of event hazard,Actions taken
0,CBU MOROCCO,CBU MOROCCO | MAR - CASABLANCA | SAL,2019-12-27,27-12-2019 / LTA (Lost Time Accident) / MAR - ...,LTA (Lost Time Accident),"Dans une intersection, en voulant éviter un cy...",-,Vehicle - 2 or 3 wheelers accident,Alerte du LD Etablissement d’un PV de constatP...
1,CBU MOROCCO,CBU MOROCCO | MAR - KENITRA 2 | FAR,2019-12-24,24-12-2019 / LTA (Lost Time Accident) / MAR - ...,LTA (Lost Time Accident),While treating a sick cow on the ground. The c...,-,Falling objects,We asked for the presence of the farm's nurse ...
2,CBU INDONESIA,CBU INDONESIA | IDN - BOGOR 5 | FAC,2019-12-22,22-12-2019 / NLTA - RWC (Non lost time acciden...,"NLTA - RWC (Non lost time accident, Restricted...","saat sebelum menyapu merapihkan botol ke lori,...",-,Hit by a vehicle,ya … jari tangan dijahit
3,"CBU TURKIYE, CENTRAL ASIA AND CAUCASUS","CBU TURKIYE, CENTRAL ASIA AND CAUCASUS | TUR -...",2019-12-20,20-12-2019 / NLTA - MTC (Non lost time acciden...,"NLTA - MTC (Non lost time accident, Medical tr...",Maintanence technician took Aciplusfoam chemic...,-,Thermal (direct or fluid projection) hazards,No Data Imported
4,"CBU TURKIYE, CENTRAL ASIA AND CAUCASUS","CBU TURKIYE, CENTRAL ASIA AND CAUCASUS | TUR -...",2019-12-20,20-12-2019 / LTA (Lost Time Accident) / TUR - ...,LTA (Lost Time Accident),HOD operator intervened jammed bottles at cap ...,-,Falling objects,No Data Imported


In [14]:
from deep_translator import GoogleTranslator
from langdetect import detect
from tqdm.notebook import tqdm

# Columns to translate
columns_to_translate = ['Title', 'Description', 'Actions taken']

# Translate each column
for col in columns_to_translate:
    print(f"🔤 Translating column: '{col}' (skipping English rows)")
    translated = []

    for text in tqdm(df_new[col], desc=f"Translating {col}"):
        if pd.isnull(text):
            translated.append(text)
        else:
            try:
                lang = detect(str(text))
                if lang == 'en':
                    translated.append(text)  # Skip English
                else:
                    translated_text = GoogleTranslator(source='auto', target='en').translate(str(text))
                    translated.append(translated_text)
            except Exception as e:
                print(f"⚠️ Error with text: {text}\n{e}")
                translated.append(text)

    df_new[col + '_EN'] = translated
    print(f"✅ Done translating '{col}'. New column: '{col}_EN'")

# Save the translated Excel file
output_file = 'translated_file_2019.xlsx'
df_new.to_excel(output_file, index=False)
print(f"💾 All translations complete! Saved to: {output_file}")

🔤 Translating column: 'Title' (skipping English rows)


Translating Title:   0%|          | 0/146 [00:00<?, ?it/s]

✅ Done translating 'Title'. New column: 'Title_EN'
🔤 Translating column: 'Description' (skipping English rows)


Translating Description:   0%|          | 0/146 [00:00<?, ?it/s]

✅ Done translating 'Description'. New column: 'Description_EN'
🔤 Translating column: 'Actions taken' (skipping English rows)


Translating Actions taken:   0%|          | 0/146 [00:00<?, ?it/s]

⚠️ Error with text: -
No features in text.
✅ Done translating 'Actions taken'. New column: 'Actions taken_EN'
💾 All translations complete! Saved to: translated_file_2019.xlsx


In [16]:
# Rebuild full_text and time features
df_new['full_text'] = (
    df_new['Title_EN'].fillna('') + ' ' +
    df_new['Description_EN'].fillna('') + ' ' +
    df_new['Actions taken_EN'].fillna('')
)

df_new['Date Occured'] = pd.to_datetime(df_new['Date Occured'], errors='coerce')
df_new['month'] = df_new['Date Occured'].dt.month
df_new['weekday'] = df_new['Date Occured'].dt.weekday

# Prepare inputs
X_new = df_new[['Reporting Site', 'Sub Type', 'month', 'weekday', 'full_text']]
X_new_transformed = preprocessor.transform(X_new)

# Predict
y_new_pred = model.predict(X_new_transformed)

In [18]:
# Convert predictions to DataFrame
y_new_df = pd.DataFrame(y_new_pred, columns=['Type of Injury', 'Kind of event hazard'])

# Decode labels
for col in y_new_df.columns:
    y_new_df[col] = label_encoders[col].inverse_transform(y_new_df[col])

# Combine with original
df_result = pd.concat([df_new.reset_index(drop=True), y_new_df], axis=1)

In [20]:
df_result.to_excel("Incident Predictions.xlsx", index=False)
print("✅ Saved: Incident Predictions.xlsx")

✅ Saved: Incident Predictions.xlsx


In [13]:
from deep_translator import GoogleTranslator
from langdetect import detect
from tqdm.notebook import tqdm

# Load model components
model = joblib.load('incident_predictor_model.pkl')
preprocessor = joblib.load('incident_preprocessor.pkl')
label_encoders = joblib.load('incident_label_encoders.pkl')

df_automation_step = pd.read_excel('2019 Incident data.xlsx')

# Columns to translate
columns_to_translate = ['Title', 'Description', 'Actions taken']

# Translate each column
for col in columns_to_translate:
    print(f"🔤 Translating column: '{col}' (skipping English rows)")
    translated = []

    for text in tqdm(df_automation_step[col], desc=f"Translating {col}"):
        if pd.isnull(text):
            translated.append(text)
        else:
            try:
                lang = detect(str(text))
                if lang == 'en':
                    translated.append(text)  # Skip English
                else:
                    translated_text = GoogleTranslator(source='auto', target='en').translate(str(text))
                    translated.append(translated_text)
            except Exception as e:
                print(f"⚠️ Error with text: {text}\n{e}")
                translated.append(text)

    df_automation_step[col + '_EN'] = translated
    print(f"✅ Done translating '{col}'. New column: '{col}_EN'")

# Create NLP column
df_automation_step['full_text'] = (
    df_automation_step['Title_EN'].fillna('') + ' ' +
    df_automation_step['Description_EN'].fillna('') + ' ' +
    df_automation_step['Actions taken_EN'].fillna('')
)

# Time features
df_automation_step['Date Occured'] = pd.to_datetime(df_automation_step['Date Occured'], errors='coerce')
df_automation_step['month'] = df_automation_step['Date Occured'].dt.month
df_automation_step['weekday'] = df_automation_step['Date Occured'].dt.weekday

# Prepare features
X_new = df_automation_step[['Reporting Site', 'Sub Type', 'month', 'weekday', 'full_text']]
X_new_transformed = preprocessor.transform(X_new)

# Predict
y_pred = model.predict(X_new_transformed)
y_pred_df = pd.DataFrame(y_pred, columns=['Type of Injury', 'Kind of event hazard'])

# Decode
for col in y_pred_df.columns:
    y_pred_df[col] = label_encoders[col].inverse_transform(y_pred_df[col])

# Merge predictions
df_result = pd.concat([df_automation_step.reset_index(drop=True), y_pred_df], axis=1)

# Save output
df_result.to_excel("Incident Predictions 2.xlsx", index=False)
print("✅ Saved: Incident Predictions 2.xlsx")

🔤 Translating column: 'Title' (skipping English rows)


Translating Title:   0%|          | 0/146 [00:00<?, ?it/s]

✅ Done translating 'Title'. New column: 'Title_EN'
🔤 Translating column: 'Description' (skipping English rows)


Translating Description:   0%|          | 0/146 [00:00<?, ?it/s]

✅ Done translating 'Description'. New column: 'Description_EN'
🔤 Translating column: 'Actions taken' (skipping English rows)


Translating Actions taken:   0%|          | 0/146 [00:00<?, ?it/s]

⚠️ Error with text: -
No features in text.
✅ Done translating 'Actions taken'. New column: 'Actions taken_EN'
✅ Saved: Incident Predictions 2.xlsx


In [33]:
import pickle

file_path = 'incident_label_encoders.pkl'

# Load and print the data
with open(file_path, 'rb') as f:
    data = pickle.load(f)

# Display the loaded data
print("Data loaded from the .pkl file:")
display(data)

Data loaded from the .pkl file:


array(['-', 'Abrasion/Irritation', 'Amputation',
       'Asphyxiation, suffocation', 'Bruising / Contusion',
       'Burn (2nd, 3rd, 4th degree)', 'Concussion',
       'Contact / Exposure (chemicals)', 'Crush',
       'Cut / Laceration / Wound', 'Dislocation',
       'Foreign body (ingressed)', 'Fractures', 'Inflammation',
       'Irritation', 'Loss of consciousness', 'Multi Injury',
       'Musculoskeletal Disorder', 'Other', 'Pain',
       'Scald / Burn (1st degree)', 'Shock', 'Strain / Sprain'],
      dtype=object)

In [35]:
import joblib
import pandas as pd

# Load predicted data (with numeric predictions)
df = pd.read_excel("Incident Predictions 2.xlsx")

# Load saved label encoders
label_encoders = joblib.load("incident_label_encoders.pkl")

# Ensure prediction columns are integers
df['Predicted Kind of event hazard'] = df['Predicted Kind of event hazard'].astype(int)
df['Predicted Type of Injury'] = df['Predicted Type of Injury'].astype(int)

# Decode predicted hazard and injury
df['Predicted Kind of event hazard'] = label_encoders['Kind of event hazard'].inverse_transform(
    df['Predicted Kind of event hazard']
)
df['Predicted Type of Injury'] = label_encoders['Type of Injury'].inverse_transform(
    df['Predicted Type of Injury']
)

# Save the decoded predictions
df.to_excel("Incident Predictions 2_decoded.xlsx", index=False)
print("Decoded file saved successfully.")

Decoded file saved successfully.
