In [1]:
import pymongo
import pandas as pd
import numpy as np
# MongoDB connection details
client = pymongo.MongoClient("mongodb://localhost:27017/")  # MongoDB URI
db = client["ais_training_data"]  # Database name
collection = db["ais_data_relative_week_summary"]  # Collection name

# Fetch data from MongoDB
data = list(collection.find())  # Get all documents from the collection

# Convert the data to a Pandas DataFrame
df = pd.DataFrame(data)


df = df.drop(columns=['_id'])
# Get a list of ship types excluding 'Undefined'
ship_types = df['Ship_Type'][df['Ship_Type'] != 'Undefined'].unique()

df['Ship_Type'] = df['Ship_Type'].apply(lambda x: np.random.choice(ship_types) if x == 'Undefined' else x)
df=df.groupby(['YearWeek', 'Ship_Type', 'Destination',"CN"], as_index=False)['TotalCount'].sum()


In [5]:
df1=df["CN"].unique()

In [8]:
df_unique = pd.DataFrame(df1, columns=['CN'])

In [10]:
df_unique.to_csv("ship_compnay_name.csv")

In [2]:
df

Unnamed: 0,YearWeek,Ship_Type,Destination,CN,TotalCount
0,2023-01,Anti-pollution,Brondby,NCC ROADS,1
1,2023-01,Anti-pollution,Brunsbuttel,WESSELS REEDEREI GMBH & CO KG,1
2,2023-01,Anti-pollution,Donso,HELLMAN HBO,5
3,2023-01,Anti-pollution,Fredericia,MARINE PROJECTS ROTTERDAM BV,5
4,2023-01,Anti-pollution,Frederiksberg,SAMSOE KOMMUNE,6
...,...,...,...,...,...
87013,2023-43,WIG,Struer,SAMSOE KOMMUNE,3
87014,2023-43,WIG,Trelleborg,ANGFARTYGSAKTIEBOLAGET 1879,3
87015,2023-43,WIG,Tuborg Havn,SAMSOE KOMMUNE,3
87016,2023-43,WIG,Visby,ANGFARTYGSAKTIEBOLAGET 1879,4


# RandomForestClassifier

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MultiLabelBinarizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report, hamming_loss, accuracy_score
import joblib


data=df
# Split 'YearWeek' into 'Year' and 'Week'
data[['Year', 'Week']] = data['YearWeek'].str.split('-', expand=True)
data['Year'] = data['Year'].astype(int)
data['Week'] = data['Week'].astype(int)
data = data.drop('YearWeek', axis=1)

# Group by 'Year', 'Week', and 'CN' to aggregate Destinations
grouped_data = data.groupby(['Year', 'Week', 'CN'])['Destination'].apply(list).reset_index()

# Remove duplicate Destinations
grouped_data['Destination'] = grouped_data['Destination'].apply(lambda x: list(set(x)))

# Initialize MultiLabelBinarizer
mlb = MultiLabelBinarizer()

# Fit and transform the 'Destination' column
y = mlb.fit_transform(grouped_data['Destination'])
destination_classes = mlb.classes_

# Prepare features
X = grouped_data[['Year', 'Week', 'CN']]

# Define categorical and numerical features
categorical_features = ['CN']
numerical_features = ['Year', 'Week']

# Define the ColumnTransformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', 'passthrough', numerical_features)
    ])



# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)


# Define the Model Pipeline


# Define the base classifier
base_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Wrap the classifier with MultiOutputClassifier
multi_label_classifier = MultiOutputClassifier(base_classifier, n_jobs=-1)

# Create the pipeline
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', multi_label_classifier)
])


# Train the model pipeline
model_pipeline.fit(X_train, y_train)



# Predict on the test set
y_pred = model_pipeline.predict(X_test)

# Classification Report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=destination_classes))

# Hamming Loss
hl = hamming_loss(y_test, y_pred)
print(f"Hamming Loss: {hl:.4f}")

# Exact Match Ratio
exact_match = np.all(y_test == y_pred, axis=1).mean()
print(f"Exact Match Ratio: {exact_match:.4f}")



# Define filenames
model_filename = 'multi_label_shipping_model.joblib'
mlb_filename = 'destination_mlb.joblib'

# Save the model pipeline
joblib.dump(model_pipeline, model_filename)

# Save the MultiLabelBinarizer
joblib.dump(mlb, mlb_filename)

print(f"Model saved to {model_filename}")
print(f"MultiLabelBinarizer saved to {mlb_filename}")



# Load the model pipeline
loaded_model = joblib.load('multi_label_shipping_model.joblib')

# Load the MultiLabelBinarizer
loaded_mlb = joblib.load('destination_mlb.joblib')

print("Model and MultiLabelBinarizer loaded successfully.")


def predict_destinations(year_week, cn, model, mlb):
    """
    Predict destinations based on YearWeek and CN.
    
    Parameters:
    - year_week (str): Format 'YYYY-WW', e.g., '2024-05'
    - cn (str): Company Name
    
    Returns:
    - List of predicted destinations
    """
    # Split YearWeek
    year, week = map(int, year_week.split('-'))
    
    # Create a DataFrame for the input
    input_df = pd.DataFrame({
        'Year': [year],
        'Week': [week],
        'CN': [cn]
    })
    
    # Predict
    predicted = model.predict(input_df)
    
    # Convert predictions to destination names
    destinations = mlb.inverse_transform(predicted)
    
    # Return the first (and only) prediction
    return destinations[0]

# Example usage
year_week_input = '2023-05'
cn_input = 'WIG'  

predicted_destinations = predict_destinations(year_week_input, cn_input, loaded_model, loaded_mlb)
print(f"Predicted Destinations for YearWeek {year_week_input} and CN '{cn_input}': {predicted_destinations}")


Classification Report:
                         precision    recall  f1-score   support

               Aabenraa       0.37      0.52      0.43        33
                Aalborg       0.56      0.43      0.48        82
                 Aarhus       0.44      0.47      0.45       121
               Aberdeen       0.36      0.29      0.32        17
            Aeroskobing       0.76      0.86      0.81        29
               Agnefest       1.00      0.55      0.71        11
                   Ahus       0.30      0.48      0.37        21
               Akrehamn       0.50      1.00      0.67         1
                    Ala       0.00      0.00      0.00         0
                Alcudia       0.00      0.00      0.00         0
                Alkmaar       0.00      0.00      0.00         0
                Allinge       0.87      0.76      0.81        17
              Amsterdam       0.60      0.33      0.43         9
             Anstruther       0.00      0.00      0.00         2
 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Hamming Loss: 0.0043
Exact Match Ratio: 0.4126
Model saved to multi_label_shipping_model.joblib
MultiLabelBinarizer saved to destination_mlb.joblib
Model and MultiLabelBinarizer loaded successfully.
Predicted Destinations for YearWeek 2024-05 and CN 'WIG': ()


In [24]:

year_week_input = '2023-05'
cn_input = 'COMBI LIFT GMBH'
predicted_destinations = predict_destinations(year_week_input, cn_input, loaded_model, loaded_mlb)
print(f"Predicted Destinations for YearWeek {year_week_input} and CN '{cn_input}': {predicted_destinations}")

Predicted Destinations for YearWeek 2023-05 and CN 'COMBI LIFT GMBH': ('BRUNSBUETTEL', 'Bremerhaven', 'Brunsbuttel', 'Busum', 'Butzfleth', 'Cuxhaven', 'Eckernforde', 'Gluckstadt', 'Hamburg', 'Hamburg-Harburg', 'Heiligenhafen', 'Holtenau', 'Husum', 'Itzehoe', 'KIEL', 'Kappeln', 'Kiel', 'LUBECK', 'Laboe', 'Neuhaus', 'Neustadt/Holstein', 'Orth', 'Rendsburg', 'Rostock', 'Sassnitz', 'Stade', 'Toenning', 'Wilhelmshaven')


# RandomForestClassifier with  RandomForestRegressor without shiptype

In [26]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MultiLabelBinarizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report, hamming_loss, mean_absolute_error, mean_squared_error, r2_score
import joblib


# Load data
data = df

# Split 'YearWeek' into 'Year' and 'Week'
data[['Year', 'Week']] = data['YearWeek'].str.split('-', expand=True)
data['Year'] = data['Year'].astype(int)
data['Week'] = data['Week'].astype(int)
data = data.drop('YearWeek', axis=1)

# Group by 'Year', 'Week', and 'CN' to aggregate Destinations and TotalCounts
grouped_data = data.groupby(['Year', 'Week', 'CN']).agg({
    'Destination': list,
    'TotalCount': list
}).reset_index()

# Explode the grouped_data to create a regression dataset
regression_data = grouped_data.explode(['Destination', 'TotalCount']).reset_index(drop=True)

# Initialize MultiLabelBinarizer for classification
mlb = MultiLabelBinarizer()
y_class = mlb.fit_transform(grouped_data['Destination'])
destination_classes = mlb.classes_

# Features for classification
X_class = grouped_data[['Year', 'Week', 'CN']]

# Features and target for regression
X_reg = regression_data[['Year', 'Week', 'CN', 'Destination']]
y_reg = regression_data['TotalCount']


# Classification Split
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42)

# Regression Split
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42)



# Classification Preprocessing
categorical_features_class = ['CN']
numerical_features_class = ['Year', 'Week']

preprocessor_class = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features_class),
        ('num', 'passthrough', numerical_features_class)
    ])

# Regression Preprocessing
categorical_features_reg = ['CN', 'Destination']
numerical_features_reg = ['Year', 'Week']

preprocessor_reg = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features_reg),
        ('num', 'passthrough', numerical_features_reg)
    ])

# Classification Pipeline
base_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
multi_label_classifier = MultiOutputClassifier(base_classifier, n_jobs=-1)
pipeline_class = Pipeline(steps=[
    ('preprocessor', preprocessor_class),
    ('classifier', multi_label_classifier)
])

# Regression Pipeline
pipeline_reg = Pipeline(steps=[
    ('preprocessor', preprocessor_reg),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])


# Train Classification Model
pipeline_class.fit(X_train_class, y_train_class)

# Train Regression Model
pipeline_reg.fit(X_train_reg, y_train_reg)



# Predict on test set
y_pred_class = pipeline_class.predict(X_test_class)

# Classification Report
print("Classification Report:")
print(classification_report(y_test_class, y_pred_class, target_names=destination_classes))

# Hamming Loss
hl = hamming_loss(y_test_class, y_pred_class)
print(f"Hamming Loss: {hl:.4f}")



# Predict on test set
y_pred_reg = pipeline_reg.predict(X_test_reg)

# Calculate evaluation metrics
mae = mean_absolute_error(y_test_reg, y_pred_reg)
mse = mean_squared_error(y_test_reg, y_pred_reg)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_reg, y_pred_reg)

print("\nRegression Model Evaluation Metrics:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R^2 Score: {r2:.2f}")



# Define filenames
model_class_filename = 'multi_label_classification_model_new.joblib'
model_reg_filename = 'regression_model_new.joblib'
mlb_filename = 'destination_mlb_new.joblib'

# Save the classification pipeline
joblib.dump(pipeline_class, model_class_filename)
print(f"\nClassification model saved to {model_class_filename}")

# Save the regression pipeline
joblib.dump(pipeline_reg, model_reg_filename)
print(f"Regression model saved to {model_reg_filename}")

# Save the MultiLabelBinarizer
joblib.dump(mlb, mlb_filename)
print(f"MultiLabelBinarizer saved to {mlb_filename}")


Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                         precision    recall  f1-score   support

               Aabenraa       0.37      0.52      0.43        33
                Aalborg       0.56      0.43      0.48        82
                 Aarhus       0.44      0.47      0.45       121
               Aberdeen       0.36      0.29      0.32        17
            Aeroskobing       0.76      0.86      0.81        29
               Agnefest       1.00      0.55      0.71        11
                   Ahus       0.30      0.48      0.37        21
               Akrehamn       0.50      1.00      0.67         1
                    Ala       0.00      0.00      0.00         0
                Alcudia       0.00      0.00      0.00         0
                Alkmaar       0.00      0.00      0.00         0
                Allinge       0.87      0.76      0.81        17
              Amsterdam       0.60      0.33      0.43         9
             Anstruther       0.00      0.00      0.00         2
              Antwerpen 

In [None]:
# Load the models and encoders
loaded_pipeline_class = joblib.load('/home/talal/fyp/predictive model/multi_label_classification_model_new.joblib')
loaded_pipeline_reg = joblib.load('/home/talal/fyp/predictive model/regression_model_new.joblib')
loaded_mlb = joblib.load('/home/talal/fyp/predictive model/destination_mlb_new.joblib')
    

In [15]:
import pandas as pd
import joblib

import pandas as pd

def predict_destinations_and_counts(year_week, cn, class_model, reg_model, mlb):
    """
    Predict Destinations and their TotalCounts based on YearWeek and CN.
    
    Parameters:
    - year_week (str): Format 'YYYY-WW', e.g., '2024-05'
    - cn (str): Company Name
    
    Returns:
    - DataFrame: A DataFrame with 'Destination' and 'TotalCount' columns
    """
    # Split YearWeek
    try:
        year, week = map(int, year_week.split('-'))
    except ValueError:
        raise ValueError("year_week should be in 'YYYY-WW' format, e.g., '2024-05'")
    
    # Create a DataFrame for classification input
    input_class = pd.DataFrame({
        'Year': [year],
        'Week': [week],
        'CN': [cn]
    })
    
    # Predict Destinations (binary matrix)
    predicted_class = class_model.predict(input_class)
    
    # Convert binary matrix to list of destinations
    predicted_destinations = mlb.inverse_transform(predicted_class)
    
    # If no destinations predicted
    if not predicted_destinations or not predicted_destinations[0]:
        return pd.DataFrame(columns=['Destination', 'TotalCount'])
    
    # Extract destinations (since we have only one input)
    destinations = predicted_destinations[0]
    
    # Prepare DataFrame for regression input
    input_reg = pd.DataFrame({
        'Year': [year] * len(destinations),
        'Week': [week] * len(destinations),
        'CN': [cn] * len(destinations),
        'Destination': list(destinations)
    })
    
    # Predict TotalCount for each destination
    predicted_counts = reg_model.predict(input_reg)
    
    # Combine destinations with their predicted counts
    results = pd.DataFrame({
        'Destination': destinations,
        'TotalCount': predicted_counts.astype(int)
    })
    
    return results


if __name__ == "__main__":

    print("Models and encoders loaded successfully.")
    
    # Example Inputs
    year_week_input = '2024-05'
    cn_input = 'COMBI LIFT GMBH'  # Example Company Name
    
    # Predict Destinations and TotalCounts
    predicted_results = predict_destinations_and_counts(
        year_week_input,
        cn_input,
        loaded_pipeline_class,
        loaded_pipeline_reg,
        loaded_mlb
    )
    
predicted_results


Models and encoders loaded successfully.


Unnamed: 0,Destination,TotalCount
0,BRUNSBUETTEL,1
1,Bremerhaven,2
2,Brunsbuttel,2
3,Busum,4
4,Butzfleth,2
5,Cuxhaven,4
6,Eckernforde,2
7,Gluckstadt,3
8,Hamburg,3
9,Hamburg-Harburg,1


In [14]:
predicted_results

[('BRUNSBUETTEL', 1),
 ('Bremerhaven', 2),
 ('Brunsbuttel', 2),
 ('Busum', 4),
 ('Butzfleth', 2),
 ('Cuxhaven', 4),
 ('Eckernforde', 2),
 ('Gluckstadt', 3),
 ('Hamburg', 3),
 ('Hamburg-Harburg', 1),
 ('Heiligenhafen', 4),
 ('Holtenau', 3),
 ('Husum', 3),
 ('Itzehoe', 1),
 ('KIEL', 3),
 ('Kappeln', 4),
 ('Kiel', 3),
 ('LUBECK', 2),
 ('Laboe', 2),
 ('Neuhaus', 1),
 ('Neustadt/Holstein', 3),
 ('Orth', 2),
 ('Rendsburg', 4),
 ('Rostock', 4),
 ('Sassnitz', 3),
 ('Stade', 2),
 ('Toenning', 4),
 ('Wilhelmshaven', 1)]

# RandomForestClassifier with  RandomForestRegressor with shiptype

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MultiLabelBinarizer
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report, hamming_loss, jaccard_score
from sklearn.pipeline import Pipeline
import joblib



# Load data
data = df  

# Split 'YearWeek' into 'Year' and 'Week'
data[['Year', 'Week']] = data['YearWeek'].str.split('-', expand=True)
data['Year'] = data['Year'].astype(int)
data['Week'] = data['Week'].astype(int)
data = data.drop('YearWeek', axis=1)

# Group by Year, Week, Ship_Type, CN and aggregate Destinations
grouped_data = data.groupby(['Year', 'Week', 'Ship_Type', 'CN'])['Destination'].apply(list).reset_index()
grouped_data['Destination'] = grouped_data['Destination'].apply(lambda x: list(set(x))) 

# Define feature columns and target
feature_columns = ['Year', 'Week', 'Ship_Type', 'CN']
target_column = 'Destination'

X = grouped_data[feature_columns]
y = grouped_data[target_column]

# Binarize the target
mlb = MultiLabelBinarizer()
y_binarized = mlb.fit_transform(y)
destination_classes = mlb.classes_

print(f"Number of Destinations: {len(destination_classes)}")



X_train, X_test, y_train, y_test = train_test_split(
    X, y_binarized, test_size=0.2, random_state=42)


categorical_features = ['Ship_Type', 'CN']
numerical_features = ['Year', 'Week']

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', 'passthrough', numerical_features)
    ])

classifier = OneVsRestClassifier(RandomForestClassifier(n_estimators=100, random_state=42))

model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', classifier)
])



model_pipeline.fit(X_train, y_train)


y_pred = model_pipeline.predict(X_test)

# Hamming Loss
hl = hamming_loss(y_test, y_pred)
print(f"Hamming Loss: {hl:.4f}")

# Jaccard Score
js = jaccard_score(y_test, y_pred, average='samples')
print(f"Jaccard Score: {js:.4f}")

# Classification Report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=destination_classes))


model_filename = 'models/multi_label_shipping_model.joblib'
mlb_filename = 'models/multi_label_binarizer.joblib'

joblib.dump(model_pipeline, model_filename)
joblib.dump(mlb, mlb_filename)

print(f"Model saved to {model_filename}")
print(f"MultiLabelBinarizer saved to {mlb_filename}")



loaded_model = joblib.load(model_filename)
loaded_mlb = joblib.load(mlb_filename)

print("Model and MultiLabelBinarizer loaded successfully.")



def predict_destinations(yearweek, ship_type, cn, model, mlb):
    """
    Predict destinations based on YearWeek, Ship_Type, and CN.

    Parameters:
    - yearweek (str): e.g., '2024-05'
    - ship_type (str): e.g., 'WIG'
    - cn (str): Company Name, e.g., 'NORDIC MARINE OIL AS'

    Returns:
    - List of predicted destinations
    """
    # Split YearWeek
    year, week = map(int, yearweek.split('-'))
    
    # Create DataFrame
    input_df = pd.DataFrame({
        'Year': [year],
        'Week': [week],
        'Ship_Type': [ship_type],
        'CN': [cn]
    })
    
    # Predict
    pred = model.predict(input_df)
    
    # Inverse transform to get destination names
    destinations = mlb.inverse_transform(pred)
    
    # Return the list of destinations
    return destinations[0] if destinations else []


new_yearweek = '2024-05'
new_ship_type = 'WIG'
new_cn = 'NORDIC MARINE OIL AS'

predicted_destinations = predict_destinations(new_yearweek, new_ship_type, new_cn, loaded_model, loaded_mlb)
print(f"Predicted Destinations: {predicted_destinations}")


Number of Destinations: 412




Hamming Loss: 0.0034
Jaccard Score: 0.3508
Classification Report:
                         precision    recall  f1-score   support

               Aabenraa       0.76      0.49      0.60        86
                Aalborg       0.58      0.59      0.59       134
                 Aarhus       0.67      0.57      0.62       257
               Aberdeen       0.50      0.06      0.11        17
            Aeroskobing       0.75      0.63      0.69        73
               Agnefest       0.17      0.12      0.14         8
                   Ahus       0.24      0.19      0.21        43
               Akrehamn       0.00      0.00      0.00         0
                    Ala       0.00      0.00      0.00         0
                Alcudia       0.00      0.00      0.00         0
                Alkmaar       0.00      0.00      0.00         0
                Allinge       0.75      0.68      0.71        53
              Amsterdam       0.00      0.00      0.00        16
             Anstruther

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


FileNotFoundError: [Errno 2] No such file or directory: 'models/multi_label_shipping_model.joblib'

In [9]:
model_filename = 'multi_label_shipping_model.joblib'
mlb_filename = 'multi_label_binarizer.joblib'

joblib.dump(model_pipeline, model_filename)
joblib.dump(mlb, mlb_filename)

['multi_label_binarizer.joblib']

In [None]:
model_filename = '/home/talal/fyp/predictive model/multi_label_shipping_model.joblib'
mlb_filename = '/home/talal/fyp/predictive model/multi_label_binarizer.joblib'
loaded_model = joblib.load(model_filename)
loaded_mlb = joblib.load(mlb_filename)

In [18]:


print("Model and MultiLabelBinarizer loaded successfully.")



def predict_destinations(yearweek, ship_type, cn, model, mlb):
    """
    Predict destinations based on YearWeek, Ship_Type, and CN.

    Parameters:
    - yearweek (str): e.g., '2024-05'
    - ship_type (str): e.g., 'WIG'
    - cn (str): Company Name, e.g., 'NORDIC MARINE OIL AS'

    Returns:
    - List of predicted destinations
    """
    # Split YearWeek
    year, week = map(int, yearweek.split('-'))
    
    # Create DataFrame
    input_df = pd.DataFrame({
        'Year': [year],
        'Week': [week],
        'Ship_Type': [ship_type],
        'CN': [cn]
    })
    
    # Predict
    pred = model.predict(input_df)
    
    # Inverse transform to get destination names
    destinations = mlb.inverse_transform(pred)
    
    # Return the list of destinations
    return destinations[0] if destinations else []

new_yearweek = '2024-52'
new_ship_type = 'WIG'
new_cn = 'Akrehamn'

predicted_destinations = predict_destinations(new_yearweek, new_ship_type, new_cn, loaded_model, loaded_mlb)
print(f"Predicted Destinations: {predicted_destinations}")


Model and MultiLabelBinarizer loaded successfully.
Predicted Destinations: ()
