In [None]:
# === Librairies pour la manipulation et l'analyse de données ===
import os
from pathlib import Path
import pandas as pd
from datetime import datetime

# === Librairies pour la visualisation ===
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

# === Librairies pour l'affichage interactif (Jupyter/IPython) ===
from IPython.display import display

pd.set_option('display.max_columns', None)
pd.set_option("display.width", None)


In [2]:
user = os.getlogin().lower()
print(f"Detected user: {user}")

# Use the default path only if the username is 'rabah'
if user == "rabah":
    file_path = r"A:\OneDrive - Sopex London Ltd\Shared\19. Shipments - Sopex\New_Freight_file_V1.xlsx"
    
else:
    # Fallback logic for other users
    file_path = ""

Detected user: rabah


In [None]:
df= pd.read_excel(file_path, sheet_name="Freight")

display(df.head())
print(df.columns.tolist())

freight= df[['Port POL', 'Country POD', 'Port POD', 
             'Freight', 'Freight_Currency', 'Surcharge', 'Surcharge_Currency', 
             'Freight All In', 'Extra', 'Unit', 'Extra_Currency', 'Terminal POL', 
             'Shipping Line', 
             'Shipping Line2', 'Free Time POL', 
             'Free Time POD', 'Validity', 'Standardized','Region']].copy()

display(freight.head())
print(freight.info())

EUR_USD = 1.1605

freight.loc[freight["Freight_Currency"] == "USD", "FREIGHT_USE_USD"] = freight["Freight"] * 1
freight.loc[freight["Freight_Currency"] == "EUR", "FREIGHT_USE_USD"] = freight["Freight"] * EUR_USD

freight.loc[freight["Surcharge_Currency"] == "USD", "Freight_Surcharge_USD"] = freight["Surcharge"] * 1
freight.loc[freight["Surcharge_Currency"] == "EUR", "Freight_Surcharge_USD"] = freight["Surcharge"] * EUR_USD

display(freight.head())
display(freight.info())


Unnamed: 0,Port POL,Country POD,Port POD,Freight,Freight_Currency,Surcharge,Surcharge_Currency,Freight All In,Extra,Unit,...,FREIGHT_USE_USD,Freight_Surcharge,FREIGHT_ALL_USD,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,EUR/USD,1.16
0,Santos,Tanzania,Dar Es Salaam,2800,USD,0.0,USD,2800.0,,,...,2800.0,0.0,2800.0,,,,,,,
1,Santos,Benin,Cotonou,1665,USD,0.0,USD,1665.0,,,...,1665.0,0.0,1665.0,,,,,,,
2,Santos,UAE,Jebel Ali,1775,USD,0.0,USD,1775.0,,,...,1775.0,0.0,1775.0,,,,,,,
3,Santos,Turkey,Mersin,1350,USD,0.0,USD,1350.0,,,...,1350.0,0.0,1350.0,,,,,,,
4,Santos,Ghana,Tema,1700,USD,0.0,USD,1700.0,,,...,1700.0,0.0,1700.0,,,,,,,


['Port POL', 'Country POD', 'Port POD', 'Freight', 'Freight_Currency', 'Surcharge', 'Surcharge_Currency', 'Freight All In', 'Extra', 'Unit', 'Extra_Currency', 'Terminal POL', 'Shipping Line', 'Shipping Line2', 'Free Time POL', 'Free Time POD', 'Validity', 'Standardized', 'Region', 'FREIGHT_USE_USD', 'Freight_Surcharge', 'FREIGHT_ALL_USD', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'EUR/USD', 1.16]


### Working on valid dates

In [None]:
df_clean = freight.dropna(subset=['Standardized'])

latest = df_clean.loc[df_clean.groupby(['Port POL', 'Port POD'])['Standardized'].idxmax()]

from datetime import datetime
today = datetime.today()

# Make a real copy to avoid SettingWithCopyWarning
df_future = df_clean[df_clean['Standardized'] > today].copy()

display(df_future.head())
print(df_future.info())

# Convert Extra safely to numeric (float), no integer casting
df_future["Extra"] = pd.to_numeric(df_future["Extra"].fillna(0), errors="coerce")

#df_future["Extra"] = pd.to_numeric(df_future["Extra"], errors="coerce")

# Compute Extra in USD
df_future.loc[df_future["Extra_Currency"] == "USD", "Freight_Extra_USD"] = df_future["Extra"] * 1
df_future.loc[df_future["Extra_Currency"] == "EUR", "Freight_Extra_USD"] = df_future["Extra"] * EUR_USD

# print unit unique
print(df_future["Unit"].unique())


display(df_future.head())
print(df_future.info())

# Make sure Unit is lowercase and stripped
df_future["Unit"] = df_future["Unit"].astype(str).str.strip().str.lower()

# Create the column if it doesn't exist
#df_future["Freight_Extra_USD"] = pd.to_numeric(df_future["Freight_Extra_USD"], errors="coerce")

# Case 1: Unit = "bl" → divide by 240
df_future.loc[df_future["Unit"] == "bl", "Freight_Extra_USD"] = (df_future["Freight_Extra_USD"] / 240).round(0)

# Case 2: Unit = "cntr" → divide by 240
df_future.loc[df_future["Unit"] == "cntr", "Freight_Extra_USD"] = (df_future["Freight_Extra_USD"] / 240).round(0)

# Case 3: Unit = NaN → do nothing (automatically handled)

print(df_future.info())

df_future["Freight_All_In_USD"] = (df_future["FREIGHT_USE_USD"].fillna(0)+ df_future["Freight_Surcharge_USD"].fillna(0)
    + df_future["Freight_Extra_USD"].fillna(0)).round(0)


print(df_future.info(verbose=True))

schema = pd.DataFrame({
    "dtype": df_future.dtypes,
    "non_null": df_future.notna().sum(),
    "nulls": df_future.isna().sum()
})

display(schema)


display(df_future.head())

print(df_future.info())

obj_cols = df_future.select_dtypes(include='object').columns

df_future[obj_cols] = (
    df_future[obj_cols]
    .astype(str)                                   # convert everything to string
    .apply(lambda col: col.str.strip().str.lower())  # trim + lowercase
    .apply(lambda col: col.str.replace(r'\s+', ' ', regex=True))  # remove extra spaces
)

Unnamed: 0,Port POL,Country POD,Port POD,Freight,Freight_Currency,Surcharge,Surcharge_Currency,Freight All In,Extra,Unit,Extra_Currency,Terminal POL,Shipping Line,Shipping Line2,Free Time POL,Free Time POD,Validity,Standardized,Region,FREIGHT_USE_USD,Freight_Surcharge_USD
981,Santos,Israel,Ashdod,1531,USD,116.0,USD,1647.0,,,,BTP,Monero,MSC,14,21,2026-02-28 00:00:00,2026-02-28,Mediterranean,1531.0,116.0
982,Santos,Lebanon,Beirut,1811,USD,116.0,USD,1927.0,,,,BTP,Monero,MSC,14,21,2026-02-28 00:00:00,2026-02-28,Mediterranean,1811.0,116.0
983,Santos,Albania,Durres,1531,USD,116.0,USD,1647.0,,,,BTP,Monero,MSC,14,21,2026-02-28 00:00:00,2026-02-28,Europe,1531.0,116.0
984,Santos,Turkey,Mersin,1631,USD,116.0,USD,1747.0,,,,BTP,Monero,MSC,14,21,2026-02-28 00:00:00,2026-02-28,Mediterranean,1631.0,116.0
985,Santos,Senegal,Dakar,1511,USD,116.0,USD,1627.0,,,,BTP,Monero,MSC,14,21,2026-02-28 00:00:00,2026-02-28,Africa West,1511.0,116.0


<class 'pandas.core.frame.DataFrame'>
Index: 81 entries, 981 to 1061
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   Port POL               81 non-null     object        
 1   Country POD            81 non-null     object        
 2   Port POD               81 non-null     object        
 3   Freight                81 non-null     int64         
 4   Freight_Currency       81 non-null     object        
 5   Surcharge              81 non-null     float64       
 6   Surcharge_Currency     80 non-null     object        
 7   Freight All In         81 non-null     float64       
 8   Extra                  12 non-null     object        
 9   Unit                   12 non-null     object        
 10  Extra_Currency         12 non-null     object        
 11  Terminal POL           34 non-null     object        
 12  Shipping Line          80 non-null     object        
 13  Shipping

In [None]:
# Format date
today = datetime.today().strftime("%Y%m%d")

# Build full path + filename
path = r"A:\OneDrive - Sopex London Ltd\Shared\19. Shipments - Sopex"
filename = f"{today}_last_freight.xlsx"

full_path = f"{path}\\{filename}"

# Save
df_future.to_excel(full_path, index=False)

In [64]:
pivot = df_future.pivot_table(
    index=['Region','Country POD','Port POD'],
    columns='Port POL',
    values='Freight_All_In_USD',
    aggfunc='min'
)

pivot = pivot.sort_index().sort_index(axis=1)
pivot_reset = pivot.reset_index()
pivot_flat = pivot.reset_index().rename_axis(None, axis=1)

# Identify POL columns (all columns except the index columns)
pol_cols = ['antwerp', 'buenaventura', 'laem chabang', 'santos']

# Add cheapest freight value
pivot_flat['cheapest_freight'] = pivot_flat[pol_cols].min(axis=1)

# Add the POL (column name) where the freight is cheapest
pivot_flat['cheapest_pol'] = pivot_flat[pol_cols].idxmin(axis=1)

display(pivot_flat)

Unnamed: 0,Region,Country POD,Port POD,antwerp,buenaventura,laem chabang,santos,cheapest_freight,cheapest_pol
0,africa west,benin,cotonou,1731.0,,1531.0,1527.0,1527.0,santos
1,africa west,cameroon,douala,1952.0,,2031.0,1977.0,1952.0,antwerp
2,africa west,gambia,banjul,,,,1780.0,1780.0,santos
3,africa west,ghana,tema,1214.0,,1599.0,1727.0,1214.0,antwerp
4,africa west,guinea,conakry,,,4982.0,,4982.0,laem chabang
5,africa west,ivory coast,abidjan,1080.0,,1531.0,1627.0,1080.0,antwerp
6,africa west,liberia,monrovia,,,,1850.0,1850.0,santos
7,africa west,mauritania,nouakchott,,,,1650.0,1650.0,santos
8,africa west,senegal,dakar,1876.0,,2781.0,1627.0,1627.0,santos
9,africa west,sierra leone,capetown,,,,1650.0,1650.0,santos


In [None]:
# Format date
today = datetime.today().strftime("%Y%m%d")

# Build full path + filename
path = r"A:\OneDrive - Sopex London Ltd\Shared\19. Shipments - Sopex"
filename = f"{today}_last_freight.xlsx"

full_path = f"{path}\\{filename}"

# Save
df_future.to_excel(full_path, index=False)

PermissionError: [Errno 13] Permission denied: 'A:\\OneDrive - Sopex London Ltd\\Shared\\19. Shipments - Sopex\\20260202_last_freight.xlsx'