In [None]:
import pandas as pd
import numpy as np

# Constants
num_samples = 1000

# Data Generation
np.random.seed(42)  # For reproducibility

df = pd.DataFrame({
    'RouteID': np.arange(num_samples),
    'POL': np.random.choice(['PortA', 'PortB', 'PortC'], num_samples),
    'POD': np.random.choice(['PortX', 'PortY', 'PortZ'], num_samples),
    'High_Incidents': np.random.randint(0, 10, num_samples),
    'Medium_Incidents': np.random.randint(0, 20, num_samples),
    'Low_Incidents': np.random.randint(0, 50, num_samples),
    'No_Of_Transhipments': np.random.randint(0, 7, num_samples),
    'Average_Transit_Days': np.random.normal(30, 5, num_samples),
    'Distance': np.random.normal(2000, 300, num_samples),
    'Crew_Experience_Level': np.random.normal(10, 2, num_samples),
    'Training_Programs': np.random.choice(['regular', 'irregular'], num_samples),
    'Navigational_Risks': np.random.choice(['congested areas', 'shallow waters', 'clear'], num_samples),
    'Carbon_Emissions': np.random.normal(10000, 2000, num_samples),
    'Operational_Cost': np.random.normal(500000, 100000, num_samples),
    'Insurance_Premium': np.random.normal(50000, 10000, num_samples),
    'Total_Revenue': np.random.normal(1000000, 200000, num_samples),
    'Regulatory_Compliance': np.random.choice(['compliant', 'non-compliant'], num_samples),
    'Previous_Safety_Awards': np.random.choice(['yes', 'no'], num_samples),
    'Emergency_Response_Plans': np.random.choice(['in place', 'not in place'], num_samples),
    'Cargo_Type': np.random.choice(['hazardous', 'non-hazardous'], num_samples),
    'Ship_Type': np.random.choice(['container', 'bulk carrier'], num_samples),
    'Average_Vessel_Age': np.random.normal(12, 5, num_samples),
    'Navigational_Risks': np.random.choice(['congested areas', 'shallow waters', 'clear'], num_samples),
})

# Define weights for each feature group
weights = {
    'Historical_Incidents': 0.3,
    'Operational_Factors': 0.25,
    'Environmental_Factors': 0.2,
    'Financial_Factors': 0.15,
    'Compliance_Factors': 0.1
}

def calculate_risk_score(row):
    # Historical Incidents
    historical_incidents_score = (
        row['High_Incidents'] * 1.5 + 
        row['Medium_Incidents'] * 1.0 + 
        row['Low_Incidents'] * 0.5
    ) * weights['Historical_Incidents']
    
    # Operational Factors
    operational_factors_score = (
        row['No_Of_Transhipments'] * 1.2 + 
        row['Average_Transit_Days'] * 1.1 + 
        row['Distance'] * 1.0 + 
        (row['Crew_Experience_Level'] > 8) * -1 +  # Adjusted for Crew Experience Level
        (row['Training_Programs'] == 'irregular') * 1.5
    ) * weights['Operational_Factors']
    
    # Environmental Factors
    environmental_factors_score = (
        (row['Navigational_Risks'] == 'congested areas') * 1.5 + 
        (row['Navigational_Risks'] == 'shallow waters') * 1.2 + 
        row['Carbon_Emissions'] * 1.0
    ) * weights['Environmental_Factors']
    
    # Financial Factors
    financial_factors_score = (
        row['Operational_Cost'] * 0.8 + 
        row['Insurance_Premium'] * 0.5 - 
        row['Total_Revenue'] * 0.2  # Higher revenue reduces risk
    ) * weights['Financial_Factors']
    
    # Compliance Factors
    compliance_factors_score = (
        (row['Regulatory_Compliance'] == 'non-compliant') * 2.0 + 
        (row['Previous_Safety_Awards'] == 'no') * 1.5 + 
        (row['Emergency_Response_Plans'] == 'not in place') * 1.2
    ) * weights['Compliance_Factors']
    
    # Combine all contributions to get the final risk score
    risk_score = (
        historical_incidents_score + 
        operational_factors_score + 
        environmental_factors_score + 
        financial_factors_score + 
        compliance_factors_score
    )
    
    return risk_score

# Apply the risk score calculation to the DataFrame
df['RiskScore'] = df.apply(calculate_risk_score, axis=1)

# Scale the RiskScore within 0-100
min_risk_score = df['RiskScore'].min()
max_risk_score = df['RiskScore'].max()

df['Scaled_RiskScore'] = (df['RiskScore'] - min_risk_score) / (max_risk_score - min_risk_score) * 100

# Categorize the RiskScore
bins = [0, 25, 50, 75, 100]
labels = ['Low', 'Medium', 'High', 'Critical']
df['Risk_Category'] = pd.cut(df['Scaled_RiskScore'], bins=bins, labels=labels)


df.to_csv('enhanced_shipping_data_with_route_and_risks19.csv', index=False)
# Display results



In [None]:
import pandas as pd
import numpy as np

# Constants
num_samples_per_risk = 1000

# Initialize random seed for reproducibility
np.random.seed(42)

def generate_data_for_risk_category(risk_category):
    if risk_category == 'Low':
        transhipments_range = (0, 3)
        transit_days_mean = 20
        distance_mean = 1500
        crew_experience_mean = 15
        carbon_emissions_mean = 8000
        operational_cost_mean = 450000
        insurance_premium_mean = 40000
        total_revenue_mean = 950000
        high_incidents_range = (0, 2)
        medium_incidents_range = (0, 5)
        low_incidents_range = (0, 10)
        cargo_type = 'non-hazardous'
        
    elif risk_category == 'Medium':
        transhipments_range = (3, 5)
        transit_days_mean = 30
        distance_mean = 2000
        crew_experience_mean = 12
        carbon_emissions_mean = 10000
        operational_cost_mean = 500000
        insurance_premium_mean = 45000
        total_revenue_mean = 1000000
        high_incidents_range = (1, 5)
        medium_incidents_range = (1, 10)
        low_incidents_range = (1, 20)
        cargo_type = 'both'
        
    else:  # High Risk
        transhipments_range = (5, 7)
        transit_days_mean = 40
        distance_mean = 2500
        crew_experience_mean = 10
        carbon_emissions_mean = 12000
        operational_cost_mean = 550000
        insurance_premium_mean = 50000
        total_revenue_mean = 1050000
        high_incidents_range = (5, 10)
        medium_incidents_range = (5, 15)
        low_incidents_range = (10, 30)
        cargo_type = 'hazardous'
    
    # Generate data with specified distributions
    transhipments = np.random.randint(*transhipments_range, num_samples_per_risk)
    transit_days = np.random.normal(transit_days_mean, 4, num_samples_per_risk)
    distance = np.random.normal(distance_mean, 200, num_samples_per_risk)
    crew_experience = np.random.normal(crew_experience_mean, 2, num_samples_per_risk)
    carbon_emissions = np.random.normal(carbon_emissions_mean, 1500, num_samples_per_risk)
    operational_cost = np.random.normal(operational_cost_mean, 80000, num_samples_per_risk)
    insurance_premium = np.random.normal(insurance_premium_mean, 8000, num_samples_per_risk)
    total_revenue = np.random.normal(total_revenue_mean, 150000, num_samples_per_risk)
    
    high_incidents = np.random.randint(*high_incidents_range, num_samples_per_risk)
    medium_incidents = np.random.randint(*medium_incidents_range, num_samples_per_risk)
    low_incidents = np.random.randint(*low_incidents_range, num_samples_per_risk)
    
    # Create the DataFrame
    df = pd.DataFrame({
        'RouteID': np.arange(num_samples_per_risk),
        'POL': np.random.choice(['PortA', 'PortB', 'PortC'], num_samples_per_risk),
        'POD': np.random.choice(['PortX', 'PortY', 'PortZ'], num_samples_per_risk),
        'High_Incidents': high_incidents,
        'Medium_Incidents': medium_incidents,
        'Low_Incidents': low_incidents,
        'No_Of_Travels': np.random.randint(10, 100, num_samples_per_risk),
        'No_Of_Transhipments': transhipments,
        'Average_Transit_Days': transit_days.astype(int),
        'Distance': distance.astype(int),
        'Crew_Experience_Level': crew_experience.astype(int),
        'Training_Programs': np.random.choice(['regular', 'irregular'], num_samples_per_risk),
        'Navigational_Risks': np.random.choice(['congested areas', 'shallow waters', 'clear'], num_samples_per_risk),
        'Carbon_Emissions': carbon_emissions.astype(int),
        'Operational_Cost': operational_cost.astype(int),
        'Insurance_Premium': insurance_premium.astype(int),
        'Total_Revenue': total_revenue.astype(int),
        'Regulatory_Compliance': np.random.choice(['compliant', 'non-compliant'], num_samples_per_risk),
        'Previous_Safety_Awards': np.random.choice(['yes', 'no'], num_samples_per_risk),
        'Emergency_Response_Plans': np.random.choice(['in place', 'not in place'], num_samples_per_risk),
        'Cargo_Type': np.random.choice([cargo_type, 'non-hazardous' if cargo_type == 'both' else 'hazardous'], num_samples_per_risk),
        'Ship_Type': np.random.choice(['container', 'bulk carrier'], num_samples_per_risk),
        'Average_Vessel_Age': np.random.normal(12, 5, num_samples_per_risk).astype(int),
    })
    
    # Define weights for each feature group
    weights = {
        'Historical_Incidents': 0.3,
        'Operational_Factors': 0.25,
        'Environmental_Factors': 0.2,
        'Financial_Factors': 0.15,
        'Compliance_Factors': 0.1
    }
    
    def calculate_risk_score(row):
        # Historical Incidents
        historical_incidents_score = (
            row['High_Incidents'] * 1.5 + 
            row['Medium_Incidents'] * 1.0 + 
            row['Low_Incidents'] * 0.5
        ) * weights['Historical_Incidents']
        
        # Operational Factors
        operational_factors_score = (
            row['No_Of_Transhipments'] * 1.2 + 
            row['Average_Transit_Days'] * 1.1 + 
            row['Distance'] * 1.0 + 
            (row['Crew_Experience_Level'] > 8) * -1 +  # Adjusted for Crew Experience Level
            (row['Training_Programs'] == 'irregular') * 1.5
        ) * weights['Operational_Factors']
        
        # Environmental Factors
        environmental_factors_score = (
            (row['Navigational_Risks'] == 'congested areas') * 1.5 + 
            (row['Navigational_Risks'] == 'shallow waters') * 1.2 + 
            row['Carbon_Emissions'] * 1.0
        ) * weights['Environmental_Factors']
        
        # Financial Factors
        financial_factors_score = (
            row['Operational_Cost'] * 0.8 + 
            row['Insurance_Premium'] * 0.5 - 
            row['Total_Revenue'] * 0.2  # Higher revenue reduces risk
        ) * weights['Financial_Factors']
        
        # Compliance Factors
        compliance_factors_score = (
            (row['Regulatory_Compliance'] == 'non-compliant') * 2.0 + 
            (row['Previous_Safety_Awards'] == 'no') * 1.5 + 
            (row['Emergency_Response_Plans'] == 'not in place') * 1.2
        ) * weights['Compliance_Factors']
        
        # Combine all contributions to get the final risk score
        risk_score = (
            historical_incidents_score + 
            operational_factors_score + 
            environmental_factors_score + 
            financial_factors_score + 
            compliance_factors_score
        )
        
        return risk_score

    # Apply the risk score calculation to the DataFrame
    df['RiskScore'] = df.apply(calculate_risk_score, axis=1)

    # Scale the RiskScore to be within 0-100
    df['RiskScore_Scaled'] = 100 * (df['RiskScore'] - df['RiskScore'].min()) / (df['RiskScore'].max() - df['RiskScore'].min())

    # Categorize the Risk Score
    def categorize_risk_score(score):
        if score <= 33:
            return 'Low'
        elif score <= 66:
            return 'Medium'
        else:
            return 'High'

    df['Risk_Category'] = df['RiskScore_Scaled'].apply(categorize_risk_score)

    return df

# Generate datasets for Low, Medium, and High risk routes
low_risk_df = generate_data_for_risk_category('Low')
medium_risk_df = generate_data_for_risk_category('Medium')
high_risk_df = generate_data_for_risk_category('High')

# Save to CSV files or use as needed
low_risk_df.to_csv('low_risk_routes.csv', index=False)
medium_risk_df.to_csv('medium_risk_routes.csv', index=False)
high_risk_df.to_csv('high_risk_routes.csv', index=False)

print("Data generation complete.")


In [None]:
import pandas as pd
import numpy as np

def calculate_scaled_risk_score(input_csv, output_csv):
    # Read the CSV file
    df = pd.read_csv(input_csv)
    
    # Define weights for each feature group
    weights = {
        'Historical_Incidents': 0.3,
        'Operational_Factors': 0.25,
        'Environmental_Factors': 0.2,
        'Financial_Factors': 0.15,
        'Compliance_Factors': 0.1
    }
    
    def calculate_risk_score(row):
        # Historical Incidents
        historical_incidents_score = (
            row['High_Incidents'] * 1.5 + 
            row['Medium_Incidents'] * 1.0 + 
            row['Low_Incidents'] * 0.5
        ) * weights['Historical_Incidents']
        
        # Operational Factors
        operational_factors_score = (
            row['No_Of_Transhipments'] * 1.2 + 
            row['Average_Transit_Days'] * 1.1 + 
            row['Distance'] * 1.0 + 
            (row['Crew_Experience_Level'] > 8) * -1 +  # Adjusted for Crew Experience Level
            (row['Training_Programs'] == 'irregular') * 1.5
        ) * weights['Operational_Factors']
        
        # Environmental Factors
        environmental_factors_score = (
            (row['Navigational_Risks'] == 'congested areas') * 1.5 + 
            (row['Navigational_Risks'] == 'shallow waters') * 1.2 + 
            row['Carbon_Emissions'] * 1.0
        ) * weights['Environmental_Factors']
        
        # Financial Factors
        financial_factors_score = (
            row['Operational_Cost'] * 0.8 + 
            row['Insurance_Premium'] * 0.5 - row['Total_Revenue'] * 0.2  # Higher revenue reduces risk
        ) * weights['Financial_Factors']
        
        # Compliance Factors
        compliance_factors_score = (
            (row['Regulatory_Compliance'] == 'non-compliant') * 2.0 + 
            (row['Previous_Safety_Awards'] == 'no') * 1.5 + 
            (row['Emergency_Response_Plans'] == 'not in place') * 1.2
        ) * weights['Compliance_Factors']
        
        # Combine all contributions to get the final risk score
        risk_score = (
            historical_incidents_score + 
            operational_factors_score + 
            environmental_factors_score + 
            financial_factors_score + 
            compliance_factors_score
        )
        
        return risk_score

    # Apply the risk score calculation to the DataFrame
    df['RiskScore'] = df.apply(calculate_risk_score, axis=1)

    # Scale the RiskScore to be within 0-100
    df['RiskScore_Scaled'] = 100 * (df['RiskScore'] - df['RiskScore'].min()) / (df['RiskScore'].max() - df['RiskScore'].min())

 # Define risk categories
    def categorize_risk_score(score):
        if score <= 30:
            return 'Low'
        elif score <= 50:
            return 'Medium'
        else:
            return 'High'
    
    # Apply categorization
    df['Risk_Category'] = df['RiskScore_Scaled'].apply(categorize_risk_score)

    # Save the updated DataFrame to a new CSV file
    df.to_csv(output_csv, index=False)

# Example usage
calculate_scaled_risk_score('enhanced_routes_risk.csv', 'enhanced_routes_final_risk.csv')


In [None]:
import pandas as pd
import numpy as np

# Define weights for each feature group
weights = {
    'Historical_Incidents': 0.5,
    'Operational_Factors': 0.25,
    'Environmental_Factors': 0.2,
    'Financial_Factors': 0.15,
    'Compliance_Factors': 0.1
}

def calculate_risk_score(row):
    # Historical Incidents
    historical_incidents_score = (
        row['High_Incidents'] * 3.5 + 
        row['Medium_Incidents'] * 1.0 + 
        row['Low_Incidents'] * 0.5
    ) * weights['Historical_Incidents']
    
    # Operational Factors
    # Adjust score based on Crew Experience Level
    crew_experience_adjustment = 1 if row['Crew_Experience_Level'] <= 8 else -1
    
    operational_factors_score = (
        row['No_Of_Transhipments'] * 1.2 + 
        row['Average_Transit_Days'] * 1.1 + 
        row['Distance'] * 1.0 + 
        crew_experience_adjustment +  # Adjusted for Crew Experience Level
        (row['Training_Programs'] == 'irregular') * 1.5
    ) * weights['Operational_Factors']
    
    # Environmental Factors
    environmental_factors_score = (
        (row['Navigational_Risks'] == 'congested areas') * 1.5 + 
        (row['Navigational_Risks'] == 'shallow waters') * 1.2 + 
        row['Carbon_Emissions'] * 1.0
    ) * weights['Environmental_Factors']
    
    # Financial Factors
    if 90 * row['Total_Revenue'] > (row['Operational_Cost'] + row['Insurance_Premium']):
        financial_factors_score = (
            (row['Operational_Cost'] + row['Insurance_Premium']) * 0.5 - row['Total_Revenue'] * 0.1
        ) * weights['Financial_Factors']
    else:
        financial_factors_score = (
            (row['Operational_Cost'] + row['Insurance_Premium']) * 0.8 - row['Total_Revenue'] * 0.2
        ) * weights['Financial_Factors']
    
    # Compliance Factors
    compliance_factors_score = (
        (row['Regulatory_Compliance'] == 'non-compliant') * 2.0 + 
        (row['Previous_Safety_Awards'] == 'no') * 1.5 + 
        (row['Emergency_Response_Plans'] == 'not in place') * 1.2
    ) * weights['Compliance_Factors']
    
    # Combine all contributions to get the final risk score
    risk_score = (
        historical_incidents_score + 
        operational_factors_score + 
        environmental_factors_score + 
        financial_factors_score + 
        compliance_factors_score
    )
    
    return risk_score

def process_data(file_path):
    # Read the data
    df = pd.read_csv(file_path)
    
    # Calculate RiskScore
    df['RiskScore'] = df.apply(calculate_risk_score, axis=1)
    
    # Scale the RiskScore to be within 0-100
    df['RiskScore_Scaled'] = 100 * (df['RiskScore'] - df['RiskScore'].min()) / (df['RiskScore'].max() - df['RiskScore'].min())
    
    # Categorize the Risk Score
    def categorize_risk_score(score):
        if score <= 33:
            return 'Low'
        elif score <= 66:
            return 'Medium'
        else:
            return 'High'
    
    df['Risk_Category'] = df['RiskScore_Scaled'].apply(categorize_risk_score)
    
    # Save the updated DataFrame to a new CSV
    output_file_path = file_path.replace('.csv', '_with_risks.csv')
    df.to_csv(output_file_path, index=False)
    print(f"Updated data saved to {output_file_path}")

# Example usage
process_data('enhanced_routes_risk.csv')


## CONSIDER OPERATIONAL COST AND CARGO VALUE ONLY

In [2]:
import pandas as pd
import numpy as np

# Constants
num_samples_per_risk = 1000

# Initialize random seed for reproducibility
np.random.seed(42)

def generate_data_for_risk_category(risk_category):
    if risk_category == 'Low':
        num_samples_per_risk=1000
        transhipments_range = (0, 3)
        transit_days_mean = 20
        distance_mean = 1500
        crew_experience_mean = 15
        carbon_emissions_range = (600,1000)
        operational_cost_mean = 350000
        insurance_premium_mean = 30000
        total_revenue_mean = 950000
        high_incidents_range = (0, 2)
        medium_incidents_range = (0, 5)
        low_incidents_range = (0, 7)
        cargo_type = 'non-hazardous'
        cargo_value_mean = 350000
        
    elif risk_category == 'Medium':
        num_samples_per_risk=300
        transhipments_range = (3, 5)
        transit_days_mean = 30
        distance_mean = 2000
        crew_experience_mean = 12
        carbon_emissions_range = (1300,2500)
        operational_cost_mean = 450000
        insurance_premium_mean = 40000
        total_revenue_mean = 1000000
        high_incidents_range = (1, 5)
        medium_incidents_range = (1, 8)
        low_incidents_range = (5, 15)
        cargo_type = 'both'
        cargo_value_mean = 550000
        
    else:  # High Risk
        num_samples_per_risk=100
        transhipments_range = (5, 7)
        transit_days_mean = 40
        distance_mean = 2500
        crew_experience_mean = 10
        carbon_emissions_range = (3000,5500)
        operational_cost_mean = 550000
        insurance_premium_mean = 50000
        total_revenue_mean = 1050000
        high_incidents_range = (25, 50)
        medium_incidents_range = (5, 15)
        low_incidents_range = (10, 30)
        cargo_type = 'hazardous'
        cargo_value_mean = 850000
    
    # Generate data with specified distributions
    transhipments = np.random.randint(*transhipments_range, num_samples_per_risk)
    transit_days = np.random.normal(transit_days_mean, 4, num_samples_per_risk)
    distance = np.random.normal(distance_mean, 200, num_samples_per_risk)
    crew_experience = np.random.normal(crew_experience_mean, 2, num_samples_per_risk)
    
    # Carbon emissions based on the specified range
    carbon_emissions = np.random.randint(*carbon_emissions_range, num_samples_per_risk)
    
    cargo_value = np.random.normal(cargo_value_mean, 100000, num_samples_per_risk)
    
    high_incidents = np.random.randint(*high_incidents_range, num_samples_per_risk)
    medium_incidents = np.random.randint(*medium_incidents_range, num_samples_per_risk)
    low_incidents = np.random.randint(*low_incidents_range, num_samples_per_risk)
    
    # Calculate Operational Cost (includes fuel, crew wages, port fees, maintenance, and other logistical expenses)
    operational_cost = np.random.normal(operational_cost_mean, 80000, num_samples_per_risk)
    operational_cost += 0.05 * cargo_value  # Increase operational cost slightly based on cargo value
    
    # Calculate Insurance Premium (update based on High_Incidents, Medium_Incidents, or difficult Navigational_Risks)
    insurance_premium = np.random.normal(insurance_premium_mean, 8000, num_samples_per_risk)
    insurance_premium += 0.01 * cargo_value  # Base premium increase with cargo value
    insurance_premium += high_incidents * 1000  # Increase premium per high incident
    insurance_premium += medium_incidents * 500  # Increase premium per medium incident

    # Further increase insurance premium based on difficult navigational risks
    navigational_risk_multiplier = np.random.choice([1.0, 1.2, 1.5], p=[0.5, 0.3, 0.2], size=num_samples_per_risk)
    insurance_premium *= navigational_risk_multiplier

    total_revenue = np.random.normal(total_revenue_mean, 150000, num_samples_per_risk)
    
    # Create the DataFrame
    df = pd.DataFrame({
        'RouteID': np.arange(num_samples_per_risk),
        'POL': np.random.choice(['PortA', 'PortB', 'PortC'], num_samples_per_risk),
        'POD': np.random.choice(['PortX', 'PortY', 'PortZ'], num_samples_per_risk),
        'High_Incidents': high_incidents,
        'Medium_Incidents': medium_incidents,
        'Low_Incidents': low_incidents,
        'No_Of_Travels': np.random.randint(10, 100, num_samples_per_risk),
        'No_Of_Transhipments': transhipments,
        'Average_Transit_Days': transit_days.astype(int),
        'Distance': distance.astype(int),
        'Crew_Experience_Level': crew_experience.astype(int),
        'Training_Programs': np.random.choice(['regular', 'irregular'], num_samples_per_risk),
        #'Navigational_Risks': np.random.choice(['congested areas', 'shallow waters', 'clear'], num_samples_per_risk),
        'Navigational_Risks': np.random.choice(['congested areas', 'clear'], num_samples_per_risk),
        'Carbon_Emissions': carbon_emissions.astype(int),
        'Operational_Cost': operational_cost.astype(int),
        'Insurance_Premium': insurance_premium.astype(int),
        'Total_Revenue': total_revenue.astype(int),
        'Regulatory_Compliance': np.random.choice(['compliant', 'non-compliant'], num_samples_per_risk),
        'Previous_Safety_Awards': np.random.choice(['yes', 'no'], num_samples_per_risk),
        'Emergency_Response_Plans': np.random.choice(['in place', 'not in place'], num_samples_per_risk),
        'Cargo_Type': np.random.choice([cargo_type, 'non-hazardous' if cargo_type == 'both' else 'hazardous'], num_samples_per_risk),
        'Ship_Type': np.random.choice(['container', 'bulk carrier'], num_samples_per_risk),
        'Average_Vessel_Age': np.random.normal(12, 5, num_samples_per_risk).astype(int),
        'Cargo_Value': cargo_value.astype(int),
    })
    
    # Define weights for each feature group
    weights = {
        'Historical_Incidents': 0.3,
        'Operational_Factors': 0.25,
        'Environmental_Factors': 0.2,
        'Financial_Factors': 0.15,
        'Compliance_Factors': 0.1
    }
    
    def calculate_risk_score(row):
        # Historical Incidents
        historical_incidents_score = (
            row['High_Incidents'] * 1.5 + 
            row['Medium_Incidents'] * 1.0 + 
            row['Low_Incidents'] * 0.1
        ) * weights['Historical_Incidents']
        
        # Operational Factors
        operational_factors_score = (
            row['No_Of_Transhipments'] * 1.2 + 
            row['Average_Transit_Days'] * 1.1 + 
            row['Distance'] * 1.0 + 
            (row['Crew_Experience_Level'] > 8) * -1 +  # Adjusted for Crew Experience Level
            (row['Training_Programs'] == 'irregular') * 1.5
        ) * weights['Operational_Factors']
        
        # Environmental Factors
        environmental_factors_score = (
            (row['Navigational_Risks'] == 'congested areas') * 1.5 + 
            (row['Navigational_Risks'] == 'shallow waters') * 1.2 + 
            row['Carbon_Emissions'] * 1.0
        ) * weights['Environmental_Factors']
        
        # Financial Factors
# Financial Factors (Without Total_Revenue)
        financial_factors_score = (
            row['Operational_Cost'] * 0.4 + 
            row['Insurance_Premium'] * 0.2
            ) * weights['Financial_Factors']
        
        # Compliance Factors
        compliance_factors_score = (
            (row['Regulatory_Compliance'] == 'non-compliant') * 2.0 + 
            (row['Previous_Safety_Awards'] == 'no') * 1.5 + 
            (row['Emergency_Response_Plans'] == 'not in place') * 1.2
        ) * weights['Compliance_Factors']
        
        # Combine all contributions to get the final risk score
        risk_score = (
            historical_incidents_score + 
            operational_factors_score + 
            environmental_factors_score + 
            financial_factors_score + 
            compliance_factors_score
        )
        
        return risk_score

    # Apply the risk score calculation to the DataFrame
    df['RiskScore'] = df.apply(calculate_risk_score, axis=1)
    
    return df

# Generate data for each risk category
low_risk_data = generate_data_for_risk_category('Low')
medium_risk_data = generate_data_for_risk_category('Medium')
high_risk_data = generate_data_for_risk_category('High')

# Concatenate all risk category data into a single DataFrame
final_data = pd.concat([low_risk_data, medium_risk_data, high_risk_data])

# Scale RiskScore to fit between 100 and 200
min_score = final_data['RiskScore'].min()
max_score = final_data['RiskScore'].max()

final_data['Scaled_RiskScore'] = 100 * (final_data['RiskScore'] - min_score) / (max_score - min_score)

 # Categorize the Risk Score
def categorize_risk_score(score):
        if score <= 30:
            return 'Low'
        elif score <= 60:
            return 'Medium'
        else:
            return 'High'
    
final_data['Risk_Category'] = final_data['Scaled_RiskScore'].apply(categorize_risk_score)

final_data.reset_index(drop=True, inplace=True)

final_data.to_csv("cost_routes.csv",index=False)

# Show the final data
final_data.head()


Unnamed: 0,RouteID,POL,POD,High_Incidents,Medium_Incidents,Low_Incidents,No_Of_Travels,No_Of_Transhipments,Average_Transit_Days,Distance,...,Regulatory_Compliance,Previous_Safety_Awards,Emergency_Response_Plans,Cargo_Type,Ship_Type,Average_Vessel_Age,Cargo_Value,RiskScore,Scaled_RiskScore,Risk_Category
0,0,PortC,PortX,0,1,0,21,2,20,1517,...,compliant,no,not in place,hazardous,bulk carrier,9,339388,25960.025,35.50313,Medium
1,1,PortC,PortX,0,1,6,80,0,16,1795,...,compliant,no,not in place,non-hazardous,container,16,520465,26945.98,37.45887,Medium
2,2,PortB,PortX,0,2,6,10,2,21,1271,...,non-compliant,no,in place,non-hazardous,container,5,589076,15997.565,15.741598,Low
3,3,PortC,PortY,1,1,6,42,2,19,1461,...,compliant,no,not in place,non-hazardous,bulk carrier,16,387709,23786.26,31.191251,Medium
4,4,PortA,PortZ,0,1,5,23,0,19,1356,...,non-compliant,yes,not in place,hazardous,container,12,340003,16927.635,17.586485,Low
