In [1]:
import numpy as np
import pandas as pd
import random
from datetime import datetime, timedelta

In [2]:


# Number of samples
num_samples = 1000

# Generate a range of dates
start_date = datetime(2022, 1, 1)
end_date = datetime(2023, 12, 31)
date_range = [start_date + timedelta(days=x) for x in range((end_date - start_date).days + 1)]

# Generate realistic data
data = {
    'Distance': np.random.normal(loc=8000, scale=1500, size=num_samples),  # Nautical miles
    'Average_Transit_Days': np.random.normal(loc=20, scale=4, size=num_samples),  # Days
    'Number_Of_Travels': np.random.normal(loc=30, scale=10, size=num_samples),  # Number of trips
    'Total_Incidents_Count': np.random.poisson(lam=4, size=num_samples),  # Count of incidents
    'High_Incidents_Count': np.random.poisson(lam=1, size=num_samples),  # High severity incidents
    'Medium_Incidents_Count': np.random.poisson(lam=2, size=num_samples),  # Medium severity incidents
    'Low_Incidents_Count': np.random.poisson(lam=1, size=num_samples),  # Low severity incidents
    'High_Last_5Months_Incidents': np.random.poisson(lam=0.8, size=num_samples),  # Recent high severity incidents
    'Medium_Last_5Months_Incidents': np.random.poisson(lam=1.5, size=num_samples),  # Recent medium severity incidents
    'Low_Last_5Months_Incidents': np.random.poisson(lam=0.7, size=num_samples),  # Recent low severity incidents
    'Average_Vessel_Age': np.random.normal(loc=12, scale=5, size=num_samples),  # Years
    'Cargo_Type': np.random.choice(['hazardous', 'non-hazardous'], num_samples),  # Categorical
    'Port_Condition': np.random.choice(['well-maintained', 'poor'], num_samples),  # Categorical
    'Weather_Condition': np.random.choice(['calm', 'stormy'], num_samples),  # Categorical
    'Piracy_Risk': np.random.choice(['high', 'medium', 'low'], num_samples),  # Categorical
    'Port_Infrastructure_Quality': np.random.choice(['excellent', 'good', 'fair', 'poor'], num_samples),  # Categorical
    'Historical_Incident_Frequency': np.random.normal(loc=0.6, scale=0.2, size=num_samples),  # Incidents per year
    'Operational_Disruptions': np.random.choice(['strikes', 'port closures', 'none'], num_samples),  # Categorical
    'Navigational_Risks': np.random.choice(['congested areas', 'shallow waters', 'clear'], num_samples),  # Categorical
    'Regulatory_Compliance': np.random.choice(['compliant', 'non-compliant'], num_samples),  # Categorical
    'Crew_Experience_Level': np.random.normal(loc=10, scale=3, size=num_samples),  # Years
    'Ship_Maintenance_Records': np.random.choice(['up-to-date', 'overdue'], num_samples),  # Categorical
    'Insurance_Coverage': np.random.choice(['full', 'partial', 'none'], num_samples),  # Categorical
    'Cargo_Value': np.random.normal(loc=1500000, scale=300000, size=num_samples),  # USD
    'Ship_Type': np.random.choice(['container', 'bulk carrier'], num_samples),  # Categorical
    'Route_Duration': np.random.normal(loc=25, scale=5, size=num_samples),  # Days
    'Incident_Severity': np.random.choice(['minor', 'major', 'critical'], num_samples),  # Categorical
    'Previous_Safety_Awards': np.random.choice(['yes', 'no'], num_samples),  # Categorical
    'Training_Programs': np.random.choice(['regular', 'irregular'], num_samples),  # Categorical
    'Emergency_Response_Plans': np.random.choice(['in place', 'not in place'], num_samples),  # Categorical
    
    # New features
    'POL_Code': np.random.choice(['POL1', 'POL2', 'POL3', 'POL4'], num_samples),  # Categorical
    'POL_Region': np.random.choice(['Region1', 'Region2', 'Region3'], num_samples),  # Categorical
    'POD_Code': np.random.choice(['POD1', 'POD2', 'POD3', 'POD4'], num_samples),  # Categorical
    'POD_Region': np.random.choice(['Region1', 'Region2', 'Region3'], num_samples),  # Categorical
    'Transshipments_Count': np.random.poisson(lam=1.5, size=num_samples),  # Number of transshipments
    'Average_Transshipment_Days': np.random.normal(loc=4, scale=1, size=num_samples),  # Days
    'Transshipment_Risk_Level': np.random.choice(['high', 'medium', 'low'], num_samples),  # Categorical
    'Carbon_Emissions': np.random.normal(loc=100, scale=25, size=num_samples),  # Metric tons

    # Generate incident dates
    'Incident_Date': [random.choice(date_range).strftime('%Y-%m-%d') for _ in range(num_samples)]  # Random dates
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Adding the RiskScore target variable with realistic proportions
df['RiskScore'] = np.random.choice(['Low Risk', 'Medium Risk', 'High Risk'], num_samples, p=[0.5, 0.3, 0.2])

# Save the DataFrame to a CSV file if needed
df.to_csv('realistic_mock_shipping_data_with_dates.csv', index=False)

print(df.head())


      Distance  Average_Transit_Days  Number_Of_Travels  \
0  5805.663513             26.379413          27.863568   
1  9322.567131             17.044690          36.308224   
2  8058.362756             16.163868          25.564987   
3  6867.644933             23.526115          41.879096   
4  8388.415358             25.976758          17.514580   

   Total_Incidents_Count  High_Incidents_Count  Medium_Incidents_Count  \
0                      2                     3                       1   
1                      4                     0                       2   
2                      5                     1                       2   
3                      3                     3                       1   
4                      4                     1                       1   

   Low_Incidents_Count  High_Last_5Months_Incidents  \
0                    0                            0   
1                    1                            1   
2                    0             

In [4]:
import numpy as np
import pandas as pd
from datetime import datetime

# Sample DataFrame
num_samples = 1000
np.random.seed(0)

# Generating mock data
df = pd.DataFrame({
    'Distance': np.random.normal(loc=500, scale=100, size=num_samples),
    'Average_Transit_Days': np.random.normal(loc=10, scale=2, size=num_samples),
    'Number_Of_Travels': np.random.randint(1, 50, size=num_samples),
    'Total_Incidents_Count': np.random.poisson(lam=5, size=num_samples),
    'Carbon_Emissions': np.random.normal(loc=100, scale=20, size=num_samples),
    'Cargo_Value': np.random.normal(loc=1000000, scale=200000, size=num_samples),
    'Route_Duration': np.random.normal(loc=15, scale=5, size=num_samples),
    'Incident_Date': pd.date_range(start='2023-01-01', periods=num_samples, freq='D'),
    'Previous_Safety_Awards': np.random.choice(['yes', 'no'], num_samples),
    'Training_Programs': np.random.choice(['regular', 'irregular'], num_samples),
    'Incident_Severity': np.random.choice(['minor', 'major', 'critical'], num_samples)
})

# Convert Incident_Date to datetime
df['Incident_Date'] = pd.to_datetime(df['Incident_Date'])

# Calculate recent incidents (e.g., incidents in the last 10 days)
today = datetime.now()
df['Days_Since_Incident'] = (today - df['Incident_Date']).dt.days
df['Recent_Incidents'] = df['Days_Since_Incident'].apply(lambda x: 1 if x <= 10 else 0)

# Simulating POL and POD codes
df['POL_Code'] = np.random.choice(['POL1', 'POL2', 'POL3'], num_samples)
df['POD_Code'] = np.random.choice(['POD1', 'POD2', 'POD3'], num_samples)

# Aggregate recent incidents
recent_incidents_agg = df.groupby(['POL_Code', 'POD_Code', 'Route_Duration'])['Recent_Incidents'].sum().reset_index()
recent_incidents_agg.rename(columns={'Recent_Incidents': 'Total_Recent_Incidents'}, inplace=True)
df = df.merge(recent_incidents_agg, on=['POL_Code', 'POD_Code', 'Route_Duration'], how='left')

# Normalize continuous features
for feature in ['Distance', 'Average_Transit_Days', 'Number_Of_Travels', 'Total_Incidents_Count', 'Carbon_Emissions', 'Cargo_Value', 'Route_Duration', 'Total_Recent_Incidents']:
    df[feature] = (df[feature] - df[feature].mean()) / df[feature].std()

# Encode categorical features
df = pd.get_dummies(df, columns=['Previous_Safety_Awards', 'Training_Programs', 'Incident_Severity'])

# Define weights (adjust these based on your requirements)
weights = {
    'Total_Incidents_Count': 0.25,
    'Carbon_Emissions': 0.15,
    'Number_Of_Travels': 0.1,
    'Average_Transit_Days': 0.1,
    'Total_Recent_Incidents': 0.15,
    'Cargo_Value': 0.1,
    'Previous_Safety_Awards_yes': 0.05,
    'Training_Programs_regular': 0.05,
    'Incident_Severity_minor': 0.05,
    'Incident_Severity_major': 0.1
}

# Calculate RiskScore based on weighted sum
df['RiskScore'] = (
    df['Total_Incidents_Count'] * weights['Total_Incidents_Count'] +
    df['Carbon_Emissions'] * weights['Carbon_Emissions'] +
    df['Number_Of_Travels'] * weights['Number_Of_Travels'] +
    df['Average_Transit_Days'] * weights['Average_Transit_Days'] +
    df['Total_Recent_Incidents'] * weights['Total_Recent_Incidents'] +
    df['Cargo_Value'] * weights['Cargo_Value'] +
    df['Previous_Safety_Awards_yes'] * weights['Previous_Safety_Awards_yes'] +
    df['Training_Programs_regular'] * weights['Training_Programs_regular'] +
    df['Incident_Severity_minor'] * weights['Incident_Severity_minor'] +
    df['Incident_Severity_major'] * weights['Incident_Severity_major']
)

# Convert RiskScore to categories (optional)
bins = [-np.inf, -1, 0.5, 1, np.inf]
labels = ['Low Risk', 'Medium Risk', 'High Risk']
#df['RiskScore_Category'] = pd.cut(df['RiskScore'], bins=bins, labels=labels)
df['RiskScore_Category'] = ''


# Save the updated DataFrame
df.to_csv('updated_shipping_data_with_risks.csv', index=False)

print(df[['RiskScore', 'RiskScore_Category']].head())


   RiskScore RiskScore_Category
0   0.524120                   
1  -0.842819                   
2  -0.241838                   
3   0.243107                   
4  -0.769997                   


In [5]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler

# Set random seed for reproducibility
np.random.seed(0)

# Number of samples
num_samples = 1000

# Generate mock data with RouteID, POL, and POD
df = pd.DataFrame({
    'RouteID': np.arange(1, num_samples + 1),  # Sequential RouteID
    'POL_Code': np.random.choice(['POL1', 'POL2', 'POL3'], num_samples),
    'POD_Code': np.random.choice(['POD1', 'POD2', 'POD3'], num_samples),
    'Distance': np.random.normal(loc=500, scale=100, size=num_samples),
    'Average_Transit_Days': np.random.normal(loc=10, scale=2, size=num_samples),
    'Number_Of_Travels': np.random.randint(1, 50, size=num_samples),
    'Total_Incidents_Count': np.random.poisson(lam=5, size=num_samples),
    'Carbon_Emissions': np.random.normal(loc=100, scale=20, size=num_samples),
    'Cargo_Value': np.random.normal(loc=1500000, scale=300000, size=num_samples),
    'Route_Duration': np.random.normal(loc=25, scale=5, size=num_samples),
    'Incident_Date': [datetime.now() - timedelta(days=np.random.randint(0, 365)) for _ in range(num_samples)],
    'Previous_Safety_Awards': np.random.choice(['yes', 'no'], num_samples),
    'Training_Programs': np.random.choice(['regular', 'irregular'], num_samples),
    'Incident_Severity': np.random.choice(['minor', 'major', 'critical'], num_samples),
    'Average_Vessel_Age': np.random.normal(loc=12, scale=5, size=num_samples),
    'Cargo_Type': np.random.choice(['hazardous', 'non-hazardous'], num_samples),
    'Navigational_Risks': np.random.choice(['congested areas', 'shallow waters', 'clear'], num_samples),
    'Regulatory_Compliance': np.random.choice(['compliant', 'non-compliant'], num_samples),
    'Crew_Experience_Level': np.random.normal(loc=10, scale=3, size=num_samples),
    'Ship_Maintenance_Records': np.random.choice(['up-to-date', 'overdue'], num_samples),
    'Insurance_Coverage': np.random.choice(['full', 'partial', 'none'], num_samples),
    'Ship_Type': np.random.choice(['container', 'bulk carrier'], num_samples),
    'Emergency_Response_Plans': np.random.choice(['in place', 'not in place'], num_samples)
})

# Convert Incident_Date to datetime
df['Incident_Date'] = pd.to_datetime(df['Incident_Date'])

# Calculate recent incidents (e.g., incidents in the last 10 days)
today = datetime.now()
df['Days_Since_Incident'] = (today - df['Incident_Date']).dt.days
df['Recent_Incidents'] = df['Days_Since_Incident'].apply(lambda x: 1 if x <= 10 else 0)

# Aggregate recent incidents
recent_incidents_agg = df.groupby(['POL_Code', 'POD_Code', 'Route_Duration'])['Recent_Incidents'].sum().reset_index()
recent_incidents_agg.rename(columns={'Recent_Incidents': 'Total_Recent_Incidents'}, inplace=True)
df = df.merge(recent_incidents_agg, on=['POL_Code', 'POD_Code', 'Route_Duration'], how='left')

# Normalize continuous features using Min-Max Scaling
features = ['Distance', 'Average_Transit_Days', 'Number_Of_Travels', 'Total_Incidents_Count', 
             'Carbon_Emissions', 'Cargo_Value', 'Route_Duration', 'Total_Recent_Incidents',
             'Average_Vessel_Age', 'Crew_Experience_Level']
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

# Encode categorical features
df = pd.get_dummies(df, columns=['Previous_Safety_Awards', 'Training_Programs', 'Incident_Severity',
                                 'Cargo_Type', 'Navigational_Risks', 'Regulatory_Compliance',
                                 'Ship_Maintenance_Records', 'Insurance_Coverage', 'Ship_Type',
                                 'Emergency_Response_Plans'])

# Define weights for RiskScore calculation
weights = {
    'Total_Incidents_Count': 0.2,
    'Carbon_Emissions': 0.1,
    'Number_Of_Travels': 0.1,
    'Average_Transit_Days': 0.1,
    'Total_Recent_Incidents': 0.1,
    'Cargo_Value': 0.1,
    'Average_Vessel_Age': 0.05,
    'Crew_Experience_Level': 0.05,
    'Cargo_Type_hazardous': 0.05,
    'Navigational_Risks_congested areas': 0.05,
    'Regulatory_Compliance_non-compliant': 0.05,
    'Ship_Maintenance_Records_overdue': 0.05,
    'Insurance_Coverage_partial': 0.05,
    'Ship_Type_bulk carrier': 0.05,
    'Emergency_Response_Plans_not in place': 0.05
}

# Calculate RiskScore based on weighted sum
df['RiskScore'] = (
    df['Total_Incidents_Count'] * weights['Total_Incidents_Count'] +
    df['Carbon_Emissions'] * weights['Carbon_Emissions'] +
    df['Number_Of_Travels'] * weights['Number_Of_Travels'] +
    df['Average_Transit_Days'] * weights['Average_Transit_Days'] +
    df['Total_Recent_Incidents'] * weights['Total_Recent_Incidents'] +
    df['Cargo_Value'] * weights['Cargo_Value'] +
    df['Average_Vessel_Age'] * weights['Average_Vessel_Age'] +
    df['Crew_Experience_Level'] * weights['Crew_Experience_Level'] +
    df['Cargo_Type_hazardous'] * weights['Cargo_Type_hazardous'] +
    df['Navigational_Risks_congested areas'] * weights['Navigational_Risks_congested areas'] +
    df['Regulatory_Compliance_non-compliant'] * weights['Regulatory_Compliance_non-compliant'] +
    df['Ship_Maintenance_Records_overdue'] * weights['Ship_Maintenance_Records_overdue'] +
    df['Insurance_Coverage_partial'] * weights['Insurance_Coverage_partial'] +
    df['Ship_Type_bulk carrier'] * weights['Ship_Type_bulk carrier'] +
    df['Emergency_Response_Plans_not in place'] * weights['Emergency_Response_Plans_not in place']
)

# Normalize RiskScore to [0, 1] range
df['RiskScore'] = (df['RiskScore'] - df['RiskScore'].min()) / (df['RiskScore'].max() - df['RiskScore'].min())

# Define bins and labels for RiskScore categories
bins = [0, 0.33, 0.66, 1]
labels = ['Low Risk', 'Medium Risk', 'High Risk']

# Convert RiskScore to categories
df['RiskScore_Category'] = pd.cut(df['RiskScore'], bins=bins, labels=labels)

# Save the updated DataFrame
df.to_csv('enhanced_shipping_data_with_route_and_risks.csv', index=False)

df.head()


   RouteID POL_Code POD_Code  RiskScore RiskScore_Category
0        1     POL1     POD2   0.723118          High Risk
1        2     POL2     POD2   0.557970        Medium Risk
2        3     POL1     POD2   0.736983          High Risk
3        4     POL2     POD2   0.592353        Medium Risk
4        5     POL2     POD1   0.829720          High Risk


In [7]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler

# Set random seed for reproducibility
np.random.seed(0)

# Number of samples
num_samples = 1000

# Generate mock data with RouteID, POL, and POD
df = pd.DataFrame({
    'RouteID': np.arange(1, num_samples + 1),  # Sequential RouteID
    'POL_Code': np.random.choice(['POL1', 'POL2', 'POL3'], num_samples),
    'POD_Code': np.random.choice(['POD1', 'POD2', 'POD3'], num_samples),
    'Distance': np.random.randint(200, 1000, size=num_samples),  # Integer values for Distance
    'Average_Transit_Days': np.random.randint(5, 20, size=num_samples),  # Integer values for Average_Transit_Days
    'Number_Of_Travels': np.random.randint(1, 50, size=num_samples),
    'Total_Incidents_Count': np.random.poisson(lam=5, size=num_samples),
    'Carbon_Emissions': np.random.normal(loc=100, scale=20, size=num_samples),
    'Cargo_Value': np.random.normal(loc=1500000, scale=300000, size=num_samples),
    'Route_Duration': np.random.randint(10, 40, size=num_samples),  # Integer values for Route_Duration
    'Incident_Date': [datetime.now() - timedelta(days=np.random.randint(0, 365)) for _ in range(num_samples)],
    'Previous_Safety_Awards': np.random.choice(['yes', 'no'], num_samples),
    'Training_Programs': np.random.choice(['regular', 'irregular'], num_samples),
    'Incident_Severity': np.random.choice(['minor', 'major', 'critical'], num_samples),
    'Average_Vessel_Age': np.random.normal(loc=12, scale=5, size=num_samples),
    'Cargo_Type': np.random.choice(['hazardous', 'non-hazardous'], num_samples),
    'Navigational_Risks': np.random.choice(['congested areas', 'shallow waters', 'clear'], num_samples),
    'Regulatory_Compliance': np.random.choice(['compliant', 'non-compliant'], num_samples),
    'Crew_Experience_Level': np.random.normal(loc=10, scale=3, size=num_samples),
    'Ship_Maintenance_Records': np.random.choice(['up-to-date', 'overdue'], num_samples),
    'Insurance_Coverage': np.random.choice(['full', 'partial', 'none'], num_samples),
    'Ship_Type': np.random.choice(['container', 'bulk carrier'], num_samples),
    'Emergency_Response_Plans': np.random.choice(['in place', 'not in place'], num_samples)
})

# Convert Incident_Date to datetime
df['Incident_Date'] = pd.to_datetime(df['Incident_Date'])

# Calculate recent incidents (e.g., incidents in the last 10 days)
today = datetime.now()
df['Days_Since_Incident'] = (today - df['Incident_Date']).dt.days
df['Recent_Incidents'] = df['Days_Since_Incident'].apply(lambda x: 1 if x <= 10 else 0)

# Aggregate recent incidents
recent_incidents_agg = df.groupby(['POL_Code', 'POD_Code', 'Route_Duration'])['Recent_Incidents'].sum().reset_index()
recent_incidents_agg.rename(columns={'Recent_Incidents': 'Total_Recent_Incidents'}, inplace=True)
df = df.merge(recent_incidents_agg, on=['POL_Code', 'POD_Code', 'Route_Duration'], how='left')

# Normalize continuous features using Min-Max Scaling
features = ['Distance', 'Average_Transit_Days', 'Number_Of_Travels', 'Total_Incidents_Count', 
             'Carbon_Emissions', 'Cargo_Value', 'Route_Duration', 'Total_Recent_Incidents',
             'Average_Vessel_Age', 'Crew_Experience_Level']
#scaler = MinMaxScaler()
#df[features] = scaler.fit_transform(df[features])

# Encode categorical features
df = pd.get_dummies(df, columns=['Previous_Safety_Awards', 'Training_Programs', 'Incident_Severity',
                                 'Cargo_Type', 'Navigational_Risks', 'Regulatory_Compliance',
                                 'Ship_Maintenance_Records', 'Insurance_Coverage', 'Ship_Type',
                                 'Emergency_Response_Plans'])

# Define weights for RiskScore calculation
weights = {
    'Total_Incidents_Count': 0.2,
    'Carbon_Emissions': 0.1,
    'Number_Of_Travels': 0.1,
    'Average_Transit_Days': 0.1,
    'Total_Recent_Incidents': 0.1,
    'Cargo_Value': 0.1,
    'Average_Vessel_Age': 0.05,
    'Crew_Experience_Level': 0.05,
    'Cargo_Type_hazardous': 0.05,
    'Navigational_Risks_congested areas': 0.05,
    'Regulatory_Compliance_non-compliant': 0.05,
    'Ship_Maintenance_Records_overdue': 0.05,
    'Insurance_Coverage_partial': 0.05,
    'Ship_Type_bulk carrier': 0.05,
    'Emergency_Response_Plans_not in place': 0.05
}

# Calculate RiskScore based on weighted sum
df['RiskScore'] = (
    df['Total_Incidents_Count'] * weights['Total_Incidents_Count'] +
    df['Carbon_Emissions'] * weights['Carbon_Emissions'] +
    df['Number_Of_Travels'] * weights['Number_Of_Travels'] +
    df['Average_Transit_Days'] * weights['Average_Transit_Days'] +
    df['Total_Recent_Incidents'] * weights['Total_Recent_Incidents'] +
    df['Cargo_Value'] * weights['Cargo_Value'] +
    df['Average_Vessel_Age'] * weights['Average_Vessel_Age'] +
    df['Crew_Experience_Level'] * weights['Crew_Experience_Level'] +
    df['Cargo_Type_hazardous'] * weights['Cargo_Type_hazardous'] +
    df['Navigational_Risks_congested areas'] * weights['Navigational_Risks_congested areas'] +
    df['Regulatory_Compliance_non-compliant'] * weights['Regulatory_Compliance_non-compliant'] +
    df['Ship_Maintenance_Records_overdue'] * weights['Ship_Maintenance_Records_overdue'] +
    df['Insurance_Coverage_partial'] * weights['Insurance_Coverage_partial'] +
    df['Ship_Type_bulk carrier'] * weights['Ship_Type_bulk carrier'] +
    df['Emergency_Response_Plans_not in place'] * weights['Emergency_Response_Plans_not in place']
)

# Normalize RiskScore to [0, 1] range
df['RiskScore'] = (df['RiskScore'] - df['RiskScore'].min()) / (df['RiskScore'].max() - df['RiskScore'].min())

# Define bins and labels for RiskScore categories
bins = [0, 0.33, 0.66, 1]
labels = ['Low Risk', 'Medium Risk', 'High Risk']

# Convert RiskScore to categories
df['RiskScore_Category'] = pd.cut(df['RiskScore'], bins=bins, labels=labels)

# Save the updated DataFrame
df.to_csv('enhanced_shipping_data_with_route_and_risks1.csv', index=False)




Unnamed: 0,RouteID,POL_Code,POD_Code,Distance,Average_Transit_Days,Number_Of_Travels,Total_Incidents_Count,Carbon_Emissions,Cargo_Value,Route_Duration,...,Ship_Maintenance_Records_up-to-date,Insurance_Coverage_full,Insurance_Coverage_none,Insurance_Coverage_partial,Ship_Type_bulk carrier,Ship_Type_container,Emergency_Response_Plans_in place,Emergency_Response_Plans_not in place,RiskScore,RiskScore_Category
0,1,POL1,POD2,989,9,27,4,92.070577,1566985.0,20,...,False,False,True,False,True,False,False,True,0.538121,Medium Risk
1,2,POL2,POD2,641,17,13,2,113.040586,1689671.0,33,...,True,False,True,False,True,False,True,False,0.601449,Medium Risk
2,3,POL1,POD2,780,13,16,9,110.20425,1766207.0,16,...,False,True,False,False,True,False,True,False,0.640961,Medium Risk
3,4,POL2,POD2,992,10,20,5,88.251958,1621702.0,14,...,False,False,True,False,False,True,True,False,0.566359,Medium Risk
4,5,POL2,POD1,542,7,19,5,89.05014,1225656.0,30,...,False,True,False,False,False,True,False,True,0.361939,Medium Risk


In [11]:
df['RiskScore_Category']=='Medium Risk'

0       True
1       True
2       True
3       True
4       True
       ...  
995    False
996     True
997     True
998     True
999     True
Name: RiskScore_Category, Length: 1000, dtype: bool