## Code to simulate and generate data specific to Use Case

In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta, date

In [2]:
# Generate a list of dates
def generate_dates(start_date, end_date):
    current = start_date
    while current <= end_date:
        yield current
        current += timedelta(days=1)

# Simulation parameters
start_date = date(2023, 1, 1)
end_date = date(2023, 12, 31)
dates = list(generate_dates(start_date, end_date))
countries = ["USA", "Germany", "Japan", "India", "Australia", "Brazil", "Canada", "UK", "South Africa"]
hospital_types = ["General", "Specialty", "Teaching"]
n_robots = 100

# Simulate data
data = []
for robot_id in range(1, n_robots + 1):
    robot_name = f"Robot_{str(robot_id).zfill(3)}"
    country = np.random.choice(countries)
    deployment_date = np.random.choice(dates[:100])  # Assume deployment dates are in the first 100 days
    for task_date in dates:
        if task_date >= deployment_date:
            data.append({
                "Robot ID": robot_name,
                "Country": country,
                "Deployment Date": deployment_date,
                "Date": task_date,
                "Daily Task Volume": np.random.randint(5, 50),
                "Procedure Time (mins)": np.random.uniform(30, 300),
                "Daily Energy Consumption (kWh)": np.random.uniform(10, 100),
                "Error Rate (%)": np.random.uniform(0, 5),
                "Downtime (hrs)": np.random.uniform(0, 2),
                "Maintenance Events": np.random.choice([0, 1], p=[0.9, 0.1]),
                "Task Success Rate (%)": np.random.uniform(90, 100),
                "Predicted Maintenance Need": np.random.choice(["Yes", "No"], p=[0.1, 0.9]),
                "Patient Satisfaction Score": np.random.uniform(7, 10),
                "Cost per Task (USD)": np.random.uniform(200, 2000),
                "Hospital Type": np.random.choice(hospital_types),
                "Robot Operating Time (hrs)": np.random.uniform(5, 12)
            })

# Create DataFrame
robotics_df = pd.DataFrame(data)

# Save as CSV
robotics_df.to_csv("healthcare_robotics_worldwide.csv", index=False)
print("Simulated worldwide healthcare robotics dataset saved as 'healthcare_robotics_worldwide.csv'")


Simulated worldwide healthcare robotics dataset saved as 'healthcare_robotics_worldwide.csv'


In [3]:
robotics_df.head(5)

Unnamed: 0,Robot ID,Country,Deployment Date,Date,Daily Task Volume,Procedure Time (mins),Daily Energy Consumption (kWh),Error Rate (%),Downtime (hrs),Maintenance Events,Task Success Rate (%),Predicted Maintenance Need,Patient Satisfaction Score,Cost per Task (USD),Hospital Type,Robot Operating Time (hrs)
0,Robot_001,USA,2023-01-15,2023-01-15,23,101.946315,45.635648,0.101421,0.533915,0,92.881157,No,8.53331,459.515941,Teaching,11.352914
1,Robot_001,USA,2023-01-15,2023-01-16,48,255.085449,95.178172,4.926009,1.159964,0,98.010373,No,8.843037,634.411877,General,5.914921
2,Robot_001,USA,2023-01-15,2023-01-17,41,34.112225,28.788179,1.055748,0.647513,0,91.8076,No,9.032817,1782.808657,General,6.302192
3,Robot_001,USA,2023-01-15,2023-01-18,19,121.153327,74.254665,0.001965,1.835709,0,96.220439,No,9.21698,681.879471,General,10.995965
4,Robot_001,USA,2023-01-15,2023-01-19,44,164.622282,97.974143,4.693244,0.108314,0,91.079321,Yes,7.770172,214.927069,General,7.952537


In [4]:
robotics_df.shape

(31084, 16)

In [5]:
robotics_df1 = robotics_df.head(1000)

In [6]:
robotics_df1.shape

(1000, 16)

In [27]:
import pandas as pd
import numpy as np
from datetime import timedelta, date

# Set random seed for reproducibility
np.random.seed(42)

# Generate a list of dates
def generate_dates(start_date, end_date):
    current = start_date
    while current <= end_date:
        yield current
        current += timedelta(days=1)

# Parameters to control the dataset size
start_date = date(2023, 1, 1)
end_date = date(2023, 3, 31)  # Limit the date range to reduce rows
dates = list(generate_dates(start_date, end_date))  # ~90 days
n_robots = 100  # Number of robots to simulate
countries = ["USA", "Germany", "Japan", "India", "Australia","UK",
    "Canada",
    "Brazil",
    "China",
    "South Korea" ,
    "South Africa",
    "France" ,
    "Italy",
    "Mexico",
    "Russia",
    "Spain",
    "Netherlands",
    "Sweden",
    "Norway",
    "Denmark"]
hospital_types = ["General", "Specialty", "Teaching"]

# Define country-specific success rate ranges
success_rate_by_country = {
    "USA": (95, 100),
    "Germany": (85, 98),
    "Japan": (94, 99),
    "India": (88, 95),
    "Australia": (75, 91),
    "UK": (90, 96),
    "Canada": (92, 98),
    "Brazil": (78, 88),
    "China": (85, 93),
    "South Korea": (92, 98),
    "South Africa": (70, 85),
    "France": (87, 95),
    "Italy": (80, 90),
    "Mexico": (77, 89),
    "Russia": (72, 85),
    "Spain": (84, 92),
    "Netherlands": (89, 96),
    "Sweden": (90, 97),
    "Norway": (91, 98),
    "Denmark": (88, 96)
}

# Simulate data
data = []
for robot_id in range(1, n_robots + 1):
    robot_name = f"Robot_{str(robot_id).zfill(3)}"
    country = np.random.choice(countries)
    deployment_date = np.random.choice(dates[:30])  # Assume deployment happens in the first month
    for task_date in dates:
        if len(data) >= 10000:  # Stop once 10,000 rows are reached
            break
        if task_date >= deployment_date:
            data.append({
                "Robot ID": robot_name,
                "Country": country,
                "Deployment Date": deployment_date,
                "Date": task_date,
                "Daily Task Volume": np.random.randint(5, 50),
                "Procedure Time (mins)": np.random.uniform(30, 300),
                "Daily Energy Consumption (kWh)": np.random.uniform(10, 100),
                "Error Rate (%)": np.random.uniform(0, 5),
                "Downtime (hrs)": np.random.uniform(0, 2),
                "Maintenance Events": np.random.choice([0, 1], p=[0.9, 0.1]),
                "Task Success Rate (%)": np.random.uniform(*success_rate_by_country[country]),  # Use country-specific range
                "Predicted Maintenance Need": np.random.choice(["Yes", "No"], p=[0.1, 0.9]),
                "Patient Satisfaction Score": np.random.uniform(7, 10),
                "Cost per Task (USD)": np.random.uniform(200, 2000),
                "Hospital Type": np.random.choice(hospital_types),
                "Robot Operating Time (hrs)": np.random.uniform(5, 12)
            })

# Create DataFrame
robotics_df = pd.DataFrame(data)

# Save as CSV
robotics_df.to_csv("healthcare_robotics_1.csv", index=False)
print("Simulated dataset saved as 'healthcare_robotics_10000_rows.csv'")


Simulated dataset saved as 'healthcare_robotics_10000_rows.csv'


In [28]:
robotics_df.head(5)

Unnamed: 0,Robot ID,Country,Deployment Date,Date,Daily Task Volume,Procedure Time (mins),Daily Energy Consumption (kWh),Error Rate (%),Downtime (hrs),Maintenance Events,Task Success Rate (%),Predicted Maintenance Need,Patient Satisfaction Score,Cost per Task (USD),Hospital Type,Robot Operating Time (hrs)
0,Robot_001,Canada,2023-01-20,2023-01-20,33,79.527393,80.17219,2.984251,0.891666,0,94.755493,No,7.4286,1371.599251,General,11.789369
1,Robot_001,Canada,2023-01-20,2023-01-21,48,283.409231,10.070089,4.961058,1.234963,0,92.042398,Yes,8.574324,919.749749,Teaching,8.19249
2,Robot_001,Canada,2023-01-20,2023-01-22,48,168.843298,63.317311,0.232252,1.21509,0,92.39031,No,9.896896,1655.115227,General,5.111764
3,Robot_001,Canada,2023-01-20,2023-01-23,6,214.742917,49.613724,0.610191,0.990354,0,97.455922,No,8.987567,761.079937,Specialty,6.455592
4,Robot_001,Canada,2023-01-20,2023-01-24,8,79.910703,97.262616,3.875664,1.878998,0,95.5874,No,7.265478,552.769152,General,11.728204


In [29]:
robotics_df['Task Success Rate (%)'].unique()

array([94.75549335, 92.04239783, 92.39030956, ..., 81.89449947,
       78.83501915, 86.65070864])

In [30]:
robotics_df.shape

(7572, 16)

In [4]:
import pandas as pd
import numpy as np
from datetime import timedelta, date

# Set random seed for reproducibility
np.random.seed(42)

# Generate a list of dates
def generate_dates(start_date, end_date):
    current = start_date
    while current <= end_date:
        yield current
        current += timedelta(days=1)

# Parameters to control the dataset size
start_date = date(2023, 1, 1)  # Start from January
end_date = date(2023, 12, 31)  # Extend the Date column to the full year
dates = list(generate_dates(start_date, end_date))  # All dates in the year
n_robots_per_country = 5  # Number of robots per country
countries = ["USA", "Germany", "Japan", "India", "Australia", "UK",
    "Canada",
    "Brazil",
    "China",
    "South Korea",
    "South Africa",
    "France",
    "Italy",
    "Mexico",
    "Russia",
    "Spain",
    "Netherlands",
    "Sweden",
    "Norway",
    "Denmark"]
hospital_types = ["General", "Specialty", "Teaching"]

# Define country-specific success rate ranges
success_rate_by_country = {
    "USA": (95, 100),
    "Germany": (85, 98),
    "Japan": (94, 99),
    "India": (88, 95),
    "Australia": (75, 91),
    "UK": (90, 96),
    "Canada": (92, 98),
    "Brazil": (78, 88),
    "China": (85, 93),
    "South Korea": (92, 98),
    "South Africa": (70, 85),
    "France": (87, 95),
    "Italy": (80, 90),
    "Mexico": (77, 89),
    "Russia": (72, 85),
    "Spain": (84, 92),
    "Netherlands": (89, 96),
    "Sweden": (90, 97),
    "Norway": (91, 98),
    "Denmark": (88, 96)
}

# Simulate data
data = []
for country in countries:
    for robot_id in range(1, n_robots_per_country + 1):
        robot_name = f"Robot_{country}_{str(robot_id).zfill(3)}"
        deployment_date = np.random.choice(dates[:90])  # Limit deployment dates to the first 3 months
        for task_date in dates:
            if len(data) >= 20000:  # Increased row limit to ensure all countries are included
                break
            if task_date >= deployment_date:
                data.append({
                    "Robot ID": robot_name,
                    "Country": country,
                    "Deployment Date": deployment_date,
                    "Date": task_date,
                    "Daily Task Volume": np.random.randint(5, 50),
                    "Procedure Time (mins)": np.random.uniform(30, 300),
                    "Daily Energy Consumption (kWh)": np.random.uniform(10, 100),
                    "Error Rate (%)": np.random.uniform(0, 5),
                    "Downtime (hrs)": np.random.uniform(0, 2),
                    "Maintenance Events": np.random.choice([0, 1], p=[0.9, 0.1]),
                    "Task Success Rate (%)": np.random.uniform(*success_rate_by_country[country]),
                    "Predicted Maintenance Need": np.random.choice(["Yes", "No"], p=[0.1, 0.9]),
                    "Patient Satisfaction Score": max(7, min(10, np.random.normal(loc=8.5, scale=1))),
                    "Cost per Task (USD)": np.random.uniform(200, 2000),
                    "Hospital Type": np.random.choice(hospital_types),
                    "Robot Operating Time (hrs)": np.random.uniform(5, 12)
                })

# Create DataFrame
robotics_df = pd.DataFrame(data)

# Verify country distribution
print(robotics_df["Country"].value_counts())  # Check that all countries are represented

# Save as CSV
robotics_df.to_csv("healthcare_robotics_1.csv", index=False)
print("Simulated dataset saved as 'healthcare_robotics_1.csv'")

Country
France          1689
China           1677
USA             1658
Germany         1629
India           1627
Japan           1623
UK              1591
Brazil          1584
South Korea     1570
Australia       1564
Canada          1559
South Africa    1559
Italy            670
Name: count, dtype: int64
Simulated dataset saved as 'healthcare_robotics_1.csv'
