In [2]:
import pandas as pd
import os

# Load the dataset from CSV
probes_df = pd.read_csv("../data/Catalog - Probes.csv")

# Text cleaning
probes_df['Array_Type'] = probes_df['Array_Type'].str.lower()
probes_df['Array_Type'] = probes_df['Array_Type'].str.replace('[^\w\s]', '')
probes_df['Applications'] = probes_df['Applications'].str.lower()
probes_df['Applications'] = probes_df['Applications'].str.replace('[^\w\s]', '')

# Split "Compatible_Systems" column
probes_df['Compatible_Systems'] = probes_df['Compatible_Systems'].str.split(', ')

# Split "Applications" column
probes_df['Applications'] = probes_df['Applications'].str.split(', ')

# Rename 'Connection_Type' column to 'Cartridge_Connection'
probes_df = probes_df.rename(columns={'Connection_Type': 'Cartridge_Connection'})

# Convert 'Cartridge_Connection' values to binary (1 for "Cartridge", 0 for NaN)
probes_df['Cartridge_Connection'] = probes_df['Cartridge_Connection'].apply(lambda x: 1 if x == 'Cartridge' else 0)

# Extract minimum and maximum frequencies from 'Frequency_Range'
probes_df[['Min_Frequency', 'Max_Frequency']] = probes_df['Frequency_Range'].str.extract(r'(\d+(?:\.\d+)?)\s*(?:-\s*(\d+(?:\.\d+)?))?')
probes_df['Min_Frequency'] = probes_df['Min_Frequency'].astype(float)
probes_df['Max_Frequency'] = probes_df['Max_Frequency'].fillna(probes_df['Min_Frequency']).astype(float)

# Drop the original 'Frequency_Range' column
probes_df = probes_df.drop(['Frequency_Range'], axis=1)

# Initialize a dictionary to store the ultrasound systems information
systems_dict = {}

# Iterate over the rows in probes_df
for _, row in probes_df.iterrows():
    manufacturer = row['Manufacturer']
    probe_model = row['Probe_Model']
    compatible_systems = row['Compatible_Systems']
    
    for system in compatible_systems:
        if system not in systems_dict:
            systems_dict[system] = {
                'ultrasound_system': system,
                'manufacturer': manufacturer,
                'compatible_probes': set()  # Use a set to avoid duplicates
            }
        systems_dict[system]['compatible_probes'].add(probe_model)

# Convert the dictionary to a DataFrame
systems_list = []
for system, data in systems_dict.items():
    data['compatible_probes'] = ', '.join(sorted(data['compatible_probes']))  # Sort for consistency
    systems_list.append(data)

systems_df = pd.DataFrame(systems_list)

# Specify the output directory
output_dir = "../data"
os.makedirs(output_dir, exist_ok=True)

# Save the DataFrame to a CSV file
systems_df.to_csv(os.path.join(output_dir, "systems.csv"), index=False)

KeyError: 'Frequency_Range'

In [10]:
systems_df.to_csv("../data/systems_df.csv", index=False)



In [12]:
systems_df

Unnamed: 0,ultrasound_system,manufacturer,compatible_probes
0,HDI 5000,ATL,"C3, C4-2, C5-2, C5-IVT, C7-4, C8-4v, C9-5ICT, ..."
1,HDI 1500,ATL,"C5-2, C5-IVT, C7-4, C9-5ICT, L7-4"
2,HDI 3000,ATL,"C5-2, C5-IVT, C7-4, C8-4v, CL 10-5, CT8-4, L7-4"
3,HDI 3500,ATL,"C5-2, C5-IVT, C8-4v, CT8-4"
4,UM9 HDI,ATL,"C5-IVT, C7-4, L7-4"
5,HDI 1000,ATL,"C9-5ICT, CT8-4"
6,HDI 4000,ATL,C9-5ICT
7,LOGIQ 700,G.E.,"227s, 348c, 618c"
8,Voluson 730 Pro,G.E.,RAB2-5L
9,Voluson 730,G.E.,RAB2-5L
