In [13]:
#CSV to Roi folder Part 1
import pandas as pd
import numpy as np
from roifile import ImagejRoi, roiwrite
import os

# Input folder containing CSV files
input_folder = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/DBScan_Rois/DRG4'# Replace with the path to your folder of CSVs
# Output folder to store all ROI subfolders
output_folder = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/DBScan_Rois/DRG4_roi'  # Replace with the path to the folder where ROI subfolders will be created
os.makedirs(output_folder, exist_ok=True)  # Create the main output folder if it doesn't exist

# Loop through each CSV file in the input folder
for csv_file in os.listdir(input_folder):
    if csv_file.endswith('.csv'):
        # Full path to the current CSV file
        input_csv_path = os.path.join(input_folder, csv_file)
        
        # Read the CSV into a DataFrame
        data = pd.read_csv(input_csv_path)
        
        # Create a subfolder for this CSV's ROI files
        csv_name = os.path.splitext(csv_file)[0]  # Get the CSV name without the .csv extension
        roi_folder = os.path.join(output_folder, csv_name)  # Subfolder for the ROIs of this CSV
        os.makedirs(roi_folder, exist_ok=True)  # Create subfolder if it doesn't exist

        # Initialize a counter for sequential numbering
        roi_counter = 1

        # Group by 'Name' and create ROIscell_id for each group
        for _, group in data.groupby('Name'):
            # Extract coordinates
            coordinates = group[['X', 'Y']].to_numpy()
            
            # Create an ROI for the cell
            roi = ImagejRoi.frompoints(coordinates)
            
            # Save the ROI with sequential numbering
            roi_file = os.path.join(roi_folder, f"roi_{roi_counter}.roi")
            roiwrite(roi_file, roi)
            
            # Increment the counter
            roi_counter += 1

        print(f"ROIs for {csv_file} saved in {roi_folder}")

print(f"All CSV files have been processed. ROIs are saved in {output_folder}.")


ROIs for drg4_all_e7_n48.csv saved in /media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/DBScan_Rois/DRG4_roi/drg4_all_e7_n48
All CSV files have been processed. ROIs are saved in /media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/DBScan_Rois/DRG4_roi.


In [14]:
#CSV to ROI part 2 ZIP
import os
import zipfile

# Folder containing the ROI files
roi_folder = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/DBScan_Rois/drg4_all_e7_n48'  # Replace with your folder path
zip_file = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/DBScan_Rois/drg4_all_e7_n48.zip'  # Desired ZIP file name

# Create a ZIP archive of all ROIs
with zipfile.ZipFile(zip_file, 'w') as zipf:
    for file in os.listdir(roi_folder):
        if file.endswith('.roi'):  # Only include .roi files
            file_path = os.path.join(roi_folder, file)
            zipf.write(file_path, arcname=file)  # Add file to ZIP archive

print(f"All ROIs zipped into {zip_file}")
os.getcwd()

All ROIs zipped into /media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/DBScan_Rois/drg4_all_e7_n48.zip


'/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Standardized Pipelines for Pub'

In [None]:
#CSV to TXT 
#Enter Path to CSV in Link
import pandas as pd
import os

# Input folder containing CSV files
input_folder = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Cellpose/Figures/Evaluation Image/ManualDRG4'  # Replace with the path to your folder of CSVs
# Output folder to store all TXT files
output_folder = '//home/yhs/Documents'  # Replace with the path to your folder for TXT files
os.makedirs(output_folder, exist_ok=True)  # Create the output folder if it doesn't exist

# Loop through each CSV file in the input folder
for csv_file in os.listdir(input_folder):
    if csv_file.endswith('.csv'):
        # Full path to the current CSV file
        input_csv_path = os.path.join(input_folder, csv_file)
        
        # Read the CSV into a DataFrame
        df = pd.read_csv(input_csv_path)
        
        # Group the data by 'Name' and prepare the coordinates as lists
        grouped = df.groupby('Name')

        # Initialize a list to store the lines to write to the TXT file
        lines = []

        # Process each group of cells (grouped by 'Name')
        for cell, group in grouped:
            # Start with the cell ID (the 'class' in YOLO format)
            line = f"0"  # Assuming class '0' for all cells, adjust if needed

            # Add coordinates to the line
            for _, row in group.iterrows():
                # The CSV has columns 'X' and 'Y' which are the coordinates
                line += f" {row['X'] / 500} {row['Y'] / 500}"  # Divide by 500 to normalize coordinates
            
            # Add this line to the list
            lines.append(line)

        # Create a corresponding TXT filename
        txt_filename = os.path.splitext(csv_file)[0] + '.txt'  # Change .csv to .txt
        output_txt_path = os.path.join(output_folder, txt_filename)
        
        # Write the lines to the output TXT file
        with open(output_txt_path, 'w') as f:
            f.write("\n".join(lines))
        
        print(f"Converted {csv_file} to {txt_filename} and saved to {output_folder}")

print(f"All CSV files have been converted and saved as TXT files in {output_folder}.")


In [None]:
#CSV to GEOJSON
import pandas as pd
import json

# Load the CSV
csv_file = '/home/yhs/Desktop/Segmentation file format/evaluation.csv'
data = pd.read_csv(csv_file)

# Initialize a dictionary with the 'geometries' key
geojson_format = {'geometries': []}

# Group by 'name' and create polygons
for name, group in data.groupby('Name'):
    # Extract coordinates as a list of [x, y] pairs
    coordinates = group[['X', 'Y']].to_numpy().tolist()
    
    # Ensure the polygon is closed (repeat the first coordinate at the end if needed)
    if coordinates[0] != coordinates[-1]:
        coordinates.append(coordinates[0])
    
    # Append the polygon entry to the 'geometries' list
    geojson_format['geometries'].append({
        'coordinates': [coordinates],  # GeoJSON expects a nested list
        'type': 'Polygon',
        'cell': name  # Use the 'name' as the cell identifier
    })

# Save to a JSON file
output_file = 'baysor_geometries.json'
with open(output_file, 'w') as f:
    json.dump(geojson_format, f, indent=4)

print(f"GeoJSON-like file saved to {output_file}")


In [None]:
#CSV to NumPy
import numpy as np
import pandas as pd
import cv2
import os

# Input folder containing CSV files
input_folder = '/path/to/your/input/csv/folder'  # Replace with the path to your folder of CSVs
# Output folder to store all NumPy files
output_folder = '/path/to/your/output/numpy/folder'  # Replace with the path to your folder for NumPy files
os.makedirs(output_folder, exist_ok=True)  # Create the output folder if it doesn't exist

# Image size for the binary mask
image_size = (512, 512)  # You can adjust this as needed

# Loop through each CSV file in the input folder
for csv_file in os.listdir(input_folder):
    if csv_file.endswith('.csv'):
        # Full path to the current CSV file
        input_csv_path = os.path.join(input_folder, csv_file)
        
        # Read the CSV into a DataFrame
        df = pd.read_csv(input_csv_path)
        
        # Create a blank 512x512 binary array
        binary_array = np.zeros(image_size, dtype=np.uint8)
        
        # Group points by 'Cell' and fill the mask
        for cell_label, group in df.groupby("Cell"):
            # Extract the contour points for this cell
            contour = group[["X", "Y"]].to_numpy().astype(np.int32)
            contour = contour.reshape((-1, 1, 2))  # Reshape for OpenCV format
            
            # Fill the polygon (square, rectangle, etc.) inside the contour
            cv2.fillPoly(binary_array, [contour], 1)  # Fill inside the polygon with 1's

        # Create a corresponding NumPy filename
        numpy_filename = os.path.splitext(csv_file)[0] + '.npy'  # Change .csv to .npy
        output_numpy_path = os.path.join(output_folder, numpy_filename)
        
        # Save the NumPy array
        np.save(output_numpy_path, binary_array)
        
        print(f"Converted {csv_file} to {numpy_filename} and saved to {output_folder}")

print(f"All CSV files have been converted and saved as NumPy files in {output_folder}.")


In [5]:
#TXT to CSV
import os
import pandas as pd

# Input folder containing TXT files
input_folder = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/FinalProjectPlots/DBScan_HY/Images/new_Crop/in'  # Replace with the path to your folder of TXT files
# Output folder to store all CSV files
output_folder = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/FinalProjectPlots/DBScan_HY/Images/new_Crop/out'  # Replace with the path to your folder for CSV files
os.makedirs(output_folder, exist_ok=True)  # Create the output folder if it doesn't exist

# Loop through each TXT file in the input folder
for txt_file in os.listdir(input_folder):
    if txt_file.endswith('.txt'):
        # Full path to the current TXT file
        input_txt_path = os.path.join(input_folder, txt_file)
        
        # Read the YOLOv8 TXT file line by line
        with open(input_txt_path, 'r') as f:
            lines = f.readlines()

        # Initialize an empty list to store rows
        rows = []

        # Process each line
        for cell_index, line in enumerate(lines):
            # Split the line into components
            components = line.strip().split()
            
            # Scale X and Y by first and second number specified
            coordinates = [round(float(components[i]) * 5797 if i % 2 == 0 else float(components[i]) * 7261) for i in range(1, len(components))]
            
            # Reshape coordinates into pairs of (x, y)
            coordinates_pairs = [(coordinates[i], coordinates[i+1]) for i in range(0, len(coordinates), 2)]
            
            # Create a row for each cell, include the cell ID and all coordinates
            rows.append({'Name': f'ROI_{cell_index + 1}', 'Coordinates': coordinates_pairs})

        # Convert rows to a DataFrame
        df = pd.DataFrame(rows)

        # Expand the DataFrame so each coordinate gets its own row
        expanded_rows = []
        for _, row in df.iterrows():
            cell_id = row['Name']
            for x, y in row['Coordinates']:
                expanded_rows.append({'Name': cell_id, 'X': x, 'Y': y})

        expanded_df = pd.DataFrame(expanded_rows)

        # Create a corresponding CSV filename
        csv_filename = os.path.splitext(txt_file)[0] + '.csv'  # Change .txt to .csv
        output_csv_path = os.path.join(output_folder, csv_filename)
        
        # Save the CSV file
        expanded_df.to_csv(output_csv_path, index=False)
        
        print(f"Converted {txt_file} to {csv_filename} and saved to {output_folder}")

print(f"All TXT files have been converted and saved as CSV files in {output_folder}.")



Converted DRG_1_SegTrans_2000_3000.txt to DRG_1_SegTrans_2000_3000.csv and saved to /media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/FinalProjectPlots/DBScan_HY/Images/new_Crop/out
All TXT files have been converted and saved as CSV files in /media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/FinalProjectPlots/DBScan_HY/Images/new_Crop/out.


In [None]:
#NUMPY to CSV 
import os
import numpy as np
import pandas as pd
import cv2

# Input folder containing NumPy (.npy) files
input_folder = '/home/yhs/Desktop/combo_DRG4'  # Replace with the path to your folder of NumPy files
# Output folder to store all CSV files
output_folder = '/home/yhs/Desktop/combo_DRG4/csv'  # Replace with the path to your folder for CSV files
os.makedirs(output_folder, exist_ok=True)  # Create the output folder if it doesn't exist

# Loop through each NumPy file in the input folder
for npy_file in os.listdir(input_folder):
    if npy_file.endswith('.npy'):
        # Full path to the current NumPy file
        input_npy_path = os.path.join(input_folder, npy_file)
        
        # Step 1: Load the NumPy array from the file
        binary_array = np.load(input_npy_path)

        # Step 2: Ensure the array is binary (0s and 1s)
        binary_array = (binary_array > 0).astype(np.uint8)

        # Step 3: Find contours using OpenCV
        contours, _ = cv2.findContours(binary_array, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Step 4: Prepare data for CSV
        data = []
        for cell_id, contour in enumerate(contours, start=1):
            cell_label = f"Cell_{cell_id}"  # Add prefix like "Cell_1", "Cell_2", etc.
            for point in contour:
                x, y = point[0]  # OpenCV returns [x, y] format
                data.append({"Cell": cell_label, "X": x, "Y": y})

        # Step 5: Create DataFrame and save to CSV
        df = pd.DataFrame(data)

        # Create a corresponding CSV filename (same name as the .npy file)
        csv_filename = os.path.splitext(npy_file)[0] + '.csv'  # Change .npy to .csv
        output_csv_path = os.path.join(output_folder, csv_filename)

        # Save the CSV file
        df.to_csv(output_csv_path, index=False)
        
        print(f"Converted {npy_file} to {csv_filename} and saved to {output_folder}")

print(f"All NumPy files have been converted and saved as CSV files in {output_folder}.")


In [8]:
#GEOJSON to CSV 
import json
import pandas as pd

file_path = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Xenium_segementation/Baysor/DRG_1_1gene/DRG_1_4/segmentation_polygons.json'
with open(file_path, 'r') as file:
    data = json.load(file)
new_data = data.get('geometries')

# Now 'data' contains the JSON data loaded from the file
df_data = {'Name': [], 'X': [], 'Y': []}
counter = 0
for i in range(len(new_data)):
    counter = counter+1
    coords = new_data[i]['coordinates'][0]
    for k in range(len(coords)):
        df_data['Name'].extend(['roi'+str(counter)])
        df_data['X'].extend([round(coords[k][0])])
        df_data['Y'].extend([round(coords[k][1])])
        
df = pd.DataFrame.from_dict(df_data)
                             
    
name_counts = df['Name'].value_counts()

# Filter out rows where the Name appears less than 4 times
df_filtered = df[df['Name'].isin(name_counts[name_counts >= 4].index)]
out_path = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Xenium_segementation/Baysor/DRG_1_1gene/DRG_1_4/DRG_1_1_gene_trial_4.csv'
df_filtered.to_csv(out_path, sep=',', index=False, header=True)

In [1]:
#ROI TO CSV 
import pandas as pd
import os
import gc
from read_roi import read_roi_file

# Specify the folder path containing the .roi files
roi_folder = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/export/RoiSet/XETG00171__0018220_DRG-Region_1_RoiSet'  # Replace with the actual folder path

# Initialize empty lists to store the coordinates and cell IDs
final_x = []
final_y = []
final_l = []

# Get a list of all .roi files in the specified folder
dir_list = [f for f in os.listdir(roi_folder) if f.endswith('.roi')]

# Loop through each ROI file in the folder
for roi_file in dir_list:
    try:
        # Construct the full path to the ROI file
        roi_path = os.path.join(roi_folder, roi_file)
        
        # Read the ROI file
        roi = read_roi_file(roi_path)
        r2 = roi.keys()
        name = roi[list(r2)[0]]['name']
        x_cord = roi[list(r2)[0]]['x']
        y_cord = roi[list(r2)[0]]['y']
        
        # Append data to the lists
        l = [name] * len(x_cord)  # Repeat the name for each point
        final_x += x_cord
        final_y += y_cord
        final_l += l
    except Exception as e:
        print(f"Error processing {roi_file}: {e}")

# Create a DataFrame from the collected data
data = {'cell_id': final_l, 'x': final_x, 'y': final_y}
df = pd.DataFrame.from_dict(data)

# Specify the output CSV file path
output_csv = '/media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Xenium_segementation/ManuscriptProject/Manual_Cell_ids_csvs/cellids11.csv'  # Replace with the desired output path

# Save the DataFrame to a CSV file
df.to_csv(output_csv, index=False)

# Clean up memory
gc.collect()

print(f"CSV file saved to {output_csv}")


Error processing 09253-05421.roi: 'x'
Error processing 04456-12122.roi: 'x'
Error processing 13572-04841.roi: 'x'
Error processing 07510-16932.roi: 'x'
Error processing 07418-10856.roi: 'x'
Error processing 16630-10213.roi: 'x'
Error processing 06851-07694.roi: 'x'
Error processing 14716-12918.roi: 'x'
Error processing 11435-08107.roi: 'x'
Error processing 04649-15772.roi: 'x'
CSV file saved to /media/yhs/5596744f-db7c-442f-9235-d0c9d50c0a6b/Xenium_segementation/ManuscriptProject/Manual_Cell_ids_csvs/cellids11.csv
