In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

def load_data(file_path):
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        The parsed ``DataFrame``.
    """
    return pd.read_csv(file_path)

def preprocess_data(data, metadata):
    """Join measurements with their metadata and discard incomplete rows.

    Args:
        data: DataFrame containing a ``cell_id`` column.
        metadata: DataFrame containing a ``cell_id`` column.

    Returns:
        The merge of both frames on ``cell_id`` (pandas' default join type),
        with every row that holds at least one missing value removed.
    """
    return data.merge(metadata, on='cell_id').dropna()

def feature_extraction(data, n_components=10):
    """Standardise the feature columns and project them with PCA.

    Every column except the last one is treated as a feature; the last
    column is assumed to be the label and is left out.

    Args:
        data: DataFrame whose last column is the label.
        n_components: Requested number of principal components (default 10,
            the value that used to be hard-coded). An integer request is
            capped at ``min(n_samples, n_features)`` because PCA rejects
            anything larger; non-integer values (e.g. a variance fraction)
            are passed to PCA unchanged.

    Returns:
        Array with one row per input row and one column per retained
        component.
    """
    feature_columns = data.iloc[:, :-1]  # last column is assumed to be the label
    scaled_data = StandardScaler().fit_transform(feature_columns)

    if isinstance(n_components, (int, np.integer)):
        n_samples, n_features = scaled_data.shape
        # Small tables would otherwise make PCA raise a ValueError.
        n_components = min(n_components, n_samples, n_features)

    return PCA(n_components=n_components).fit_transform(scaled_data)

def prepare_data_for_ml(features, labels):
    """Split features and labels into train and test partitions.

    20% of the samples are held out for testing and the shuffle is seeded
    with 42.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    partitions = train_test_split(features, labels, test_size=0.2, random_state=42)
    # train_test_split hands back a list; callers of this function get a tuple.
    return tuple(partitions)

def train_rf_classifier(X_train, y_train):
    """Fit a 100-tree random forest (seed 42) and return the fitted estimator."""
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    # fit() returns the estimator itself, so the fitted model is returned directly.
    return forest.fit(X_train, y_train)

def evaluate_classifier(clf, X_test, y_test):
    """Score a fitted classifier on held-out data.

    Args:
        clf: Fitted estimator exposing ``predict``.
        X_test: Test features.
        y_test: True labels for ``X_test``.

    Returns:
        ``(accuracy, confusion_matrix)`` computed from the predictions on
        ``X_test``.
    """
    predictions = clf.predict(X_test)
    return accuracy_score(y_test, predictions), confusion_matrix(y_test, predictions)

def build_deep_learning_model(input_dim):
    """Build and compile a small fully connected binary classifier.

    Layers: Dense(64, relu) -> Dropout(0.2) -> Dense(32, relu) ->
    Dropout(0.2) -> Dense(1, sigmoid). Compiled with binary cross-entropy,
    the Adam optimizer and accuracy as the tracked metric.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        The compiled ``Sequential`` model (not yet trained).
    """
    network = Sequential([
        Dense(64, activation='relu', input_dim=input_dim),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network

def train_deep_learning_model(model, X_train, y_train, epochs=100, batch_size=32):
    """Train ``model`` on the given data and return the same model object.

    Args:
        model: Object exposing ``fit(X, y, epochs=..., batch_size=...)``.
        X_train: Training features.
        y_train: Training targets.
        epochs: Number of passes over the training data.
        batch_size: Samples per gradient update.

    Returns:
        ``model`` itself; whatever ``fit`` returns is discarded.
    """
    fit_options = {'epochs': epochs, 'batch_size': batch_size}
    model.fit(X_train, y_train, **fit_options)
    return model

if __name__ == "__main__":
    # Demo driver: load -> merge/clean -> PCA features -> random forest.
    data_file_path = "your_data_file.csv"
    metadata_file_path = "your_metadata_file.csv"

    # Load and preprocess data
    data = load_data(data_file_path)
    metadata = load_data(metadata_file_path)
    preprocessed_data = preprocess_data(data, metadata)

    # feature_extraction() treats the LAST column as the label and drops it.
    # After the merge nothing guarantees that 'label' is last, so move it
    # there explicitly; otherwise the label could leak into the features.
    # NOTE(review): 'cell_id' is still among the feature columns - confirm
    # whether it should be dropped before feature extraction.
    label_last = [col for col in preprocessed_data.columns if col != 'label'] + ['label']
    preprocessed_data = preprocessed_data[label_last]

    # Feature extraction
    labels = preprocessed_data['label']  # Assuming a column named 'label' contains the class labels
    features = feature_extraction(preprocessed_data)

    # Prepare data for machine learning
    X_train, X_test, y_train, y_test = prepare_data_for_ml(features, labels)

    # Train and evaluate a Random Forest classifier
    rf_clf = train_rf_classifier(X_train, y_train)
    rf_accuracy, rf_cm = evaluate_classifier(rf_clf, X_test, y_test)
    print(f"Random Forest Accuracy: {rf_accuracy}")
    print(f"Random Forest Confusion Matrix: {rf_cm}")


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage import measure
from skimage.segmentation import clear_border
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def load_image_data(image_folder, image_files):
    """Load the named files from a folder as grayscale images.

    Args:
        image_folder: Directory that contains the files.
        image_files: Iterable of file names inside ``image_folder``.

    Returns:
        A list with one ``cv2.imread`` result per file name, in input order.
        NOTE(review): ``cv2.imread`` is documented to return ``None`` for
        files it cannot decode, so entries may be ``None`` - confirm callers
        handle that.
    """
    return [
        cv2.imread(os.path.join(image_folder, file_name), cv2.IMREAD_GRAYSCALE)
        for file_name in image_files
    ]

def get_morphological_features(images):
    """Compute mean area, perimeter and eccentricity of the objects in each image.

    Each image is binarised with Otsu's threshold, objects touching the
    border are removed, the remaining connected regions are labelled and
    their region properties averaged.

    Args:
        images: Iterable of single-channel images accepted by ``cv2.threshold``.

    Returns:
        Array with one row ``[mean_area, mean_perimeter, mean_eccentricity]``
        per image. An image without any remaining region contributes
        ``[0.0, 0.0, 0.0]`` instead of NaNs.
    """
    features = []
    for image in images:
        # Thresholding (Otsu picks the threshold, so the 0 is ignored)
        _, thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Clear border artifacts, then label and measure connected regions
        cleared = clear_border(thresh)
        props = measure.regionprops(measure.label(cleared))

        if not props:
            # np.mean([]) would yield NaN plus a RuntimeWarning and poison
            # any model trained on the feature matrix.
            features.append([0.0, 0.0, 0.0])
            continue

        features.append([
            np.mean([prop.area for prop in props]),
            np.mean([prop.perimeter for prop in props]),
            np.mean([prop.eccentricity for prop in props]),
        ])

    return np.array(features)

def build_cnn_model(input_shape):
    """Build and compile a small convolutional binary classifier.

    Layers: Conv2D(32, 3x3, relu) -> MaxPooling2D(2x2) -> Conv2D(64, 3x3,
    relu) -> MaxPooling2D(2x2) -> Flatten -> Dense(64, relu) -> Dropout(0.5)
    -> Dense(1, sigmoid). Compiled with Adam, binary cross-entropy and
    accuracy as the tracked metric.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.

    Returns:
        The compiled ``Sequential`` model (not yet trained).
    """
    network = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

def preprocess_images(images, img_size):
    """Resize images to a common size and scale them to [0, 1] for the CNN.

    Args:
        images: Iterable of single-channel images.
        img_size: ``(rows, cols)`` of the output, matching the
            ``(img_size[0], img_size[1], 1)`` layout of the returned batch.

    Returns:
        Float array of shape ``(n_images, img_size[0], img_size[1], 1)``
        with values divided by 255.
    """
    rows, cols = img_size[0], img_size[1]
    # cv2.resize expects dsize as (width, height) == (cols, rows). Passing
    # img_size straight through only worked for square sizes; otherwise the
    # reshape below silently scrambled the pixels.
    resized = [cv2.resize(img, (cols, rows)) for img in images]
    batch = np.array(resized).reshape(-1, rows, cols, 1)
    return batch / 255.0

if __name__ == "__main__":
    # Demo driver: load every file of a folder, print morphological features,
    # then prepare a train/test split and build (but not train) the CNN.
    image_folder = "your_image_folder"
    # os.listdir returns every entry of the folder, not only image files.
    image_files = os.listdir(image_folder)

    images = load_image_data(image_folder, image_files)

    # Calculate morphological features
    morph_features = get_morphological_features(images)
    print("Morphological features: ", morph_features)

    # CNN feature engineering
    img_size = (64, 64)
    preprocessed_images = preprocess_images(images, img_size)
    # Placeholder: one label per image is required. With exactly two entries
    # the split below presumably only works for a two-image folder - replace
    # with the real labels.
    labels = np.array([0, 1])  # Adjust according to your dataset

    X_train, X_test, y_train, y_test = train_test_split(preprocessed_images, labels, test_size=0.2, random_state=42)

    # Single-channel input: (rows, cols, 1). The model is only constructed
    # here; this cell neither trains nor evaluates it.
    cnn_model = build_cnn_model((img_size[0], img_size[1], 1))

In [None]:
import cv2
import numpy as np
from skimage import measure
from skimage.segmentation import clear_border
import matplotlib.pyplot as plt

def preprocess_image(image, threshold_value):
    """Convert a BGR image to grayscale and binarise it at a fixed threshold.

    Args:
        image: Colour image in OpenCV's BGR channel order.
        threshold_value: Threshold passed to ``cv2.threshold`` with
            ``THRESH_BINARY`` and a maximum value of 255.

    Returns:
        The thresholded single-channel image.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (used_threshold, image); only the image is needed.
    binary = cv2.threshold(grayscale, threshold_value, 255, cv2.THRESH_BINARY)[1]
    return binary

def split_cells(thresh_image, min_size=100, max_size=5000):
    """Cut a thresholded image into one crop per suitably sized region.

    Objects touching the border are removed before labelling. Every labelled
    region whose area lies in ``[min_size, max_size]`` contributes the
    bounding-box crop taken from ``thresh_image`` (the uncleared input).

    Args:
        thresh_image: Binary image.
        min_size: Smallest accepted region area (inclusive).
        max_size: Largest accepted region area (inclusive).

    Returns:
        List of bounding-box crops, in region order.
    """
    labelled = measure.label(clear_border(thresh_image))

    crops = []
    for region in measure.regionprops(labelled):
        area = region.area
        if area < min_size or area > max_size:
            continue
        bbox = region.bbox
        row_span = slice(bbox[0], bbox[2])
        col_span = slice(bbox[1], bbox[3])
        crops.append(thresh_image[row_span, col_span])

    return crops

if __name__ == "__main__":
    # Demo driver: threshold one microscopy image, cut it into per-cell
    # crops and show the crops side by side.
    image_path = "your_microscopy_image_path.jpg"
    # NOTE(review): cv2.imread is documented to return None when the file
    # cannot be read; nothing below checks for that.
    image = cv2.imread(image_path)
    threshold_value = 128  # Adjust based on fluorescence intensity
    min_size = 100  # Minimum cell size (adjust as needed)
    max_size = 5000  # Maximum cell size (adjust as needed)

    thresh_image = preprocess_image(image, threshold_value)
    individual_cells = split_cells(thresh_image, min_size, max_size)

    # Display individual cells in a single row, one subplot per crop
    for i, cell in enumerate(individual_cells):
        plt.subplot(1, len(individual_cells), i+1)  # subplot indices are 1-based
        plt.imshow(cell, cmap='gray')
        plt.axis('off')
    plt.show()

In [None]:
import cv2
import numpy as np
from skimage import measure
from skimage.morphology import convex_hull_image

def sholl_feature(cell):
    """Count foreground pixels on concentric circles around the image centre.

    For every integer radius ``r`` with ``1 <= r < max_radius`` a one-pixel
    wide circle is drawn around the centre of ``cell`` and the number of
    non-zero pixels shared by the circle and ``cell`` is recorded.
    ``max_radius`` is the truncated distance from the centre to the image
    corner.

    Args:
        cell: Single-channel binary image of one cell.

    Returns:
        List of pixel counts, one per radius; its length therefore depends
        on the size of ``cell``.
    """
    rows, cols = cell.shape[:2]
    center_row, center_col = rows // 2, cols // 2
    max_radius = int(np.sqrt(center_row**2 + center_col**2))

    # cv2.circle takes the centre as (x, y) == (col, row). The previous
    # (row, col) order misplaced the circles on non-square crops. Plain
    # Python ints are used instead of NumPy integers for the centre tuple.
    center_xy = (int(center_col), int(center_row))

    sholl_counts = []
    for r in range(1, max_radius):
        circle_img = np.zeros_like(cell)
        cv2.circle(circle_img, center_xy, r, 255, 1)
        intersections = cv2.bitwise_and(circle_img, cell)
        sholl_counts.append(cv2.countNonZero(intersections))

    return sholl_counts

def fmax_feature(cell):
    """Return the longer side of the minimum-area rectangle around the cell.

    The largest external contour (by area) is enclosed with
    ``cv2.minAreaRect`` and the longer of the rectangle's two sides is
    returned.

    Args:
        cell: Single-channel binary image of one cell.

    Returns:
        The longer rectangle side, or ``0.0`` when ``cell`` has no
        foreground / no contour (previously a ``ValueError`` from ``max``).
    """
    if not np.any(cell):
        return 0.0  # blank crop: nothing to measure

    # [-2] selects the contour list regardless of whether findContours
    # returns (contours, hierarchy) or (image, contours, hierarchy).
    contours = cv2.findContours(cell, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return 0.0

    largest_contour = max(contours, key=cv2.contourArea)
    _, (w, h), _ = cv2.minAreaRect(largest_contour)
    return max(w, h)

def solidity_feature(cell):
    """Return the ratio of the cell's area to the area of its convex hull.

    Args:
        cell: Binary image of one cell; non-zero pixels are foreground.

    Returns:
        ``area / convex_area``, or ``0.0`` for an image without foreground
        (previously 0/0, i.e. NaN with a RuntimeWarning).
    """
    foreground = cell > 0
    area = np.count_nonzero(foreground)
    if area == 0:
        return 0.0  # empty crop: the hull is empty too, so the ratio is undefined

    convex_area = np.count_nonzero(convex_hull_image(foreground))
    return area / convex_area

def feret_diameter_ratio_feature(cell):
    """Return the long-side / short-side ratio of the cell's bounding rectangle.

    The largest external contour (by area) is enclosed with
    ``cv2.minAreaRect``; the ratio of the rectangle's longer to shorter
    side is returned.

    Args:
        cell: Single-channel binary image of one cell.

    Returns:
        ``fmax / fmin``, which is >= 1 for any measurable shape. ``0.0`` is
        returned as a sentinel when the ratio is undefined: no foreground,
        no contour, or a degenerate rectangle whose shorter side is 0
        (previously ``ValueError`` / ``ZeroDivisionError``).
    """
    if not np.any(cell):
        return 0.0  # blank crop: nothing to measure

    # [-2] selects the contour list regardless of whether findContours
    # returns (contours, hierarchy) or (image, contours, hierarchy).
    contours = cv2.findContours(cell, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return 0.0

    largest_contour = max(contours, key=cv2.contourArea)
    _, (w, h), _ = cv2.minAreaRect(largest_contour)
    fmax = max(w, h)
    fmin = min(w, h)
    if fmin == 0:
        return 0.0  # single pixel or straight line: avoid dividing by zero

    return fmax / fmin

if __name__ == "__main__":
    # Demo driver: binarise one cell image and print each shape feature.
    cell_image_path = "your_cell_image_path.jpg"
    # NOTE(review): cv2.imread is documented to return None when the file
    # cannot be read; nothing below checks for that.
    cell = cv2.imread(cell_image_path, cv2.IMREAD_GRAYSCALE)
    # Fixed threshold of 128: brighter pixels become 255, the rest 0.
    _, cell = cv2.threshold(cell, 128, 255, cv2.THRESH_BINARY)

    sholl_counts = sholl_feature(cell)
    print("Sholl counts:", sholl_counts)

    fmax = fmax_feature(cell)
    print("Fmax:", fmax)

    solidity = solidity_feature(cell)
    print("Solidity:", solidity)

    feret_diameter_ratio = feret_diameter_ratio_feature(cell)
    print("Feret's Diameter Ratio:", feret_diameter_ratio)


In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Include the feature calculation functions (sholl_feature, fmax_feature, solidity_feature, feret_diameter_ratio_feature) here

def extract_features(cell):
    """Assemble the feature vector of a single binary cell image.

    Returns:
        A list laid out as ``[fmax, solidity, feret_diameter_ratio]``
        followed by the Sholl counts. Its length follows the length of the
        Sholl profile returned by ``sholl_feature``.
    """
    radial_profile = sholl_feature(cell)
    shape_descriptors = [
        fmax_feature(cell),
        solidity_feature(cell),
        feret_diameter_ratio_feature(cell),
    ]

    # You can modify the layout of the feature vector based on your needs
    return shape_descriptors + radial_profile

def load_cells_and_labels(cells_folder):
    """Load every readable cell image of a folder together with its label.

    Files are visited in sorted name order so that repeated runs (and any
    seeded train/test split built on the result) see the same sample order.
    Each image is read as grayscale and binarised at 128. Files that
    ``cv2.imread`` cannot decode are skipped together with their label, so
    both lists stay aligned.

    Args:
        cells_folder: Directory containing one image file per cell.

    Returns:
        ``(cells, labels)``: the binarised images and, per image, ``0`` when
        the file name contains ``'negative'`` and ``1`` otherwise.
    """
    cells = []
    labels = []

    for cell_file in sorted(os.listdir(cells_folder)):
        cell_path = os.path.join(cells_folder, cell_file)
        cell = cv2.imread(cell_path, cv2.IMREAD_GRAYSCALE)
        if cell is None:
            # Not an image (or unreadable); cv2.threshold would crash on None.
            continue
        _, cell = cv2.threshold(cell, 128, 255, cv2.THRESH_BINARY)

        cells.append(cell)
        labels.append(0 if 'negative' in cell_file else 1)  # Adjust based on your file naming convention

    return cells, labels

if __name__ == "__main__":
    # Demo driver: shape features per cell -> grid-searched SVM -> report.
    cells_folder = "your_cells_folder"
    cells, labels = load_cells_and_labels(cells_folder)

    feature_rows = [list(extract_features(cell)) for cell in cells]

    # The Sholl part of each row grows with the crop size, so rows of
    # differently sized cells have different lengths and cannot be stacked
    # directly. Pad the tail with zeros, treating radii beyond a crop's own
    # extent as having no intersections.
    row_length = max((len(row) for row in feature_rows), default=0)
    feature_matrix = np.array(
        [row + [0] * (row_length - len(row)) for row in feature_rows],
        dtype=float,
    )

    X_train, X_test, y_train, y_test = train_test_split(feature_matrix, labels, test_size=0.2, random_state=42)

    # Optimize SVM model using GridSearchCV
    param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001], 'kernel': ['rbf', 'linear']}
    grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
    grid.fit(X_train, y_train)

    print("Best parameters found by grid search:", grid.best_params_)

    # refit=True means `grid` predicts with the best estimator found
    predictions = grid.predict(X_test)

    print("Classification report:")
    print(classification_report(y_test, predictions))

    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)


In [None]:
import os
import cv2
import numpy as np
from sklearn.cluster import SpectralClustering
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score

# Include the feature calculation functions (split_cells, sholl_feature, fmax_feature, solidity_feature, feret_diameter_ratio_feature) here

def segment_rna_spots(image, threshold_value=128):
    """Binarise a BGR image to obtain a mask of bright spots.

    Args:
        image: Colour image in OpenCV's BGR channel order.
        threshold_value: Threshold passed to ``cv2.threshold`` with
            ``THRESH_BINARY`` and a maximum value of 255.

    Returns:
        The thresholded single-channel image.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (used_threshold, image); only the image is needed.
    spot_mask = cv2.threshold(grayscale, threshold_value, 255, cv2.THRESH_BINARY)[1]

    return spot_mask

def rna_spot_properties(cell, rna_spots, min_spot_size=5, max_spot_size=50, intensity_image=None):
    """Measure the RNA spots that fall inside one cell.

    Spots outside the cell mask are zeroed, the remaining spots are labelled
    as connected regions, and regions whose area lies in
    ``[min_spot_size, max_spot_size]`` are kept.

    Args:
        cell: Cell image; pixels ``> 0`` form the cell mask. It is used to
            index ``rna_spots`` directly, so both presumably need the same
            shape - TODO confirm against how the cell crops are produced.
        rna_spots: Spot mask; it is copied, not modified.
        min_spot_size: Smallest accepted spot area (inclusive).
        max_spot_size: Largest accepted spot area (inclusive).
        intensity_image: Optional image, same shape as ``rna_spots``, used
            for ``mean_intensity``. Defaults to the masked spot image; if
            that is a binary mask the mean intensity is constant.

    Returns:
        Dict with the keys ``'label'``, ``'area'``, ``'mean_intensity'`` and
        ``'equivalent_diameter'``, each mapping to a list with one entry per
        kept spot.
    """
    # This cell of the notebook does not import `measure` itself.
    from skimage import measure

    cell_mask = cell > 0
    rna_spots_in_cell = rna_spots.copy()
    rna_spots_in_cell[~cell_mask] = 0

    if intensity_image is None:
        intensity_image = rna_spots_in_cell

    spot_labels = measure.label(rna_spots_in_cell)
    # 'mean_intensity' is only available when an intensity image is given;
    # without it regionprops_table raises.
    properties = measure.regionprops_table(
        spot_labels,
        intensity_image=intensity_image,
        properties=('label', 'area', 'mean_intensity', 'equivalent_diameter'),
    )

    # Decide once which spots pass the size filter, then apply to every column.
    kept = [
        index
        for index, area in enumerate(properties['area'])
        if min_spot_size <= area <= max_spot_size
    ]
    return {name: [values[index] for index in kept] for name, values in properties.items()}

def extract_features(cells, rna_spots):
    """Build the per-cell RNA-spot feature matrix.

    Args:
        cells: Iterable of cell images passed to ``rna_spot_properties``.
        rna_spots: Spot mask shared by all cells.

    Returns:
        Array with one row ``[num_spots, avg_intensity, avg_diameter]`` per
        cell. A cell without any accepted spot contributes
        ``[0, 0.0, 0.0]`` instead of NaN averages.
    """
    feature_matrix = []

    for cell in cells:
        cell_properties = rna_spot_properties(cell, rna_spots)
        num_spots = len(cell_properties['label'])

        if num_spots:
            avg_intensity = np.mean(cell_properties['mean_intensity'])
            avg_diameter = np.mean(cell_properties['equivalent_diameter'])
        else:
            # np.mean([]) is NaN (with a RuntimeWarning) and would break the
            # downstream estimators.
            avg_intensity = avg_diameter = 0.0

        # Add more features as needed
        feature_matrix.append([num_spots, avg_intensity, avg_diameter])

    return np.array(feature_matrix)

if __name__ == "__main__":
    # Demo driver: RNA-spot features per cell -> spectral embedding -> XGBoost.
    # SpectralClustering only assigns cluster labels and has no
    # fit_transform(); SpectralEmbedding is the estimator that produces a
    # low-dimensional representation. Imported here so this cell stays
    # self-contained.
    from sklearn.manifold import SpectralEmbedding

    image_path = "your_spatial_transcriptomics_image_path.jpg"
    image = cv2.imread(image_path)

    cells_folder = "your_cells_folder"  # Contains individual cell images
    cells, labels = load_cells_and_labels(cells_folder)

    rna_spots = segment_rna_spots(image)
    feature_matrix = extract_features(cells, rna_spots)

    # Perform spectral embedding for dimensionality reduction
    reduced_dim = 3
    embedding = SpectralEmbedding(n_components=reduced_dim, affinity='nearest_neighbors', random_state=42)
    reduced_features = embedding.fit_transform(feature_matrix)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(reduced_features, labels, test_size=0.2, random_state=42)

    # Train and test XGBoost classifier
    model = XGBClassifier()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    print("Classification report:")
    print(classification_report(y_test, y_pred))

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
