In [1]:
import pandas as pd
from sklearn.feature_selection import VarianceThreshold

def remove_low_variance_features(input_csv, output_csv, threshold=0.0):
    """Drop binary and low-variance feature columns from a CSV file.

    The first column of ``input_csv`` is treated as the sample identifier and
    is always kept. Of the remaining columns, those with two or fewer
    distinct non-missing values are discarded first; scikit-learn's
    ``VarianceThreshold`` then filters the rest using ``threshold``. The
    surviving columns are written to ``output_csv`` without the row index.

    Args:
        input_csv: Path or buffer accepted by ``pandas.read_csv``.
        output_csv: Destination accepted by ``DataFrame.to_csv``.
        threshold: Variance cutoff forwarded to ``VarianceThreshold``.

    Raises:
        ValueError: Propagated from scikit-learn when there is no feature
            column left to fit on, or none passes the variance threshold.
    """
    df = pd.read_csv(input_csv)

    # Column 0 holds the sample names; everything after it is a feature.
    sample_column = df.columns[0]
    features = df.iloc[:, 1:]

    # nunique() ignores NaN, so this drops constant columns as well as
    # binary ones.
    non_binary_features = features.loc[:, features.nunique() > 2]

    # Only the support mask is needed from the selector. Rebuilding the
    # frame from the transformed NumPy array would upcast integer columns to
    # float whenever a float column is present, changing how the values are
    # written out (e.g. "1" becoming "1.0").
    selector = VarianceThreshold(threshold=threshold)
    selector.fit(non_binary_features)
    kept_columns = non_binary_features.columns[selector.get_support()]

    # Select straight from the original frame so every kept column retains
    # its dtype and the sample names stay in the first position.
    df_reduced = df[[sample_column, *kept_columns]]

    df_reduced.to_csv(output_csv, index=False)


list