In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
def generate_deltaprot_designs_data(base_dir="/home/tadas/code/single_chain_dp_bristol"):
    """Build the combined design table and write it to ``deltaprot_designs_data.csv``.

    Reads the selected designs of the ``no_disulfide`` and ``variable_linkers``
    pipelines, gives every design a ``name`` of the form
    ``<pipeline>_<orientation_code>``, and left-joins the designs onto the
    96-well plate order sheet (matching its ``Name`` column against ``name``).
    Every row of the order sheet is kept, in the order sheet's own row order;
    rows without a matching design get missing values in the design columns.

    Parameters
    ----------
    base_dir : str or path-like, optional
        Project root that holds the input CSVs; the output CSV is written
        there as well. Defaults to the location that used to be hard-coded.

    Returns
    -------
    None
        The merged table is written to ``<base_dir>/deltaprot_designs_data.csv``.
    """
    base_dir = Path(base_dir)
    selected_dir = base_dir / "selected_deltaprots"

    no_disulfide_df = pd.read_csv(
        selected_dir / "no_disulfide" / "no_disulfide_selected_deltaprots.csv"
    )
    # The doubled "variable_linkers" folder is part of the on-disk layout.
    variable_linkers_df = pd.read_csv(
        selected_dir / "variable_linkers" / "variable_linkers" / "variable_linkers_selected_deltaprots.csv"
    )

    no_disulfide_df["name"] = "no_disulfide_" + no_disulfide_df["orientation_code"]
    variable_linkers_df["name"] = "variable_linkers_" + variable_linkers_df["orientation_code"]

    # Stack the two pipeline dataframes into one table of designs
    designs_df = pd.concat([no_disulfide_df, variable_linkers_df], axis=0, ignore_index=True)

    well_df = pd.read_csv(base_dir / "order_optimised_codons_96_wp.csv")

    # Left-merge the designs onto the order sheet (Name -> name). No sorting is
    # done: the rows stay in the order of the order sheet.
    merged_df = pd.merge(well_df, designs_df, how="left", left_on="Name", right_on="name")
    merged_df.to_csv(base_dir / "deltaprot_designs_data.csv", index=False)

def load_deltaprot_designs_data(base_dir="/home/tadas/code/single_chain_dp_bristol"):
    """Read ``deltaprot_designs_data.csv`` from the project root.

    Parameters
    ----------
    base_dir : str or path-like, optional
        Directory that contains ``deltaprot_designs_data.csv``. Defaults to
        the location that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        The contents of the CSV file.
    """
    return pd.read_csv(Path(base_dir) / "deltaprot_designs_data.csv")

In [None]:
# Rebuild the merged design table on disk, then read it back into `df`,
# the dataframe that the following cells annotate, save and plot.
generate_deltaprot_designs_data()
df = load_deltaprot_designs_data()


In [None]:
# Annotate `df` with experimental results, keyed on the plate "Well Position".

# received_from_idt: everything was received except C2, C4, D5, D6, F2, F3, F6
not_received = ['C2', 'C4', 'D5', 'D6', 'F2', 'F3', 'F6']
df['received_from_idt'] = ~df["Well Position"].isin(not_received)

# transformation_attempted: A1-A12, B1-B12, C1, C3, F4, C5, C6
# NOTE(review): 'F4' sits among C wells in this list - confirm it is not a typo.
attempted_ids = set([f"A{i}" for i in range(1, 13)] +
                    [f"B{i}" for i in range(1, 13)] +
                    ['C1', 'C3', 'F4', 'C5', 'C6'])
df['transformation_attempted'] = df["Well Position"].isin(attempted_ids)

# transformation_successful: only set for wells where a transformation was
# attempted (NaN otherwise); True for every attempted well except C6.
df['transformation_successful'] = df["Well Position"].apply(
    lambda well: (well != 'C6') if well in attempted_ids else np.nan
)

# expression_levels: "low", "medium" or "high"; wells not listed here are left
# as NaN by Series.map.
#   low:    A1, A3, A6, A7, A8, A10, B1
#   medium: A5, A11, B2, B3
#   high:   A9, A12
expr_map = {
    **dict.fromkeys(['A1', 'A3', 'A6', 'A7', 'A8', 'A10', 'B1'], 'low'),
    **dict.fromkeys(['A5', 'A11', 'B2', 'B3'], 'medium'),
    **dict.fromkeys(['A9', 'A12'], 'high')
}
df['expression_levels'] = df["Well Position"].map(expr_map)




In [None]:
# Save the annotated dataframe to CSV. The output folder is created first
# because DataFrame.to_csv does not create missing directories.
results_csv = Path("/home/tadas/code/single_chain_dp_bristol/experimental_results/deltaprot_designs_data_with_results.csv")
results_csv.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(results_csv, index=False)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Compare every numeric column of `df` across the categorical
# 'expression_levels' column added in the annotation cell.
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()

# Plot the levels in their natural low -> high order instead of whatever order
# they happen to appear in the data.
expression_order = ['low', 'medium', 'high']

# 1. Box plot for each numeric feature, with the individual data points
#    overlaid as a swarm plot on the same axes
for col in numeric_columns:
    print(col)
    fig, ax = plt.subplots(figsize=(4, 3))
    sns.boxplot(x='expression_levels', y=col, data=df, order=expression_order, ax=ax)
    sns.swarmplot(x='expression_levels', y=col, data=df, order=expression_order, ax=ax)
    ax.set_title(f'Box Plot of {col} by Expression Levels')
    plt.show()
    # Release the figure so a long column list does not accumulate open figures
    plt.close(fig)

# # 2. Violin Plots for each numeric feature
# for col in numeric_columns:
#     plt.figure(figsize=(6,4))
#     sns.violinplot(x='expression_levels', y=col, data=df)
#     plt.title(f'Violin Plot of {col} by Expression Levels')
#     plt.show()

# # 3. Scatter Plot Matrix (Pairplot)
# sns.pairplot(df, hue='expression_levels', vars=numeric_columns)
# plt.show()

# # 4. Faceted Histograms using seaborn's FacetGrid for one numeric feature example:
# g = sns.FacetGrid(df, col="expression_levels", col_wrap=4, height=3)
# g.map(plt.hist, numeric_columns[0], bins=20)
# plt.show()
