In [1]:
import pdb
from pathlib import Path

import pandas as pd
from PIL import Image, ImageDraw
from scipy.special import expit

### Image and hex codes setup

In [137]:
# Canvas size in pixels (columns, rows); the CreateImage* functions read these globals.
width, height = 25, 25

In [3]:
colormap = {
    # X11 colour names and hex codes from https://drafts.csswg.org/css-color-4/,
    # keeping only the "gray" spelling of the grey shades.
    # NOTE(review): this looks like a trimmed copy of that table -- there is no
    # entry for "white", "cyan" or "fuchsia" -- so it is not a complete superset
    # of the HTML 4.0 colour names. "black" is still present.
    "aliceblue": "#f0f8ff",
    "antiquewhite": "#faebd7",
    "aqua": "#00ffff",
    "aquamarine": "#7fffd4",
    "azure": "#f0ffff",
    "beige": "#f5f5dc",
    "bisque": "#ffe4c4",
    "black": "#000000",
    "blanchedalmond": "#ffebcd",
    "blue": "#0000ff",
    "blueviolet": "#8a2be2",
    "brown": "#a52a2a",
    "burlywood": "#deb887",
    "cadetblue": "#5f9ea0",
    "chartreuse": "#7fff00",
    "chocolate": "#d2691e",
    "coral": "#ff7f50",
    "cornflowerblue": "#6495ed",
    "cornsilk": "#fff8dc",
    "crimson": "#dc143c",
    "darkblue": "#00008b",
    "darkcyan": "#008b8b",
    "darkgoldenrod": "#b8860b",
    "darkgray": "#a9a9a9",
    "darkgreen": "#006400",
    "darkkhaki": "#bdb76b",
    "darkmagenta": "#8b008b",
    "darkolivegreen": "#556b2f",
    "darkorange": "#ff8c00",
    "darkorchid": "#9932cc",
    "darkred": "#8b0000",
    "darksalmon": "#e9967a",
    "darkseagreen": "#8fbc8f",
    "darkslateblue": "#483d8b",
    "darkslategray": "#2f4f4f",
    "darkturquoise": "#00ced1",
    "darkviolet": "#9400d3",
    "deeppink": "#ff1493",
    "deepskyblue": "#00bfff",
    "dimgray": "#696969",
    "dodgerblue": "#1e90ff",
    "firebrick": "#b22222",
    "floralwhite": "#fffaf0",
    "forestgreen": "#228b22",
    "gainsboro": "#dcdcdc",
    "ghostwhite": "#f8f8ff",
    "gold": "#ffd700",
    "goldenrod": "#daa520",
    "gray": "#808080",
    "green": "#008000",
    "greenyellow": "#adff2f",
    "honeydew": "#f0fff0",
    "hotpink": "#ff69b4",
    "indianred": "#cd5c5c",
    "indigo": "#4b0082",
    "ivory": "#fffff0",
    "khaki": "#f0e68c",
    "lavender": "#e6e6fa",
    "lavenderblush": "#fff0f5",
    "lawngreen": "#7cfc00",
    "lemonchiffon": "#fffacd",
    "lightblue": "#add8e6",
    "lightcoral": "#f08080",
    "lightcyan": "#e0ffff",
    "lightgoldenrodyellow": "#fafad2",
    "lightgreen": "#90ee90",
    "lightgray": "#d3d3d3",
    "lightpink": "#ffb6c1",
    "lightsalmon": "#ffa07a",
    "lightseagreen": "#20b2aa",
    "lightskyblue": "#87cefa",
    "lightslategray": "#778899",
    "lightsteelblue": "#b0c4de",
    "lightyellow": "#ffffe0",
    "lime": "#00ff00",
    "limegreen": "#32cd32",
    "linen": "#faf0e6",
    "magenta": "#ff00ff",
    "maroon": "#800000",
    "mediumaquamarine": "#66cdaa",
    "mediumblue": "#0000cd",
    "mediumorchid": "#ba55d3",
    "mediumpurple": "#9370db",
    "mediumseagreen": "#3cb371",
    "mediumslateblue": "#7b68ee",
    "mediumspringgreen": "#00fa9a",
    "mediumturquoise": "#48d1cc",
    "mediumvioletred": "#c71585",
    "midnightblue": "#191970",
    "mintcream": "#f5fffa",
    "mistyrose": "#ffe4e1",
    "moccasin": "#ffe4b5",
    "navajowhite": "#ffdead",
    "navy": "#000080",
    "oldlace": "#fdf5e6",
    "olive": "#808000",
    "olivedrab": "#6b8e23",
    "orange": "#ffa500",
    "orangered": "#ff4500",
    "orchid": "#da70d6",
    "palegoldenrod": "#eee8aa",
    "palegreen": "#98fb98",
    "paleturquoise": "#afeeee",
    "palevioletred": "#db7093",
    "papayawhip": "#ffefd5",
    "peachpuff": "#ffdab9",
    "peru": "#cd853f",
    "pink": "#ffc0cb",
    "plum": "#dda0dd",
    "powderblue": "#b0e0e6",
    "purple": "#800080",
    "rebeccapurple": "#663399",
    "red": "#ff0000",
    "rosybrown": "#bc8f8f",
    "royalblue": "#4169e1",
    "saddlebrown": "#8b4513",
    "salmon": "#fa8072",
    "sandybrown": "#f4a460",
    "seagreen": "#2e8b57",
    "seashell": "#fff5ee",
    "sienna": "#a0522d",
    "silver": "#c0c0c0",
    "skyblue": "#87ceeb",
    "slateblue": "#6a5acd",
    "slategray": "#708090",
    "snow": "#fffafa",
    "springgreen": "#00ff7f",
    "steelblue": "#4682b4",
    "tan": "#d2b48c",
    "teal": "#008080",
    "thistle": "#d8bfd8",
    "tomato": "#ff6347",
    "turquoise": "#40e0d0",
    "violet": "#ee82ee",
    "wheat": "#f5deb3",
    "whitesmoke": "#f5f5f5",
    "yellow": "#ffff00",
    "yellowgreen": "#9acd32",
}

# Hex codes only, in the dict's insertion order; CreateImage_PatientColored
# hands them out to patients by position.
colormap_values = list(colormap.values())

### Dataset set up

In [39]:
# Main table: one row per gene (indexed by the 'Gene' column); it also carries
# the 'Localization' label column that is filtered on and removed below.
df = pd.read_csv(
    '/Users/zhuoheng/DL/kr_train_labeled_0622.csv',
    index_col='Gene',
)

In [40]:
# Residual table, indexed by the 'Gene' column like the main table
residual_df = pd.read_csv(
    '/Users/zhuoheng/DL/kr_train_resid_0615.csv',
    index_col='Gene',
)

In [41]:
# Keep only the genes whose 'Localization' label is something other than 'Unclassified'
df2 = df[df['Localization'].ne('Unclassified')]

In [55]:
# Remove the 'Localization' label column so only the abundance columns remain.
# The column mask is built from df2 itself (it used to come from df, which only
# worked because both frames happen to share the same columns).
df3 = df2.loc[:, df2.columns != 'Localization']

In [65]:
# Sigmoid transform for the mRNA and protein halves respectively.
# expit is element-wise, so the default column-wise apply yields exactly the
# same values as a row-wise (axis=1) call without building one Series per gene.
# NOTE(review): the 0:122 / 122:244 slices assume 122 mRNA columns followed by
# 122 protein columns -- confirm against the CSV layout.
df3_1 = df3.iloc[:, 0:122].apply(expit)
df3_2 = df3.iloc[:, 122:244].apply(expit)

# merge back together on the shared 'Gene' index
df4 = pd.merge(df3_1, df3_2, on='Gene')

In [69]:
# Sort the columns by name. The CreateImage* functions split columns by
# position (even positions vs odd positions), so the column order matters.
df4 = df4.sort_index(axis='columns')

In [70]:
# Preview the sigmoid-scaled, column-sorted table
df4

Unnamed: 0_level_0,CPT000814_mrna,CPT000814_prot,CPT001846_mrna,CPT001846_prot,X01BR001_mrna,X01BR001_prot,X01BR008_mrna,X01BR008_prot,X01BR009_mrna,X01BR009_prot,...,X21BR001_mrna,X21BR001_prot,X21BR002_mrna,X21BR002_prot,X21BR010_mrna,X21BR010_prot,X22BR005_mrna,X22BR005_prot,X22BR006_mrna,X22BR006_prot
Gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG,0.524147,0.271424,0.723749,0.624965,0.408001,0.723899,0.356522,0.292271,0.634552,0.600766,...,0.554861,0.272888,0.085572,0.182983,0.262946,0.583744,0.186459,0.464415,0.378245,0.466236
A2ML1,0.859043,0.840430,0.254680,0.367156,0.527988,0.489726,0.843511,0.782579,0.814541,0.840744,...,0.256509,0.382682,0.148185,0.376263,0.406102,0.326594,0.539868,0.169523,0.320222,0.229984
AAAS,0.715260,0.900963,0.537346,0.756653,0.145220,0.598047,0.536359,0.661013,0.687878,0.336609,...,0.857604,0.872870,0.863816,0.711914,0.179253,0.387679,0.535470,0.576307,0.739464,0.720338
AACS,0.633000,0.438942,0.269605,0.353412,0.446675,0.441320,0.381473,0.211212,0.648766,0.347700,...,0.650693,0.358306,0.717532,0.184827,0.060666,0.305918,0.435375,0.450499,0.516471,0.493822
AAGAB,0.244621,0.358087,0.720924,0.304359,0.054185,0.199908,0.334523,0.208309,0.250941,0.086676,...,0.540904,0.367972,0.891540,0.524932,0.773609,0.384216,0.662955,0.834704,0.490456,0.892135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZWINT,0.707341,0.797595,0.388768,0.520802,0.092787,0.457126,0.827945,0.702765,0.295953,0.479648,...,0.601563,0.452071,0.447086,0.288104,0.080635,0.376773,0.741295,0.779702,0.085668,0.170706
ZYG11B,0.172323,0.803673,0.545333,0.519580,0.344187,0.677775,0.065880,0.386541,0.256719,0.679914,...,0.242389,0.633072,0.329809,0.674711,0.781805,0.511145,0.388600,0.144236,0.882787,0.431801
ZYX,0.750476,0.235737,0.916291,0.890605,0.330009,0.584250,0.633181,0.343922,0.507021,0.453842,...,0.656818,0.255555,0.796781,0.496126,0.239208,0.552948,0.264082,0.497912,0.117372,0.885371
ZZEF1,0.612155,0.118039,0.375155,0.440953,0.398704,0.106020,0.704245,0.499687,0.818218,0.461697,...,0.898757,0.900059,0.844588,0.652678,0.226921,0.521103,0.284117,0.406867,0.001039,0.909681


In [100]:
# Gene labels present in both df4 and residual_df (6173 genes, per the output below)
common_indices = df4.index.intersection(residual_df.index)

In [101]:
# Inspect the shared gene labels
common_indices

Index(['A1BG', 'A2ML1', 'AAAS', 'AACS', 'AAGAB', 'AAK1', 'AAMDC', 'AAMP',
       'AAR2', 'AARS',
       ...
       'ZRSR2', 'ZSCAN18', 'ZSWIM8', 'ZW10', 'ZWILCH', 'ZWINT', 'ZYG11B',
       'ZYX', 'ZZEF1', 'ZZZ3'],
      dtype='object', name='Gene', length=6173)

### 1: Create image with all patients as same-coloured pixels (black points on a white background)

In [142]:
def CreateImage(DF, out_dir="/Users/zhuoheng/DL/images_white_pixels"):
    """Save one scatter-plot PNG per gene, every patient drawn as one black pixel.

    Columns of ``DF`` are read by position as interleaved pairs: positions
    0, 2, 4, ... supply the x values (mRNA) and positions 1, 3, 5, ... the y
    values (protein), so the frame must already be ordered that way. Values are
    multiplied by the canvas size, so they are expected to lie in the 0-1
    range. The canvas size comes from the notebook-level ``width``/``height``.

    Parameters
    ----------
    DF : pandas.DataFrame
        One row per gene; the index labels become the file names. Rows that
        contain a NaN are skipped. The frame itself is left untouched.
    out_dir : str or path-like, optional
        Folder that receives ``<gene>.png``; created when it does not exist.

    Returns
    -------
    None
    """
    # dropna() hands back a new frame. The earlier `df = DF` followed by
    # dropna(inplace=True) deleted the rows from the caller's table as well.
    df = DF.dropna(axis=0)
    if df.empty:
        return

    # Even positions -> x (mRNA), odd positions -> y (protein); one array row per gene.
    mrna_values = df.iloc[:, ::2].to_numpy()
    prot_values = df.iloc[:, 1::2].to_numpy()

    target_dir = Path(out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    for gene, xs, ys in zip(df.index, mrna_values, prot_values):
        # Blank white canvas for this gene
        image = Image.new("RGB", (width, height), "white")
        draw = ImageDraw.Draw(image)

        for x, y in zip(xs, ys):
            # Scale 0-1 values to pixel indices. A value of exactly 1.0 would
            # land on index `width`/`height`, one past the canvas, so it is
            # pinned to the last pixel instead of being silently dropped.
            px = min(int(x * width), width - 1)
            py = min(int(y * height), height - 1)
            # One pixel per patient (the former +/- 1e-50 "point size" was a
            # floating-point no-op, i.e. a zero-size rectangle).
            draw.point((px, py), fill="black")

        # Pillow's origin is the top-left corner. rotate(90) turns the canvas
        # counter-clockwise, so in the saved file the y values (protein) run
        # left-to-right and the x values (mRNA) bottom-to-top. expand=True
        # keeps the full canvas when width != height and changes nothing for
        # a square canvas.
        image = image.rotate(90, expand=True)

        image.save(target_dir / f"{gene}.png")


In [143]:
# Write one PNG per gene of df4 (rows containing NaN are dropped by the function)
CreateImage(df4)

### 2: Create image with patients as all different colored pixels

In [146]:
def CreateImage_PatientColored(DF, out_dir="/Users/zhuoheng/DL/images_patient"):
    """Save one scatter-plot PNG per gene, each patient in its own colour on black.

    Columns of ``DF`` are read by position as interleaved pairs: positions
    0, 2, 4, ... supply the x values (mRNA) and positions 1, 3, 5, ... the y
    values (protein). Values are multiplied by the canvas size, so they are
    expected to lie in the 0-1 range. The canvas size comes from the
    notebook-level ``width``/``height`` and the colours from ``colormap_values``.

    The k-th patient (k-th column pair) always receives the k-th usable colour.
    The background colour itself is removed from the palette, because a point
    in that colour would be invisible.

    Parameters
    ----------
    DF : pandas.DataFrame
        One row per gene; the index labels become the file names. Rows that
        contain a NaN are skipped. The frame itself is left untouched.
    out_dir : str or path-like, optional
        Folder that receives ``<gene>.png``; created when it does not exist.

    Raises
    ------
    ValueError
        If there are more patients than usable colours (previously the extra
        patients were silently left out of the image).
    """
    # dropna() hands back a new frame. The earlier `df = DF` followed by
    # dropna(inplace=True) deleted the rows from the caller's table as well.
    df = DF.dropna(axis=0)
    if df.empty:
        return

    # Even positions -> x (mRNA), odd positions -> y (protein); one array row per gene.
    mrna_values = df.iloc[:, ::2].to_numpy()
    prot_values = df.iloc[:, 1::2].to_numpy()
    n_patients = min(mrna_values.shape[1], prot_values.shape[1])

    # The canvas is black, so "#000000" cannot be used for a patient.
    background_hex = "#000000"
    palette = [code for code in colormap_values if code.lower() != background_hex]
    if len(palette) < n_patients:
        raise ValueError(
            f"{n_patients} patients but only {len(palette)} usable colours in colormap_values"
        )
    patient_colors = palette[:n_patients]

    target_dir = Path(out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    for gene, xs, ys in zip(df.index, mrna_values, prot_values):
        # Blank black canvas for this gene
        image = Image.new("RGB", (width, height), "black")
        draw = ImageDraw.Draw(image)

        for x, y, color in zip(xs, ys, patient_colors):
            # Scale 0-1 values to pixel indices. A value of exactly 1.0 would
            # land one past the canvas, so it is pinned to the last pixel.
            px = min(int(x * width), width - 1)
            py = min(int(y * height), height - 1)
            # One pixel per patient (the former +/- 1e-50 "point size" was a
            # floating-point no-op, i.e. a zero-size rectangle).
            draw.point((px, py), fill=color)

        # Pillow's origin is the top-left corner. rotate(90) turns the canvas
        # counter-clockwise, so in the saved file the y values (protein) run
        # left-to-right and the x values (mRNA) bottom-to-top. expand=True
        # keeps the full canvas when width != height and changes nothing for
        # a square canvas.
        image = image.rotate(90, expand=True)

        image.save(target_dir / f"{gene}.png")


In [147]:
# Write one colour-coded PNG per gene of df4 (rows containing NaN are dropped by the function)
CreateImage_PatientColored(df4)

### 3: Create image with patients using residuals as gradient pixels

In [None]:
def _split_mrna_prot(table):
    """Split ``table`` into an mRNA frame and a protein frame with matching columns.

    When the columns follow the ``<patient>_mrna`` / ``<patient>_prot`` naming
    and every mRNA column has a protein partner, pairs are matched by patient
    label, whatever the column order. Otherwise the columns are assumed to be
    interleaved (mRNA at even positions, protein at odd positions).

    Returns ``(patient_ids, mrna, prot)``; ``patient_ids`` is ``None`` when the
    positional fallback was used.
    """
    mrna_suffix, prot_suffix = "_mrna", "_prot"
    patient_ids = sorted(
        name[: -len(mrna_suffix)]
        for name in table.columns
        if isinstance(name, str) and name.endswith(mrna_suffix)
    )
    if patient_ids and all(pid + prot_suffix in table.columns for pid in patient_ids):
        mrna = table.loc[:, [pid + mrna_suffix for pid in patient_ids]]
        prot = table.loc[:, [pid + prot_suffix for pid in patient_ids]]
        return patient_ids, mrna, prot
    return None, table.iloc[:, ::2], table.iloc[:, 1::2]


def CreateImage_ResidualGradient(DF, DF2, out_dir="/Users/zhuoheng/DL/images_residuals"):
    """Save one scatter-plot PNG per gene, shading each patient by its residual.

    ``DF`` holds the raw (untransformed) mRNA/protein values; the sigmoid is
    applied here. ``DF2`` holds one residual per gene and patient. Each patient
    is drawn as a 3x3 square whose grey level runs from white (smallest
    residual of that gene) to black (largest residual of that gene). The canvas
    size comes from the notebook-level ``width``/``height``.

    Genes are matched between the two tables by index label, and only genes
    present in both are rendered. Gene labels are assumed to be unique in both
    tables.

    Parameters
    ----------
    DF : pandas.DataFrame
        One row per gene. Rows that contain a NaN are skipped.
    DF2 : pandas.DataFrame
        Residuals, one row per gene. Presumably its columns are patient IDs --
        TODO confirm. When every patient label derived from ``DF``'s
        ``*_mrna`` columns is found among ``DF2``'s columns, residuals are
        picked by label; otherwise they are taken in column order.
    out_dir : str or path-like, optional
        Folder that receives ``<gene>.png``; created when it does not exist.

    Returns
    -------
    None
    """
    # expit is element-wise, so a plain column-wise apply is enough.
    df = DF.apply(expit).dropna(axis=0)

    # Pair mRNA/protein columns per patient. The notebook passes df3, whose
    # columns are laid out as two halves (all mRNA, then all protein), so a
    # purely positional even/odd split would pair the wrong columns.
    patient_ids, mrna, prot = _split_mrna_prot(df)

    # Match genes by label. Indexing the residual rows by position paired a
    # gene with another gene's residuals whenever the two tables differed.
    genes = df.index.intersection(DF2.index)
    if len(genes) == 0:
        return

    if patient_ids is not None and all(pid in DF2.columns for pid in patient_ids):
        residuals = DF2.loc[:, patient_ids]
    else:
        residuals = DF2

    # Gradient end points: smallest residual -> white, largest -> black.
    # NOTE(review): the canvas is black as well, so the patient with the
    # largest residual blends into the background -- confirm this is intended.
    start_color = (255, 255, 255)
    end_color = (0, 0, 0)

    # Side length of each patient's square, in pixels
    point_size = 3
    half = point_size // 2

    target_dir = Path(out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    for gene in genes:
        xs = mrna.loc[gene].to_numpy()
        ys = prot.loc[gene].to_numpy()
        gene_residuals = residuals.loc[gene]

        # Range of this gene's residuals, computed once per gene (NaN ignored)
        lowest = gene_residuals.min()
        spread = gene_residuals.max() - lowest

        # Blank black canvas for this gene
        image = Image.new("RGB", (width, height), "black")
        draw = ImageDraw.Draw(image)

        for x, y, value in zip(xs, ys, gene_residuals.to_numpy()):
            if pd.isna(value):
                # No residual for this patient: nothing meaningful to shade
                continue

            # Scale 0-1 values to pixel indices; 1.0 is pinned to the last pixel.
            px = min(int(x * width), width - 1)
            py = min(int(y * height), height - 1)

            # Position of the residual inside the gene's range; 0 when all
            # residuals are equal (avoids a division by zero).
            t = (value - lowest) / spread if spread > 0 else 0.0
            color = tuple(
                int(start + t * (end - start))
                for start, end in zip(start_color, end_color)
            )

            draw.rectangle([(px - half, py - half), (px + half, py + half)], fill=color)

        # Pillow's origin is the top-left corner. rotate(90) turns the canvas
        # counter-clockwise, so in the saved file the y values (protein) run
        # left-to-right and the x values (mRNA) bottom-to-top. expand=True
        # keeps the full canvas when width != height and changes nothing for
        # a square canvas.
        image = image.rotate(90, expand=True)

        image.save(target_dir / f"{gene}.png")


In [None]:
# df3 is passed untransformed because the function applies the sigmoid itself;
# the residuals are limited to the genes shared with df4 (common_indices).
CreateImage_ResidualGradient(df3, residual_df.loc[common_indices, :])