In [1]:
import os
import pdb

import pandas as pd
from PIL import Image, ImageDraw
from scipy.special import expit

### Image and hex codes setup

In [46]:
# Canvas dimensions in pixels; the image-rendering functions read these
# module-level names when they create and scale each image.
width, height = 200, 200

In [3]:
# Named-colour lookup table: lowercase colour name -> "#rrggbb" hex string.
# The table has 138 entries, written in alphabetical order by name.
colormap = {
    # X11 colour table from https://drafts.csswg.org/css-color-4/, with
    # gray/grey spelling issues fixed (only the "gray" spellings are kept).
    # NOTE(review): this copy is trimmed relative to the full table -- for
    # example "white", "fuchsia" and "cyan" do not appear here -- so it is
    # not a complete superset of the HTML 4.0 colour names.
    # NOTE(review): "black" is the 8th value; any point drawn with it on a
    # black canvas cannot be told apart from the background.
    "aliceblue": "#f0f8ff",
    "antiquewhite": "#faebd7",
    "aqua": "#00ffff",
    "aquamarine": "#7fffd4",
    "azure": "#f0ffff",
    "beige": "#f5f5dc",
    "bisque": "#ffe4c4",
    "black": "#000000",
    "blanchedalmond": "#ffebcd",
    "blue": "#0000ff",
    "blueviolet": "#8a2be2",
    "brown": "#a52a2a",
    "burlywood": "#deb887",
    "cadetblue": "#5f9ea0",
    "chartreuse": "#7fff00",
    "chocolate": "#d2691e",
    "coral": "#ff7f50",
    "cornflowerblue": "#6495ed",
    "cornsilk": "#fff8dc",
    "crimson": "#dc143c",
    "darkblue": "#00008b",
    "darkcyan": "#008b8b",
    "darkgoldenrod": "#b8860b",
    "darkgray": "#a9a9a9",
    "darkgreen": "#006400",
    "darkkhaki": "#bdb76b",
    "darkmagenta": "#8b008b",
    "darkolivegreen": "#556b2f",
    "darkorange": "#ff8c00",
    "darkorchid": "#9932cc",
    "darkred": "#8b0000",
    "darksalmon": "#e9967a",
    "darkseagreen": "#8fbc8f",
    "darkslateblue": "#483d8b",
    "darkslategray": "#2f4f4f",
    "darkturquoise": "#00ced1",
    "darkviolet": "#9400d3",
    "deeppink": "#ff1493",
    "deepskyblue": "#00bfff",
    "dimgray": "#696969",
    "dodgerblue": "#1e90ff",
    "firebrick": "#b22222",
    "floralwhite": "#fffaf0",
    "forestgreen": "#228b22",
    "gainsboro": "#dcdcdc",
    "ghostwhite": "#f8f8ff",
    "gold": "#ffd700",
    "goldenrod": "#daa520",
    "gray": "#808080",
    "green": "#008000",
    "greenyellow": "#adff2f",
    "honeydew": "#f0fff0",
    "hotpink": "#ff69b4",
    "indianred": "#cd5c5c",
    "indigo": "#4b0082",
    "ivory": "#fffff0",
    "khaki": "#f0e68c",
    "lavender": "#e6e6fa",
    "lavenderblush": "#fff0f5",
    "lawngreen": "#7cfc00",
    "lemonchiffon": "#fffacd",
    "lightblue": "#add8e6",
    "lightcoral": "#f08080",
    "lightcyan": "#e0ffff",
    "lightgoldenrodyellow": "#fafad2",
    "lightgreen": "#90ee90",
    "lightgray": "#d3d3d3",
    "lightpink": "#ffb6c1",
    "lightsalmon": "#ffa07a",
    "lightseagreen": "#20b2aa",
    "lightskyblue": "#87cefa",
    "lightslategray": "#778899",
    "lightsteelblue": "#b0c4de",
    "lightyellow": "#ffffe0",
    "lime": "#00ff00",
    "limegreen": "#32cd32",
    "linen": "#faf0e6",
    "magenta": "#ff00ff",
    "maroon": "#800000",
    "mediumaquamarine": "#66cdaa",
    "mediumblue": "#0000cd",
    "mediumorchid": "#ba55d3",
    "mediumpurple": "#9370db",
    "mediumseagreen": "#3cb371",
    "mediumslateblue": "#7b68ee",
    "mediumspringgreen": "#00fa9a",
    "mediumturquoise": "#48d1cc",
    "mediumvioletred": "#c71585",
    "midnightblue": "#191970",
    "mintcream": "#f5fffa",
    "mistyrose": "#ffe4e1",
    "moccasin": "#ffe4b5",
    "navajowhite": "#ffdead",
    "navy": "#000080",
    "oldlace": "#fdf5e6",
    "olive": "#808000",
    "olivedrab": "#6b8e23",
    "orange": "#ffa500",
    "orangered": "#ff4500",
    "orchid": "#da70d6",
    "palegoldenrod": "#eee8aa",
    "palegreen": "#98fb98",
    "paleturquoise": "#afeeee",
    "palevioletred": "#db7093",
    "papayawhip": "#ffefd5",
    "peachpuff": "#ffdab9",
    "peru": "#cd853f",
    "pink": "#ffc0cb",
    "plum": "#dda0dd",
    "powderblue": "#b0e0e6",
    "purple": "#800080",
    "rebeccapurple": "#663399",
    "red": "#ff0000",
    "rosybrown": "#bc8f8f",
    "royalblue": "#4169e1",
    "saddlebrown": "#8b4513",
    "salmon": "#fa8072",
    "sandybrown": "#f4a460",
    "seagreen": "#2e8b57",
    "seashell": "#fff5ee",
    "sienna": "#a0522d",
    "silver": "#c0c0c0",
    "skyblue": "#87ceeb",
    "slateblue": "#6a5acd",
    "slategray": "#708090",
    "snow": "#fffafa",
    "springgreen": "#00ff7f",
    "steelblue": "#4682b4",
    "tan": "#d2b48c",
    "teal": "#008080",
    "thistle": "#d8bfd8",
    "tomato": "#ff6347",
    "turquoise": "#40e0d0",
    "violet": "#ee82ee",
    "wheat": "#f5deb3",
    "whitesmoke": "#f5f5f5",
    "yellow": "#ffff00",
    "yellowgreen": "#9acd32",
}

# Hex codes only, as a list in the dict's insertion order, so that colours
# can be picked by position.
colormap_values = list(colormap.values())

### Dataset setup

In [4]:
# Main labeled table; the 'Gene' column becomes the row index.
df = pd.read_csv(
    'kr_train_labeled_0622.csv',
    index_col='Gene',
)

In [5]:
# Residual table, indexed by 'Gene' like the main table.
residual_df = pd.read_csv(
    'kr_train_resid_0615.csv',
    index_col='Gene',
)

In [6]:
# Keep only genes whose 'Localization' label is something other than 'Unclassified'.
is_classified = df['Localization'] != 'Unclassified'
df2 = df[is_classified]

In [7]:
df2

Unnamed: 0_level_0,CPT000814_prot,CPT001846_prot,X01BR001_prot,X01BR008_prot,X01BR009_prot,X01BR018_prot,X01BR020_prot,X01BR026_prot,X01BR027_prot,X01BR031_prot,...,X16BR012_mrna,X18BR003_mrna,X18BR006_mrna,X21BR001_mrna,X06BR006_mrna,X09BR004_mrna,X11BR058_mrna,X11BR059_mrna,X18BR009_mrna,Localization
Gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG,-0.987411,0.510677,0.963885,-0.884379,0.408660,0.917514,-0.619700,1.194873,-0.684982,-0.329661,...,0.218828,0.144107,0.764447,0.220329,-0.714911,0.663401,0.910886,-0.389751,-0.077204,Cytosol
A2ML1,1.661429,-0.544436,-0.041102,1.280762,1.663777,1.202683,-0.110906,2.219136,2.370290,0.652244,...,-0.620095,-0.869702,-0.552137,-1.064193,0.569962,-0.232299,0.578424,0.192208,0.950777,Secretory
AAAS,2.207973,1.134415,0.397335,0.667814,-0.678440,-0.057840,-0.031918,-0.626766,1.597606,1.044369,...,0.821988,1.053310,0.108760,1.795527,-0.529808,1.399996,0.475302,0.110150,-0.811366,Secretory
AACS,-0.245457,-0.604077,-0.235807,-1.317636,-0.629167,-1.635303,-0.258907,-1.174333,-0.816438,-0.356915,...,0.164188,-0.910929,-0.391370,0.622088,0.683623,-0.360852,2.476981,0.569896,-1.812279,Cytosol
AAGAB,-0.583678,-0.826627,-1.386869,-1.335151,-2.354911,-0.718082,-0.080367,-0.536340,-0.133962,-1.627420,...,-0.683927,0.689205,-0.241635,0.163983,-2.243191,-2.240611,-1.676689,1.603091,-0.974182,Cytosol
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZWINT,1.371329,0.083257,-0.171917,0.860499,-0.081454,0.925838,1.335048,-1.007033,0.628282,-0.120866,...,1.527251,0.207676,0.630735,0.411982,-0.698508,-0.812772,-0.435766,1.195212,0.179110,Cytosol
ZYG11B,1.409412,0.078361,0.743565,-0.461876,0.753376,0.562789,-0.954989,1.885959,-0.128470,1.061210,...,-0.601207,-0.525623,0.243973,-1.139624,0.258679,-0.750952,0.130281,-0.019938,0.156530,Cytosol
ZYX,-1.176195,2.096937,0.340245,-0.645866,-0.185159,-0.899925,0.091207,1.277302,-0.592755,0.402433,...,0.443438,-0.546257,0.069830,0.649147,-0.131948,0.394178,1.710765,0.612839,0.356367,Cytosol
ZZEF1,-2.011132,-0.237296,-2.132052,-0.001250,-0.153514,-0.034971,-1.830495,0.957046,-1.300320,-0.294340,...,0.802856,0.602635,0.299360,2.183494,-0.308316,0.393773,0.362344,-0.108846,0.649340,Cytosol


In [8]:
# Drop the 'Localization' label column so only the numeric abundance columns remain.
# The column mask is derived from df2 itself (the frame being sliced) rather
# than from df, so the two can never get out of step.
df3 = df2.loc[:, df2.columns != 'Localization']

In [9]:
# Rescale every abundance with the fixed linear map x -> (x + 3) / 6, which
# sends the interval [-3, 3] onto [0, 1] (added 2023-09-05; an earlier version
# applied a sigmoid, scipy.special.expit, to two column ranges separately and
# merged the results back on 'Gene').
# NOTE: this is a fixed-range rescale, not a data-driven min-max transform:
# any input below -3 or above 3 ends up outside [0, 1].
df4 = (df3 + 3) / 6

In [10]:
# Sort the columns by name so that each patient's '<patient>_mrna' column is
# immediately followed by its '<patient>_prot' column; the image functions
# read the columns in alternating pairs.
df4 = df4.sort_index(axis='columns')

In [11]:
df4

Unnamed: 0_level_0,CPT000814_mrna,CPT000814_prot,CPT001846_mrna,CPT001846_prot,X01BR001_mrna,X01BR001_prot,X01BR008_mrna,X01BR008_prot,X01BR009_mrna,X01BR009_prot,...,X21BR001_mrna,X21BR001_prot,X21BR002_mrna,X21BR002_prot,X21BR010_mrna,X21BR010_prot,X22BR005_mrna,X22BR005_prot,X22BR006_mrna,X22BR006_prot
Gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG,0.516110,0.335432,0.660523,0.585113,0.437961,0.660648,0.401585,0.352603,0.591966,0.568110,...,0.536722,0.336663,0.105178,0.250623,0.328215,0.556360,0.254470,0.476237,0.417166,0.477456
A2ML1,0.801228,0.776905,0.321032,0.409261,0.518678,0.493150,0.780764,0.713460,0.746632,0.777296,...,0.322635,0.420303,0.208515,0.415760,0.436650,0.379395,0.526635,0.235165,0.374541,0.298600
AAAS,0.653512,0.867996,0.524944,0.689069,0.204567,0.566222,0.524282,0.611302,0.631703,0.386927,...,0.799255,0.821097,0.807892,0.650783,0.246431,0.423820,0.523686,0.551272,0.673864,0.657690
AACS,0.590851,0.459090,0.333895,0.399321,0.464314,0.460699,0.419450,0.280394,0.602270,0.395139,...,0.603681,0.402879,0.655376,0.252670,0.043369,0.363455,0.456674,0.466891,0.510984,0.495881
AAGAB,0.312082,0.402720,0.658175,0.362229,0.023394,0.268855,0.385367,0.277475,0.317733,0.107515,...,0.527331,0.409845,0.851095,0.516635,0.704800,0.421385,0.612748,0.769889,0.493637,0.852124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZWINT,0.647084,0.728555,0.424585,0.513876,0.119989,0.471347,0.761855,0.643416,0.355559,0.486424,...,0.568664,0.467949,0.464591,0.349232,0.094374,0.416122,0.675451,0.710655,0.105380,0.236561
ZYG11B,0.238458,0.734902,0.530305,0.513060,0.392551,0.623928,0.058037,0.423021,0.322818,0.625563,...,0.310063,0.590903,0.381825,0.621595,0.712703,0.507431,0.424466,0.203243,0.836515,0.454249
ZYX,0.683526,0.303968,0.898832,0.849489,0.381976,0.556708,0.590981,0.392356,0.504681,0.469140,...,0.608191,0.321800,0.727716,0.497417,0.307162,0.535431,0.329190,0.498608,0.163740,0.840718
ZZEF1,0.576064,0.164811,0.414972,0.460451,0.431522,0.144658,0.644599,0.499792,0.750720,0.474414,...,0.863916,0.866314,0.782128,0.605138,0.295704,0.514077,0.345978,0.437178,-0.644777,0.884957


In [12]:
# Genes present in both the rescaled abundance frame (df4) and residual_df,
# matched on their 'Gene' index (6173 genes in the run recorded below).
common_indices = df4.index.intersection(residual_df.index)

In [13]:
common_indices

Index(['A1BG', 'A2ML1', 'AAAS', 'AACS', 'AAGAB', 'AAK1', 'AAMDC', 'AAMP',
       'AAR2', 'AARS',
       ...
       'ZRSR2', 'ZSCAN18', 'ZSWIM8', 'ZW10', 'ZWILCH', 'ZWINT', 'ZYG11B',
       'ZYX', 'ZZEF1', 'ZZZ3'],
      dtype='object', name='Gene', length=6173)

### 1: Create image with patients as all white pixels

In [14]:
def CreateImage(DF, output_dir="/Users/zhuoheng/DL/images_white_pixels_0905"):
    """Write one PNG per gene: a black canvas with one white pixel per patient.

    Each row of ``DF`` is one gene.  Columns are consumed positionally in
    pairs: even positions (0, 2, 4, ...) supply the x coordinates (mRNA) and
    odd positions (1, 3, 5, ...) supply the y coordinates (protein), so the
    frame must already be ordered ``<patient>_mrna, <patient>_prot, ...``.
    Values are expected to lie in [0, 1]; they are multiplied by the
    module-level ``width`` / ``height`` to obtain pixel positions.

    Parameters
    ----------
    DF : pandas.DataFrame
        Gene-indexed frame of rescaled abundances.  Rows containing any NaN
        are skipped.  The caller's frame is not modified.
    output_dir : str, optional
        Directory that receives ``<gene>.png``; it is created if missing.
        Defaults to the location that was previously hard-coded here.

    Returns
    -------
    None
        The images are written to disk as a side effect.
    """
    # Filter into a new frame instead of dropna(inplace=True), which would
    # silently delete rows from the DataFrame the caller passed in.
    df = DF.dropna(axis=0)

    # One row per gene, one column per patient.
    mRNA_abund = df.iloc[:, ::2].to_numpy(dtype=float)   # even column positions
    prot_abund = df.iloc[:, 1::2].to_numpy(dtype=float)  # odd column positions

    os.makedirs(output_dir, exist_ok=True)

    for gene, x_coordinates, y_coordinates in zip(df.index, mRNA_abund, prot_abund):
        # Blank canvas with a black background
        image = Image.new("RGB", (width, height), "black")
        draw = ImageDraw.Draw(image)

        for x, y in zip(x_coordinates, y_coordinates):
            # Scale the unit-interval coordinates to the canvas size
            px = float(x * width)
            py = float(y * height)
            # A degenerate rectangle marks exactly one spot.  The previous
            # half-width of 1e-50 is far below float resolution at these
            # magnitudes, so it never changed the coordinates.
            draw.rectangle((px, py, px, py), fill="white")

        # PIL's origin is the upper-left corner, so the canvas is rotated
        # 90 degrees counter-clockwise before saving.
        # NOTE(review): after this rotation the protein axis runs left to
        # right and the mRNA axis runs bottom to top -- confirm that this is
        # the intended orientation.
        image = image.rotate(90)

        image.save(os.path.join(output_dir, f"{gene}.png"))


### 2: Create image with patients as all different colored pixels

In [51]:
def CreateImage_PatientColored(DF, output_dir="test", point_size=0):
    """Write one PNG per gene with every patient drawn in its own colour.

    Each row of ``DF`` is one gene.  Columns are consumed positionally in
    pairs: even positions supply the x coordinates (mRNA) and odd positions
    supply the y coordinates (protein), so the frame must already be ordered
    ``<patient>_mrna, <patient>_prot, ...``.  Values are expected to lie in
    [0, 1]; they are multiplied by the module-level ``width`` / ``height``.
    The i-th patient is always drawn with the i-th entry of the module-level
    ``colormap_values``.

    The previous implementation passed ``(x - 10, y - 10, x + 10, y + 10)``
    to ``draw.point``, which interprets the sequence as two separate points:
    every patient produced two pixels offset diagonally from the data point
    and nothing at the data point itself.  The marker is now centred on the
    data point.

    Parameters
    ----------
    DF : pandas.DataFrame
        Gene-indexed frame of rescaled abundances.  Rows containing any NaN
        are skipped.  The caller's frame is not modified.
    output_dir : str, optional
        Directory that receives ``<gene>.png``; it is created if missing.
    point_size : int or float, optional
        Half-width of the square marker in pixels.  The default of 0 draws a
        single pixel per patient.

    Raises
    ------
    ValueError
        If there are more patients than available colours (previously the
        surplus patients were silently left out of the image).

    Returns
    -------
    None
        The images are written to disk as a side effect.
    """
    # Filter into a new frame instead of dropna(inplace=True), which would
    # silently delete rows from the DataFrame the caller passed in.
    df = DF.dropna(axis=0)

    # One row per gene, one column per patient.
    mRNA_abund = df.iloc[:, ::2].to_numpy(dtype=float)   # even column positions
    prot_abund = df.iloc[:, 1::2].to_numpy(dtype=float)  # odd column positions

    # One fixed colour per patient, taken from the palette by position.
    n_patients = min(mRNA_abund.shape[1], prot_abund.shape[1])
    if n_patients > len(colormap_values):
        raise ValueError(
            f"{n_patients} patients but only {len(colormap_values)} colours available"
        )
    # NOTE(review): the palette contains "#000000", which cannot be seen on
    # the black background -- consider excluding it.
    patient_colors = colormap_values[:n_patients]

    os.makedirs(output_dir, exist_ok=True)

    for gene, x_coordinates, y_coordinates in zip(df.index, mRNA_abund, prot_abund):
        # Blank canvas with a black background
        image = Image.new("RGB", (width, height), "black")
        draw = ImageDraw.Draw(image)

        for x, y, color in zip(x_coordinates, y_coordinates, patient_colors):
            # Scale the unit-interval coordinates to the canvas size
            cx = float(x * width)
            cy = float(y * height)
            draw.rectangle(
                (cx - point_size, cy - point_size, cx + point_size, cy + point_size),
                fill=color,
            )

        # PIL's origin is the upper-left corner, so the canvas is rotated
        # 90 degrees counter-clockwise before saving.
        # NOTE(review): after this rotation the protein axis runs left to
        # right and the mRNA axis runs bottom to top -- confirm that this is
        # the intended orientation.
        image = image.rotate(90)

        image.save(os.path.join(output_dir, f"{gene}.png"))


In [52]:
# Render the per-patient coloured images for the gene rows of df4.
CreateImage_PatientColored(df4)

### 3: Create image with patients using residuals as gradient pixels

In [None]:
def CreateImage_ResidualGradient(DF, DF2, output_dir="/Users/zhuoheng/DL/images_residuals"):
    """Write one PNG per gene with patients shaded by their residual.

    ``DF`` is sigmoid-transformed (``scipy.special.expit``) so every
    abundance falls in [0, 1], then its columns are consumed positionally in
    pairs: even positions supply the x coordinates (mRNA) and odd positions
    supply the y coordinates (protein).  Each patient is drawn as a small
    square whose grey level is interpolated from white (smallest residual of
    that gene) to black (largest residual of that gene).

    Genes are paired with their residual row by index label.  The previous
    implementation paired them by row position, which assigned residuals to
    the wrong gene whenever a row was dropped for NaN or the two frames did
    not list the same genes in the same order.

    Parameters
    ----------
    DF : pandas.DataFrame
        Gene-indexed frame of abundances, ordered
        ``<patient>_mrna, <patient>_prot, ...``.  Rows containing any NaN are
        skipped.  The caller's frame is not modified.
    DF2 : pandas.DataFrame
        Gene-indexed frame of residuals, one column per patient.
        NOTE(review): the columns are assumed to be in the same patient order
        as the column pairs of ``DF`` -- confirm against the residual file.
        Gene labels are assumed to be unique in both frames.
    output_dir : str, optional
        Directory that receives ``<gene>.png``; it is created if missing.
        Defaults to the location that was previously hard-coded here.

    Returns
    -------
    None
        The images are written to disk as a side effect.  Only genes present
        in both frames produce an image.
    """
    # sigmoid transform
    df = DF.apply(expit, axis=1)

    # remove genes with any missing abundance (df is a new frame, DF is untouched)
    df = df.dropna(axis=0)

    # Align the two frames on gene label so row i of each describes the same gene.
    shared_genes = df.index.intersection(DF2.index)
    df = df.loc[shared_genes]
    residual_rows = DF2.loc[shared_genes].to_numpy(dtype=float)

    # One row per gene, one column per patient.
    mRNA_abund = df.iloc[:, ::2].to_numpy(dtype=float)   # even column positions
    prot_abund = df.iloc[:, 1::2].to_numpy(dtype=float)  # odd column positions

    # Gradient end points.
    # NOTE(review): the end colour equals the black background, so the
    # patient with the largest residual cannot be seen.
    start_color = (255, 255, 255)  # White
    end_color = (0, 0, 0)          # Black

    # Each patient is a square reaching point_size // 2 pixels either side of its centre.
    point_size = 3
    half = point_size // 2

    os.makedirs(output_dir, exist_ok=True)

    for gene, x_coordinates, y_coordinates, residual_gradient in zip(
        df.index, mRNA_abund, prot_abund, residual_rows
    ):
        # Blank canvas with a black background
        image = Image.new("RGB", (width, height), "black")
        draw = ImageDraw.Draw(image)

        # The residual range of this gene is computed once, not once per point.
        known = [float(v) for v in residual_gradient if not pd.isna(v)]
        lowest = min(known) if known else 0.0
        span = (max(known) - lowest) if known else 0.0

        for x, y, value in zip(x_coordinates, y_coordinates, residual_gradient):
            if pd.isna(value):
                # No residual means no defined colour; skip this patient
                # rather than failing on int(NaN).
                continue

            # Position within the gene's residual range; a zero range (all
            # residuals equal) maps to the start colour instead of dividing by zero.
            t = (float(value) - lowest) / span if span else 0.0
            color = tuple(
                int(start + t * (end - start))
                for start, end in zip(start_color, end_color)
            )

            # Scale the unit-interval coordinates to whole pixels
            cx = int(x * width)
            cy = int(y * height)
            draw.rectangle([(cx - half, cy - half), (cx + half, cy + half)], fill=color)

        # PIL's origin is the upper-left corner, so the canvas is rotated
        # 90 degrees counter-clockwise before saving.
        # NOTE(review): after this rotation the protein axis runs left to
        # right and the mRNA axis runs bottom to top -- confirm that this is
        # the intended orientation.
        image = image.rotate(90)

        image.save(os.path.join(output_dir, f"{gene}.png"))
