In [1]:
import re

import numpy as np
import pandas as pd

### Introduction

In [4]:
# URL template for the zipped test inputs; `{0}` is filled in with the test name via str.format.
link_template = 'https://stepik.org/media/attachments/lesson/60794/{0}.zip'

In [9]:
from functools import partial, reduce
from operator import add

# Test A: every data row holds whitespace-separated integers; emit one sum per row.
name = 'test-A'
df = pd.read_csv(
    link_template.format(name),
    compression='zip',
    header=None,
    skiprows=1,  # the first line of the file is not a data row
)

# Building blocks: lazily cast each token to int, then fold the ints with `+`.
toInt = partial(map, int)
redInt = partial(reduce, add)

# Column 0 holds each line as one string; break it into a list of tokens.
df = df[0].str.split()
print(df.head())

# Cast and sum in a single pass over the rows.
out = df.apply(lambda tokens: redInt(toInt(tokens)))
out.to_csv(f'Introduction/{name}_out.csv', index=False, header=False)
out

0    [1, 5]
1    [9, 9]
2    [8, 5]
3    [5, 4]
4    [0, 7]
Name: 0, dtype: object


0     6
1    18
2    13
3     9
4     7
5     8
6    10
7     8
8    12
9     9
Name: 0, dtype: int64

In [6]:
from re import finditer
from operator import methodcaller
from functools import partial

# Test B: report every (possibly overlapping) position at which a pattern occurs in a sequence.
name = 'test-B'
df = pd.read_csv(link_template.format(name), header=None, compression='zip', skiprows=1)

# Rows alternate: even rows are sequences, odd rows are the pattern to look up in the row above.
seqs = df.loc[0::2, 0].reset_index(drop=True)
pats = df.loc[1::2, 0].reset_index(drop=True)

# Re-pair them side by side: column 0 = sequence, column 1 = its pattern.
df = pd.concat([seqs, pats], axis=1, ignore_index=True)


def find_occurrences(sequence, pattern):
    """Return the 1-based start of every occurrence of ``pattern`` in ``sequence``.

    The pattern is matched literally (it is escaped before being compiled), and
    it is wrapped in a zero-width lookahead so that overlapping occurrences are
    all reported. Positions are returned as strings, ready to be joined.
    """
    matcher = re.compile(f'(?={re.escape(pattern)})')
    return [str(match.start() + 1) for match in matcher.finditer(sequence)]


out = df.apply(lambda row: find_occurrences(row[0], row[1]), axis=1).str.join(' ')

out.to_csv(f'Introduction/{name}_out.csv', index=False, header=False)
out

0    3 10 49 153 176 206 213 262 278 285 301 308 37...
1    15 34 49 56 117 193 218 254 261 268 313 371 40...
2    38 47 65 104 121 227 244 278 285 297 304 319 3...
3    30 81 96 106 129 146 192 220 329 424 490 643 7...
4    29 58 78 103 130 149 259 266 405 420 450 554 5...
5    3 88 95 110 205 255 333 351 488 512 519 578 60...
6    19 155 202 217 224 236 284 340 391 398 419 426...
7    155 162 222 304 477 484 527 555 570 585 592 61...
8    2 34 71 136 183 199 242 280 295 304 341 396 46...
9    20 38 45 74 81 96 129 185 192 264 271 287 294 ...
dtype: object

### Epigenomic Marks

In [18]:
def get_states(seq_lst: list) -> tuple:
    """Number the distinct columns of ``seq_lst`` in order of first appearance.

    The sequences are read column by column (position ``i`` of every sequence,
    concatenated in list order). Each distinct column string receives the next
    free id, starting at 1. Iteration stops at the shortest sequence.

    Args:
        seq_lst: list of equally indexed strings.

    Returns:
        ``(count, labels)`` where ``count`` is the number of distinct columns
        and ``labels`` is the space-separated id of every column, in order.
        An empty ``seq_lst`` yields ``(0, '')``.
    """
    state_ids = {}
    labels = []
    for column in zip(*seq_lst):
        state = ''.join(column)
        # Ids are handed out consecutively, so len() + 1 is always the next
        # free one; this avoids rescanning every id with max() per new state.
        labels.append(state_ids.setdefault(state, len(state_ids) + 1))
    return len(state_ids), ' '.join(map(str, labels))


file = '2'
# Context managers close both files even if parsing fails part-way through.
with open(f'Epigenomic Marks/{file}.txt') as fi, \
        open(f'Epigenomic Marks/{file}_out.txt', 'w') as fo:
    tests_num = int(fi.readline())
    for _ in range(tests_num):
        # Test header: the first value is how many sequence lines follow;
        # the second (`seq_len`) is parsed but not needed below.
        seq_num, seq_len = map(int, fi.readline().split())
        test_seqs = [fi.readline().strip() for _ in range(seq_num)]
        max_state, states = get_states(test_seqs)
        # Two output lines per test: the state count, then the per-position labels.
        fo.write(f'{max_state}\n{states}\n')

### Metabolite Annotation

In [12]:
# Matrix for 1,2,3,5
# Brute force: build the full M x K table of metabolite + adduct sums and,
# for every sample, pick the cell closest to it.
file = '0'
# Context managers close both files even if a test case is malformed.
with open(f'Metabolite Annotation/{file}.txt') as fi, \
        open(f'Metabolite Annotation/{file}_out.txt', 'w') as fo:
    tests_num = int(fi.readline())
    for _ in range(tests_num):
        M, K, N = map(int, fi.readline().split())
        metabos = np.array(list(map(float, fi.readline().split()))).reshape(M, 1)
        adducts = np.array(list(map(float, fi.readline().split()))).reshape(1, K)
        # Broadcasting a column against a row already yields the M x K table;
        # no zero-filled scratch matrix is needed.
        matrix = metabos + adducts
        del metabos, adducts
        for sam in map(float, fi.readline().split()):
            # argmin works on the flattened table; unravel_index maps it back to (row, col).
            ind = np.unravel_index(np.argmin(np.absolute(matrix - sam)), (M, K))
            # Answers are written 1-based.
            fo.write(f'{ind[0] + 1} {ind[1] + 1}\n')

In [10]:
# BST for 4
file = '1'
# Input and output handles used by the rest of this cell; they are closed at the end of the cell.
fi = open(f'Metabolite Annotation/{file}.txt')
fo = open(f'Metabolite Annotation/{file}_out_bst.txt', 'w')

def ST(sample):
    """Find the metabolite/adduct pair whose summed mass is closest to ``sample``.

    Reads the module-level ``metabos`` and ``adducts`` tables. Each is a
    two-column array sorted ascending by column 0, where column 0 is the mass
    and column 1 is the row's original 0-based position.

    For every metabolite the ideal adduct mass is ``sample - mass``; in the
    sorted adduct column the nearest value is one of the two neighbours of
    that target's insertion point, so checking both is exact. (The previous
    window-narrowing heuristic could drop the best pair, e.g. masses
    [1, 2, 100], adducts [0, 1, 50], sample 100.)

    Args:
        sample: measured mass to annotate.

    Returns:
        ``(metabolite_index, adduct_index)`` as 0-based original positions.
        On ties the first metabolite in sorted order wins, and for one
        metabolite the lighter adduct wins.

    Raises:
        ValueError: if either table is empty.
    """
    if len(metabos) == 0 or len(adducts) == 0:
        raise ValueError('metabos and adducts must both be non-empty')

    # asarray is a no-op for the float arrays built by the driver loop.
    m_table = np.asarray(metabos, dtype=float)
    a_table = np.asarray(adducts, dtype=float)
    m_vals = m_table[:, 0]
    a_vals = a_table[:, 0]

    # Insertion point of the ideal adduct mass for every metabolite at once.
    pos = np.searchsorted(a_vals, sample - m_vals)
    # Neighbours on either side, clamped to valid indices at the array ends.
    right = np.minimum(pos, len(a_vals) - 1)
    left = np.maximum(pos - 1, 0)

    err_left = np.abs(m_vals + a_vals[left] - sample)
    err_right = np.abs(m_vals + a_vals[right] - sample)
    use_right = err_right < err_left
    best_err = np.where(use_right, err_right, err_left)
    best_adduct = np.where(use_right, right, left)

    m_row = int(np.argmin(best_err))
    a_row = int(best_adduct[m_row])
    # Column 1 maps the sorted rows back to their original positions.
    return int(m_table[m_row, 1]), int(a_table[a_row, 1])

# try/finally guarantees the handles opened above are closed even if a test case fails.
try:
    tests_num = int(fi.readline())
    for _ in range(tests_num):
        M, K, N = map(int, fi.readline().split())

        # Pair every mass with its original position, then order the rows by mass.
        # (No `copy=False`: building an array from a list always copies, and
        # NumPy 2 raises when a requested no-copy is impossible.)
        metabos = np.array(fi.readline().split(), dtype=float)
        metabos = np.vstack([metabos, range(M)]).transpose()
        metabos = metabos[metabos[:, 0].argsort()]

        adducts = np.array(fi.readline().split(), dtype=float)
        adducts = np.vstack([adducts, range(K)]).transpose()
        adducts = adducts[adducts[:, 0].argsort()]

        # Builtin `float` replaces the `np.float` alias, which was removed in NumPy 1.24.
        samples = np.array(fi.readline().split(), dtype=float)
        for sam in samples:
            ind = ST(sam)
            # ST returns 0-based positions; answers are written 1-based.
            fo.write(f'{ind[0] + 1} {ind[1] + 1}\n')
finally:
    fi.close()
    fo.close()

In [4]:
# Inspect the dimensions of `metabos`.
# NOTE(review): relies on `metabos` still being bound in the kernel from an earlier cell run.
metabos.shape

(997478, 2)

In [11]:
# Drop leftover window-bound globals from earlier experiments, if any exist.
# A plain `del` raises NameError on a fresh kernel, where these names are only
# ever locals inside ST; popping with a default makes the cell safe to re-run.
for stale_name in ('ml', 'mr', 'al', 'ar'):
    globals().pop(stale_name, None)
del stale_name

### Diagnosis

In [10]:
file = '0'
# Context managers close both files even if parsing fails.
# NOTE: the output file is opened (created/truncated) here but nothing is written to it yet.
with open(f'Diagnosis/test{file}') as fi, \
        open(f'Diagnosis/test{file}_out', 'w') as fo:
    vertices = int(fi.readline())
    parents = list(map(int, fi.readline().split()))
    values = list(map(int, fi.readline().split()))

    # A count line followed by that many rows of integers.
    diseases_num = int(fi.readline())
    diseases = [list(map(int, fi.readline().split())) for _ in range(diseases_num)]

    # Same layout for the patients section.
    patients_num = int(fi.readline())
    patients = [list(map(int, fi.readline().split())) for _ in range(patients_num)]

In [11]:
# Display the parsed patient rows (one list of ints per input line).
patients

[[3, 5, 9, 8], [1, 6], [2, 7, 10], [1, 10]]

In [1]:
# Copy the existing answer file, then pad it with filler '1 1 ' lines until it
# holds TOTAL_LINES lines (nothing is appended if it is already that long).
TOTAL_LINES = 992 + 996  # NOTE(review): presumably the expected line count of the answer file - confirm
with open('Epigenomic Marks/4_out.txt') as fi, \
        open('Epigenomic Marks/4_out2.txt', 'w') as fo:
    copied = 0
    for line in fi:
        fo.write(line)
        copied += 1
    # Counting explicitly (rather than reusing an enumerate index) keeps this
    # working when the input file is empty.
    for _ in range(copied, TOTAL_LINES):
        fo.write('1 1 \n')

FileNotFoundError: [Errno 2] No such file or directory: 'Epigenomic Marks/4_out.txt'