In [209]:
# Import packages
import gurobipy as gp
from gurobipy import GRB
import parse_graph_new
import networkx as nx
import path_enumeration

In [210]:
# Read file and extract all paths
with open("/home/laura/Documents/Transkript_Assembly/data/human_geuvadis/test3.graph") as f: 
    fileEndReached = False
    f.readline()
    while not fileEndReached:
        f.readline()
        Chromosome, Strand, Exons = parse_graph_new.parse_meta(f)
        Bins = parse_graph_new.parse_bins(f)
        PairedBins = parse_graph_new.parse_pairs(f)
        
        # Build graphs
        G_full = nx.DiGraph()
        fileEndReached, skip = parse_graph_new.parse_graph(f, G_full, Exons) # Full Graph
        
        if not fileEndReached and not skip:
            G_clean = nx.DiGraph()  
            fileEndReached, _ = parse_graph_new.parse_graph(f, G_clean, Exons) # Cleaned Graph
            
        # Full path enumeration of cleaned graph
        transcripts = path_enumeration.enumeration(G_clean,[],"0",["0"],"1",False)
        print("Transkripts:", len(transcripts), transcripts)

f.close()

Transkripts: 38 [['0', '2', '87', '40', '56', '42', '54', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '9', '80', '11', '78', '40', '56', '42', '54', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '33', '61', '34', '60', '35', '59', '36', '58', '37', '57', '40', '56', '42', '54', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '33', '61', '34', '60', '36', '58', '37', '57', '40', '56', '42', '54', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '33', '61', '34', '60', '48', '32', '50', '39', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '33', '61', '36', '58', '37', '57', '40', '56', '42', '54', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '33', '61', '48', '32', '50', '39', '1'], ['0', '3', '86', '4', '85', '23', '68', '24', '67', '26', '65', '27', '64', '30', '63', '33', '61', '34', '60', '35', '59

In [211]:
for edgeKey, edgeValue in G_clean.edges.items():
    print(edgeValue)
    print(edgeKey)

{'type': 'SpliceJunction', 'counts': {'0': 204, '1': 117, '2': 347, '3': 231, '4': 154, 'c': 1053}, 'length': 1, 'startExon': 0, 'endExon': 38}
('87', '40')
{'type': 'Exon', 'counts': {'0': 3408, '1': 1874, '2': 6693, '3': 3274, '4': 1765, 'c': 17014}, 'length': 48, 'exon': 38}
('40', '56')
{'type': 'SpliceJunction', 'counts': {'0': 2838, '1': 1620, '2': 5589, '3': 2831, '4': 1537, 'c': 14415}, 'length': 1, 'startExon': 38, 'endExon': 40}
('56', '42')
{'type': 'Exon', 'counts': {'0': 3481, '1': 1895, '2': 6833, '3': 3340, '4': 1651, 'c': 17200}, 'length': 434, 'exon': 40}
('42', '54')
{'type': 'SpliceJunction', 'counts': {'0': 10, '1': 10, '2': 5, '3': 6, '4': 6, 'c': 37}, 'length': 1, 'startExon': 1, 'endExon': 2}
('86', '4')
{'type': 'Exon', 'counts': {'0': 210, '1': 151, '2': 476, '3': 254, '4': 113, 'c': 1204}, 'length': 16, 'exon': 2}
('4', '85')
{'type': 'SpliceJunction', 'counts': {'0': 7, '1': 11, '2': 3, '3': 5, '4': 4, 'c': 30}, 'length': 1, 'startExon': 2, 'endExon': 3}
('85

In [212]:
# Create edge dictionary storing the counts for each edge
edges_dict = {}
for edgeKey, edgeValue in G_clean.edges.items():
    count = edgeValue["counts"]["c"]
    if edgeValue["type"] == "SpliceJunction" or edgeValue["type"] == "Exon":
        edges_dict[edgeKey] = count
print(edges_dict)
edges = list(edges_dict.keys())
print(len(edges))


{('87', '40'): 1053, ('40', '56'): 17014, ('56', '42'): 14415, ('42', '54'): 17200, ('86', '4'): 37, ('4', '85'): 1204, ('85', '5'): 30, ('85', '23'): 1173, ('5', '84'): 37, ('23', '68'): 15583, ('68', '24'): 14779, ('68', '27'): 145, ('24', '67'): 17544, ('27', '64'): 17378, ('84', '6'): 7, ('6', '83'): 100, ('83', '7'): 92, ('7', '82'): 355, ('82', '8'): 324, ('8', '81'): 1990, ('81', '9'): 43, ('81', '33'): 1938, ('9', '80'): 74, ('33', '61'): 11563, ('80', '11'): 53, ('11', '78'): 168, ('78', '40'): 153, ('76', '22'): 947, ('22', '69'): 9270, ('69', '23'): 8940, ('71', '19'): 313, ('19', '70'): 8582, ('70', '22'): 7608, ('67', '26'): 14671, ('26', '65'): 16826, ('65', '27'): 15843, ('64', '30'): 11180, ('30', '63'): 12793, ('63', '33'): 7598, ('58', '37'): 8265, ('37', '57'): 11388, ('57', '40'): 10829, ('61', '34'): 653, ('61', '36'): 6826, ('61', '48'): 1058, ('34', '60'): 706, ('60', '35'): 117, ('60', '36'): 403, ('60', '48'): 58, ('35', '59'): 1006, ('36', '58'): 8265, ('48', 

In [213]:
print(transcripts)

[['0', '2', '87', '40', '56', '42', '54', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '9', '80', '11', '78', '40', '56', '42', '54', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '33', '61', '34', '60', '35', '59', '36', '58', '37', '57', '40', '56', '42', '54', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '33', '61', '34', '60', '36', '58', '37', '57', '40', '56', '42', '54', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '33', '61', '34', '60', '48', '32', '50', '39', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '33', '61', '36', '58', '37', '57', '40', '56', '42', '54', '1'], ['0', '3', '86', '4', '85', '5', '84', '6', '83', '7', '82', '8', '81', '33', '61', '48', '32', '50', '39', '1'], ['0', '3', '86', '4', '85', '23', '68', '24', '67', '26', '65', '27', '64', '30', '63', '33', '61', '34', '60', '35', '59', '36', '58', '

In [214]:
# Create gurobi model
model = gp.Model("Transcript Expression")

In [215]:
# Add variables
no_trans = len(transcripts)
var1 = model.addVars(no_trans, vtype=GRB.CONTINUOUS, name="expression")
helper1 = model.addVars(edges, lb=-GRB.INFINITY, vtype=GRB.CONTINUOUS, name="x")
helper2 = model.addVars(edges, vtype=GRB.CONTINUOUS, name="y")
norm = model.addVar(name="L0-Norm")

print(vars)

{0: <gurobi.Var *Awaiting Model Update*>, 1: <gurobi.Var *Awaiting Model Update*>}


In [216]:
# Create Adjazenzmatrix (path,edge): 0/1
adj_matrix = {}
for i in range(0,len(transcripts)):
    for j in range(1,len(transcripts[i])-1):
        startnode = transcripts[i][j] 
        endnode = transcripts[i][j+1]
        current_edge = (startnode,endnode)
        if current_edge in edges:
            adj_matrix[i,current_edge] = 1
    for edge in edges:
        if (i,edge) not in adj_matrix.keys():
            adj_matrix[i,edge] = 0
print(adj_matrix)

        

{(0, ('2', '87')): 1, (0, ('87', '40')): 1, (0, ('40', '56')): 1, (0, ('56', '42')): 1, (0, ('42', '54')): 1, (0, ('86', '4')): 0, (0, ('4', '85')): 0, (0, ('85', '5')): 0, (0, ('85', '23')): 0, (0, ('5', '84')): 0, (0, ('23', '68')): 0, (0, ('68', '24')): 0, (0, ('68', '27')): 0, (0, ('24', '67')): 0, (0, ('27', '64')): 0, (0, ('84', '6')): 0, (0, ('6', '83')): 0, (0, ('83', '7')): 0, (0, ('7', '82')): 0, (0, ('82', '8')): 0, (0, ('8', '81')): 0, (0, ('81', '9')): 0, (0, ('81', '33')): 0, (0, ('9', '80')): 0, (0, ('33', '61')): 0, (0, ('80', '11')): 0, (0, ('11', '78')): 0, (0, ('78', '40')): 0, (0, ('76', '22')): 0, (0, ('22', '69')): 0, (0, ('69', '23')): 0, (0, ('71', '19')): 0, (0, ('19', '70')): 0, (0, ('70', '22')): 0, (0, ('67', '26')): 0, (0, ('26', '65')): 0, (0, ('65', '27')): 0, (0, ('64', '30')): 0, (0, ('30', '63')): 0, (0, ('63', '33')): 0, (0, ('58', '37')): 0, (0, ('37', '57')): 0, (0, ('57', '40')): 0, (0, ('61', '34')): 0, (0, ('61', '36')): 0, (0, ('61', '48')): 0, 

In [217]:
# Define optimization problem
for j in edges:
    model.addConstr(helper1[j] == (edges_dict[j] - (gp.quicksum(adj_matrix[i,j] * var1[i] for i in range(len(transcripts))))))
    model.addConstr(helper2[j] >= helper1[j])
    model.addConstr(helper2[j] >= -helper1[j])

# L1 Norm Sparsity Constraint
#factor1 = 10000
#model.addConstr(var1.sum() <= factor1)

# LO Norm Sparsity Constraint
#factor2 = 0
#factor3 = 2
#model.addConstr(norm == gp.norm(var1, 0.0)) # Hinzufügen dieser Zeile verändert sofort die Werte der Variablen var1, warum?
#model.setObjective((gp.quicksum(helper2[j] for j in edges) + (factor2*norm)), GRB.MINIMIZE)
#model.addConstr(norm <= factor3)

model.setObjective(gp.quicksum(helper2[j] for j in edges), GRB.MINIMIZE)

In [218]:
# Solve optimization problem
model.optimize()

Gurobi Optimizer version 9.5.1 build v9.5.1rc2 (linux64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 189 rows, 165 columns and 1101 nonzeros
Model fingerprint: 0x94274233
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [3e+00, 2e+04]
Presolve removed 34 rows and 35 columns
Presolve time: 0.01s
Presolved: 155 rows, 130 columns, 1182 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    0.0000000e+00   3.034150e+05   0.000000e+00      0s
      86    8.9259000e+04   0.000000e+00   0.000000e+00      0s

Solved in 86 iterations and 0.01 seconds (0.00 work units)
Optimal objective  8.925900000e+04


In [219]:
# Print results
for var in model.getVars():
    print(var.VarName)
    print(var.X)

expression[0]
3494.0
expression[1]
92.0
expression[2]
0.0
expression[3]
0.0
expression[4]
0.0
expression[5]
0.0
expression[6]
0.0
expression[7]
0.0
expression[8]
403.0
expression[9]
0.0
expression[10]
1541.0
expression[11]
0.0
expression[12]
0.0
expression[13]
0.0
expression[14]
0.0
expression[15]
0.0
expression[16]
0.0
expression[17]
303.0
expression[18]
0.0
expression[19]
0.0
expression[20]
0.0
expression[21]
1058.0
expression[22]
0.0
expression[23]
0.0
expression[24]
0.0
expression[25]
0.0
expression[26]
0.0
expression[27]
0.0
expression[28]
0.0
expression[29]
0.0
expression[30]
8582.0
expression[31]
0.0
expression[32]
0.0
expression[33]
0.0
expression[34]
0.0
expression[35]
0.0
expression[36]
0.0
expression[37]
3.0
x[87,40]
-2441.0
x[40,56]
2599.0
x[56,42]
0.0
x[42,54]
2785.0
x[86,4]
-1999.0
x[4,85]
-832.0
x[85,5]
-62.0
x[85,23]
-771.0
x[5,84]
-55.0
x[23,68]
3696.0
x[68,24]
2892.0
x[68,27]
145.0
x[24,67]
5657.0
x[27,64]
5491.0
x[84,6]
-85.0
x[6,83]
8.0
x[83,7]
0.0
x[7,82]
263.0
x[8

In [220]:
for i in var1:
    print(var1[i].X)


3494.0
92.0
0.0
0.0
0.0
0.0
0.0
0.0
403.0
0.0
1541.0
0.0
0.0
0.0
0.0
0.0
0.0
303.0
0.0
0.0
0.0
1058.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
8582.0
0.0
0.0
0.0
0.0
0.0
0.0
3.0


In [221]:
model.reset()

Discarded solution information
