In [131]:
# Import packages
import gurobipy as gp
from gurobipy import GRB
import parse_graph_new
import networkx as nx
import path_enumeration

In [132]:
# Read file and extract all paths
with open("/home/laura/Documents/Transkript_Assembly/data/human_geuvadis/test2.graph") as f: 
    fileEndReached = False
    f.readline()
    while not fileEndReached:
        f.readline()
        Chromosome, Strand, Exons = parse_graph_new.parse_meta(f)
        Bins = parse_graph_new.parse_bins(f)
        PairedBins = parse_graph_new.parse_pairs(f)
        
        # Build graphs
        G_full = nx.DiGraph()
        fileEndReached, skip = parse_graph_new.parse_graph(f, G_full, Exons) # Full Graph
        
        if not fileEndReached and not skip:
            G_clean = nx.DiGraph()  
            fileEndReached, _ = parse_graph_new.parse_graph(f, G_clean, Exons) # Cleaned Graph
            
        # Full path enumeration of cleaned graph
        transcripts = path_enumeration.enumeration(G_clean,[],"0",["0"],"1",False)
        print("Transkripts:", len(transcripts), transcripts)

f.close()

Transkripts: 2 [['0', '2', '19', '3', '18', '4', '17', '5', '16', '1'], ['0', '2', '19', '3', '18', '4', '17', '7', '14', '9', '12', '10', '11', '1']]


In [133]:
for edgeKey, edgeValue in G_clean.edges.items():
    print(edgeValue)
    print(edgeKey)

{'type': 'SpliceJunction', 'counts': {'0': 34, '1': 33, '2': 33, '3': 46, '4': 28, 'c': 174}, 'length': 1, 'startExon': 0, 'endExon': 1}
('19', '3')
{'type': 'Exon', 'counts': {'0': 40, '1': 37, '2': 41, '3': 58, '4': 40, 'c': 216}, 'length': 815, 'exon': 1}
('3', '18')
{'type': 'SpliceJunction', 'counts': {'0': 29, '1': 20, '2': 36, '3': 54, '4': 24, 'c': 163}, 'length': 1, 'startExon': 1, 'endExon': 2}
('18', '4')
{'type': 'Exon', 'counts': {'0': 52, '1': 27, '2': 47, '3': 64, '4': 37, 'c': 227}, 'length': 155, 'exon': 2}
('4', '17')
{'type': 'SpliceJunction', 'counts': {'0': 9, '1': 4, '2': 7, '3': 23, '4': 6, 'c': 49}, 'length': 1, 'startExon': 2, 'endExon': 3}
('17', '5')
{'type': 'SpliceJunction', 'counts': {'0': 24, '1': 16, '2': 30, '3': 36, '4': 23, 'c': 129}, 'length': 1, 'startExon': 2, 'endExon': 5}
('17', '7')
{'type': 'Exon', 'counts': {'0': 20, '1': 11, '2': 18, '3': 29, '4': 11, 'c': 89}, 'length': 478, 'exon': 3}
('5', '16')
{'type': 'Exon', 'counts': {'0': 33, '1': 18

In [134]:
# Create edge dictionary storing the counts for each edge
edges_dict = {}
for edgeKey, edgeValue in G_clean.edges.items():
    count = edgeValue["counts"]["c"]
    if edgeValue["type"] == "SpliceJunction" or edgeValue["type"] == "Exon":
        edges_dict[edgeKey] = count
print(edges_dict)
edges = list(edges_dict.keys())
print(len(edges))


{('19', '3'): 174, ('3', '18'): 216, ('18', '4'): 163, ('4', '17'): 227, ('17', '5'): 49, ('17', '7'): 129, ('5', '16'): 89, ('7', '14'): 146, ('14', '9'): 28, ('9', '12'): 53, ('12', '10'): 53, ('10', '11'): 53, ('2', '19'): 211}
13


In [135]:
print(transcripts)

[['0', '2', '19', '3', '18', '4', '17', '5', '16', '1'], ['0', '2', '19', '3', '18', '4', '17', '7', '14', '9', '12', '10', '11', '1']]


In [136]:
# Create gurobi model
model = gp.Model("Transcript Expression")

In [137]:
# Add variables
no_trans = len(transcripts)
var1 = model.addVars(no_trans, vtype=GRB.CONTINUOUS, name="expression")
helper1 = model.addVars(edges, lb=-GRB.INFINITY, vtype=GRB.CONTINUOUS, name="x")
helper2 = model.addVars(edges, vtype=GRB.CONTINUOUS, name="y") # L1 Norm
norm0 = model.addVar(name="L0_norm") # L0 norm
sparsity_norm0 = model.addVar(name="Sparsity_Constraint_L0") # Sparsity Constraint L0 Norm

print(vars)

<built-in function vars>


In [138]:
# Create Adjazenzmatrix (path,edge): 0/1
adj_matrix = {}
for i in range(0,len(transcripts)):
    for j in range(1,len(transcripts[i])-1):
        startnode = transcripts[i][j] 
        endnode = transcripts[i][j+1]
        current_edge = (startnode,endnode)
        if current_edge in edges:
            adj_matrix[i,current_edge] = 1
    for edge in edges:
        if (i,edge) not in adj_matrix.keys():
            adj_matrix[i,edge] = 0
print(adj_matrix)

{(0, ('2', '19')): 1, (0, ('19', '3')): 1, (0, ('3', '18')): 1, (0, ('18', '4')): 1, (0, ('4', '17')): 1, (0, ('17', '5')): 1, (0, ('5', '16')): 1, (0, ('17', '7')): 0, (0, ('7', '14')): 0, (0, ('14', '9')): 0, (0, ('9', '12')): 0, (0, ('12', '10')): 0, (0, ('10', '11')): 0, (1, ('2', '19')): 1, (1, ('19', '3')): 1, (1, ('3', '18')): 1, (1, ('18', '4')): 1, (1, ('4', '17')): 1, (1, ('17', '7')): 1, (1, ('7', '14')): 1, (1, ('14', '9')): 1, (1, ('9', '12')): 1, (1, ('12', '10')): 1, (1, ('10', '11')): 1, (1, ('17', '5')): 0, (1, ('5', '16')): 0}


In [139]:
# Define optimization problem
# L1 Norm
"""
for j in edges:
    model.addConstr(helper1[j] == (edges_dict[j] - (gp.quicksum(adj_matrix[i,j] * var1[i] for i in range(len(transcripts))))))
    model.addConstr(helper2[j] >= helper1[j])
    model.addConstr(helper2[j] >= -helper1[j])
model.setObjective(gp.quicksum(helper2[j] for j in edges), GRB.MINIMIZE)
"""

# L0 Norm
"""
for j in edges:
    model.addConstr(helper1[j] == (edges_dict[j] - (gp.quicksum(adj_matrix[i,j] * var1[i] for i in range(len(transcripts))))))
model.addConstr(norm0 == gp.norm(helper1, 0.0))
model.setObjective(norm0, GRB.MINIMIZE)
"""

# L2 Norm
"""

"""

# L1 Norm Sparsity Constraint
"""
factor1 = 10000
model.addConstr(var1.sum() <= factor1)
"""

# LO Norm Sparsity Constraint
"""
factor2 = 5000
#factor3 = 2
model.addConstr(sparsity_norm0 == gp.norm(var1, 0.0)) # Hinzufügen dieser Zeile verändert sofort die Werte der Variablen var1, warum?
model.setObjective((gp.quicksum(helper2[j] for j in edges) + (factor2*sparsity_norm0)), GRB.MINIMIZE)
#model.addConstr(norm <= factor3)
"""



In [140]:
# Solve optimization problem
model.optimize()

Gurobi Optimizer version 9.5.1 build v9.5.1rc2 (linux64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 13 rows, 30 columns and 31 nonzeros
Model fingerprint: 0x60d122b6
Model has 1 general constraint
Variable types: 30 continuous, 0 integer (0 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [3e+01, 2e+02]
Presolve removed 2 rows and 4 columns
Presolve time: 0.00s
Presolved: 11 rows, 26 columns, 23 nonzeros
Presolved model has 13 SOS constraint(s)
Variable types: 13 continuous, 13 integer (13 binary)

Root relaxation: objective 0.000000e+00, 0 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

H    0     0                      13.0000000    0.00000   100%     -    0s
H    0     0            

In [141]:
# Print results
for var in model.getVars():
    print(var.VarName)
    print(var.X)

expression[0]
158.00000000000006
expression[1]
52.99999999999994
x[19,3]
-37.0
x[3,18]
5.0
x[18,4]
-48.0
x[4,17]
16.0
x[17,5]
-109.00000000000006
x[17,7]
76.00000000000006
x[5,16]
-69.00000000000006
x[7,14]
93.00000000000006
x[14,9]
-24.999999999999943
x[9,12]
0.0
x[12,10]
0.0
x[10,11]
0.0
x[2,19]
0.0
y[19,3]
0.0
y[3,18]
0.0
y[18,4]
0.0
y[4,17]
0.0
y[17,5]
0.0
y[17,7]
0.0
y[5,16]
0.0
y[7,14]
0.0
y[14,9]
0.0
y[9,12]
0.0
y[12,10]
0.0
y[10,11]
0.0
y[2,19]
0.0
L0_norm
9.0
Sparsity_Constraint_L0
0.0


In [142]:
for i in var1:
    print(var1[i].X)

158.00000000000006
52.99999999999994


In [143]:
model.reset()

Discarded solution information
