# GENE TREES

In [28]:
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import orjson


## 1. Data input

In [None]:

genomes = [
    'AUGUAAA',
    'AUGAUAA',
    'AUGAAUA',
    'AUGAAAU',
    'AUGUCAA',
    'AUGAAUA',
    'AUGGCAA',
    'AUGAAAA',
    'AUGAAGA',
    'AUGAACC',
]

# ASSERTIONS
equal_lengths = np.array([len(g)==len(genomes[0]) for g in genomes]).all()
assert equal_lengths, 'Genomes of different length present.'
correct_format = np.array([not('u' in g.lower() and 't' in g.lower()) for g in genomes]).all()
assert equal_lengths, 'Both U and T present.'

# PREPROCESS
genomes = [g.lower().replace('t','u') for g in genomes]

# 1. Get gene distribution for each position
gene_distribution = []
for i in range(len(genomes[0])):
    gene_distribution.append({
        t: np.array([
            int(g[i] == t) for g in genomes
        ]).sum()
        for t in ['a','u','g','c']
    })
print(gene_distribution)
print()

# 2. Get order of positions in time
maxs = [np.array(list(g.values())).max() for g in gene_distribution]
positions_ordered = np.argsort(maxs)

# 3. Create the tree
tree_data = [
    {
        'id': 'root',
        'parent': None,
        'value': 0,
        'group': None,
        'depth':0
    }
]

groups = []
for depth, p in positions_ordered:
    aux_groups = {
        t: np.array([
            i+1 for i, g in enumerate(genomes) if g[p] == t
        ])
        for t in ['a', 'u', 'g', 'c']
    }
    
    for g, ids in aux_groups.items():
        for id in ids:
            if(len(groups)==0):
                parent = 'root'
            else:
                parent = 'root' #! REVISAR
                
            tree_data.append({
                'id': f'{parent}.{id}.{g}',
                'parent': parent,
                'value': len(tree_data),
                'group': g
            })
    print(orjson.dumps(tree_data).decode('utf-8').replace('},{','},\n{'))
    break
    groups.append(aux_groups)
    






[{'a': 10, 'u': 0, 'g': 0, 'c': 0}, {'a': 0, 'u': 10, 'g': 0, 'c': 0}, {'a': 0, 'u': 0, 'g': 10, 'c': 0}, {'a': 7, 'u': 2, 'g': 1, 'c': 0}, {'a': 7, 'u': 1, 'g': 0, 'c': 2}, {'a': 6, 'u': 2, 'g': 1, 'c': 1}, {'a': 8, 'u': 1, 'g': 0, 'c': 1}]

[{"id":"root","parent":null,"value":0,"group":null},
{"id":"root.1.a","parent":"root","value":1,"group":"a"},
{"id":"root.2.a","parent":"root","value":2,"group":"a"},
{"id":"root.4.a","parent":"root","value":3,"group":"a"},
{"id":"root.5.a","parent":"root","value":4,"group":"a"},
{"id":"root.7.a","parent":"root","value":5,"group":"a"},
{"id":"root.8.a","parent":"root","value":6,"group":"a"},
{"id":"root.3.u","parent":"root","value":7,"group":"u"},
{"id":"root.6.u","parent":"root","value":8,"group":"u"},
{"id":"root.9.g","parent":"root","value":9,"group":"g"},
{"id":"root.10.c","parent":"root","value":10,"group":"c"}]


## 2. Creating the tree

In [35]:
# Sample data for a tree
data = pd.DataFrame({
    "id": ["root", "child_1", "child_2", "grandchild_1", "grandchild_2"],
    "parent": ["", "root", "root", "child_1", "child_1"],
    "value": [10, 4, 6, 2, 2]
})
data = pd.DataFrame(tree_data)

## 3. Tree plots

In [38]:
# Create a Sunburst (Tree-Like) visualization
fig = px.sunburst(data, names='id', parents='parent', values='value')
fig.show()

In [39]:
fig = px.treemap(data, names='id', parents='parent', values='value')
fig.show()

In [7]:
fig = px.icicle(data, names='id', parents='parent', values='value')
fig.show()