In [6]:
#!pip install dash
import dash
from dash import dcc, html, dash_table
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go

Read files

In [7]:
# Directory that holds the Nextstrain result files read below. It is kept in a
# single constant so the notebook can be pointed at another location by editing
# one line instead of every read_csv call.
RESULTS_DIR = '/Users/MiladM-Dev/Documents/1PhD/project-1-N450/project-1.1-gendata/Nextstrain/trial-1/results'

# Both inputs are tab-separated, hence sep='\t'.
seq_index_tsv_file = pd.read_csv(f'{RESULTS_DIR}/sequence_index.tsv', sep='\t')
metadata_tsv_file = pd.read_csv(f'{RESULTS_DIR}/metadata.tsv', sep='\t')

#seq_index_tsv_file view
seq_index_tsv_file

Unnamed: 0,strain,length,A,C,G,T,N,other_IUPAC,-,?,invalid_nucleotides
0,00-00022,456,124,112,144,76,0,0,0,0,0
1,00-00035,456,124,112,144,76,0,0,0,0,0
2,00-00043,456,140,111,128,77,0,0,0,0,0
3,00-00049,456,140,110,128,78,0,0,0,0,0
4,00-00061,456,140,111,128,77,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
3458,24-01146,450,132,113,132,73,0,0,0,0,0
3459,24-01149,450,132,113,132,73,0,0,0,0,0
3460,24-01150,450,132,113,132,73,0,0,0,0,0
3461,24-01151,450,132,113,132,73,0,0,0,0,0


In [8]:
# metadata_tsv_file view: display the table read from metadata.tsv
metadata_tsv_file

Unnamed: 0,SequenceName,type,date
0,00-00022,D7,2000-02-03
1,00-00035,D7,2000-02-10
2,00-00043,C2,2000-02-21
3,00-00049,C2,2000-02-18
4,00-00061,C2,2000-02-24
...,...,...,...
3458,24-01146,D8,2024-09-02
3459,24-01149,D8,2024-09-02
3460,24-01150,D8,2024-08-27
3461,24-01151,D8,2024-08-27


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of seq_index_tsv_file.
seq_index_tsv_file.describe()

Unnamed: 0,length,A,C,G,T,N,other_IUPAC,-,?,invalid_nucleotides
count,3463.0,3463.0,3463.0,3463.0,3463.0,3463.0,3463.0,3463.0,3463.0,3463.0
mean,455.430552,132.047935,109.685822,135.023679,78.672827,0.0,0.000289,0.0,0.0,0.0
std,2.629601,8.468142,5.031621,6.059117,7.805594,0.0,0.016993,0.0,0.0,0.0
min,376.0,73.0,93.0,102.0,58.0,0.0,0.0,0.0,0.0,0.0
25%,456.0,128.0,108.0,132.0,76.0,0.0,0.0,0.0,0.0,0.0
50%,456.0,134.0,110.0,133.0,78.0,0.0,0.0,0.0,0.0,0.0
75%,456.0,136.0,112.0,141.0,79.0,0.0,0.0,0.0,0.0,0.0
max,456.0,144.0,144.0,145.0,143.0,0.0,1.0,0.0,0.0,0.0


In [10]:
# Summary of metadata_tsv_file. Its columns are non-numeric, so describe()
# reports count / unique / top / freq instead of mean and quartiles.
metadata_tsv_file.describe()

Unnamed: 0,SequenceName,type,date
count,3463,3463,3463
unique,3463,11,1836
top,00-00022,D8,2011-05-04
freq,1,1640,17


In [16]:
# Give both tables the same key column name, 'ID', so they can be merged on it.
# The sequence index stores its key under 'strain' while the metadata uses
# 'SequenceName'. Renaming only 'SequenceName' leaves the sequence index without
# an 'ID' column on a fresh kernel, and the merge on 'ID' then raises KeyError.
# DataFrame.rename ignores mapping keys that are not present, so this cell is
# safe to re-run and also tolerates a key column already called 'SequenceName'.
seq_index_tsv_file = seq_index_tsv_file.rename(columns={'strain': 'ID', 'SequenceName': 'ID'})
metadata_tsv_file = metadata_tsv_file.rename(columns={'SequenceName': 'ID'})


In [17]:
# Merge the metadata with the per-sequence index on the shared 'ID' column
# (pd.merge defaults to an inner join), then drop the N / other_IUPAC / '-' /
# '?' / invalid_nucleotides count columns so that only length and the A, C, G, T
# counts remain next to the metadata.
dropped_count_columns = ['N', 'other_IUPAC', '-', '?', 'invalid_nucleotides']
merged_df = (
    pd.merge(metadata_tsv_file, seq_index_tsv_file, on='ID')
    .drop(columns=dropped_count_columns)
)
merged_df

Unnamed: 0,ID,type,date,length,A,C,G,T
0,00-00022,D7,2000-02-03,456,124,112,144,76
1,00-00035,D7,2000-02-10,456,124,112,144,76
2,00-00043,C2,2000-02-21,456,140,111,128,77
3,00-00049,C2,2000-02-18,456,140,110,128,78
4,00-00061,C2,2000-02-24,456,140,111,128,77
...,...,...,...,...,...,...,...,...
3458,24-01146,D8,2024-09-02,450,132,113,132,73
3459,24-01149,D8,2024-09-02,450,132,113,132,73
3460,24-01150,D8,2024-08-27,450,132,113,132,73
3461,24-01151,D8,2024-08-27,450,132,113,132,73


In [20]:
# Create the Dash application object.
app = dash.Dash(__name__)

# 1. Pie chart data: how many rows carry each 'type' value, plus that value's
#    share of all rows expressed in percent.
type_counts = (
    merged_df['type']
    .value_counts()
    .rename_axis('type')
    .reset_index(name='count')
)
total_count = type_counts['count'].sum()
type_counts['percentage'] = (type_counts['count'] / total_count) * 100

# Single pie trace; slices and hover text show label, percent and raw value.
type_pie = go.Pie(
    labels=type_counts['type'],
    values=type_counts['count'],
    hoverinfo='label+percent+value',
    textinfo='label+percent+value',
)
pie_chart = go.Figure(data=[type_pie])
# update_layout modifies pie_chart in place; the figure is rendered at 650x650.
pie_chart.update_layout(width=650, height=650)

# Column definitions for the table; the percentage column is numeric and
# formatted with two decimals.
table_columns = [
    {"name": "Type", "id": "type"},
    {"name": "Count", "id": "count"},
    {"name": "Percentage", "id": "percentage", "type": "numeric", "format": {"specifier": ".2f"}},
]

# Page layout: heading, pie chart, then a table with the same counts/percentages.
page_title = html.H1("Data Visualizations")
pie_graph = dcc.Graph(id='pie-chart', figure=pie_chart)
table_title = html.H2("Pie Chart Data Table")
pie_table = dash_table.DataTable(
    id='pie-table',
    columns=table_columns,
    data=type_counts.to_dict('records'),
    style_table={'width': '50%', 'margin': 'auto'},
    style_header={'fontWeight': 'bold', 'textAlign': 'center'},
    style_cell={'textAlign': 'center'},
)
app.layout = html.Div([page_title, pie_graph, table_title, pie_table])

# Start the development server (debug mode on) when run as the main module,
# which is also the case inside a notebook kernel.
if __name__ == '__main__':
    app.run(debug=True)