In [13]:
pip install dash plotly



## Natural Occurrence

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd


# Read the CSV file
df = pd.read_csv("fdb_fn_properties.csv")

# Drop rows with no 'Natural_occurrence' value, then label each remaining row
# by whether its text starts with "Not".  str(x) keeps a stray non-string cell
# from raising AttributeError on .startswith().
filtered_data = df[df['Natural_occurrence'].notnull()]
categorized_data = filtered_data['Natural_occurrence'].apply(
    lambda x: 'Not naturally occurring' if str(x).startswith('Not') else 'Naturally occurring'
)

# Count occurrences of each category
counts = categorized_data.value_counts()

# The bar chart and the pie chart are both drawn from the same counts:
# category names on the x axis / as slice labels, counts as heights / sizes.
x_data = counts.index
y_data = counts
labels = counts.index
values = counts

# Initialize the Dash app
app = dash.Dash(__name__)

# Layout: heading, chart-type selector (bar by default), and the graph that
# the callback below fills in.
app.layout = html.Div([
    html.H1("Bar Chart & Pie Chart"),
    dcc.Dropdown(
        id='chart-type',
        options=[
            {'label': 'Bar', 'value': 'bar'},
            {'label': 'Pie', 'value': 'pie'}
        ],
        value='bar'
    ),
    dcc.Graph(id='chart')
])

# Define callback to update the graph based on dropdown selection
@app.callback(
    Output('chart', 'figure'),
    [Input('chart-type', 'value')]
)
def update_chart(chart_type):
    """Build the figure for the selected chart type.

    Args:
        chart_type: Value of the 'chart-type' dropdown; 'bar' draws a bar
            chart, any other value draws a pie chart.

    Returns:
        A dict with 'data' (a single trace) and 'layout', which Dash renders
        in the 'chart' graph.
    """
    if chart_type == 'bar':
        trace = go.Bar(x=x_data, y=y_data)
    else:
        trace = go.Pie(labels=labels, values=values)
    # Both chart types share the same title.
    layout = go.Layout(title='Natural Occurrence')
    return {'data': [trace], 'layout': layout}

# Run the app
if __name__ == '__main__':
    # run() is the supported entry point; run_server() is the legacy name and
    # has been removed from recent Dash releases.
    app.run(debug=True)


<IPython.core.display.Javascript object>

## IOFI Distribution

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd


# Read the CSV file
df = pd.read_csv("fdb_fn_properties.csv")

# Keep only the rows that have an IOFI value.  The explicit copy makes
# filtered_data an independent frame, so adding a column to it later does not
# write into (or warn about) a view of df.
filtered_data = df.dropna(subset=['IOFI']).copy()
def categorize_iofi(iofi):
    """Map a raw IOFI value onto one of four display categories.

    Matching ignores letter case and surrounding whitespace.

    Args:
        iofi: Raw cell value from the 'IOFI' column.

    Returns:
        'Artificial', 'Natural' or 'Nature identical' for the matching label;
        'Others' for any other text and for non-string values.
    """
    # A non-string cell (e.g. a number) has no .lower(); treat it as unknown
    # instead of raising AttributeError.
    if not isinstance(iofi, str):
        return 'Others'
    known_labels = {
        'artificial': 'Artificial',
        'natural': 'Natural',
        'nature identical': 'Nature identical',
    }
    return known_labels.get(iofi.strip().lower(), 'Others')

# Apply categorization.  assign() returns a new frame, so the column is added
# without writing into the result of dropna(), which can trigger pandas'
# SettingWithCopyWarning.
filtered_data = filtered_data.assign(
    IOFI_Category=filtered_data['IOFI'].apply(categorize_iofi)
)

# Count occurrences of each category
counts = filtered_data['IOFI_Category'].value_counts()

# The bar chart and the pie chart are both drawn from the same counts:
# category names on the x axis / as slice labels, counts as heights / sizes.
x_data = counts.index
y_data = counts
labels = counts.index
values = counts

# Initialize the Dash app
app = dash.Dash(__name__)

# Layout: heading, chart-type selector (bar by default), and the graph that
# the callback below fills in.
app.layout = html.Div([
    html.H1("Bar Chart & Pie Chart"),
    dcc.Dropdown(
        id='chart-type',
        options=[
            {'label': 'Bar', 'value': 'bar'},
            {'label': 'Pie', 'value': 'pie'}
        ],
        value='bar'
    ),
    dcc.Graph(id='chart')
])

# Redraw the graph whenever the dropdown selection changes
@app.callback(
    Output('chart', 'figure'),
    [Input('chart-type', 'value')]
)
def update_chart(chart_type):
    """Return the IOFI figure for the selected chart type.

    'bar' produces a bar chart; every other value produces a pie chart.
    The result is a dict holding one trace under 'data' plus a 'layout'.
    """
    trace = (
        go.Bar(x=x_data, y=y_data)
        if chart_type == 'bar'
        else go.Pie(labels=labels, values=values)
    )
    return {'data': [trace], 'layout': go.Layout(title='IOFI Distribution')}

# Run the app
if __name__ == '__main__':
    # run() is the supported entry point; run_server() is the legacy name and
    # has been removed from recent Dash releases.
    app.run(debug=True)


## Energy Distribution

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the table and pick out the "energy" column to plot
df = pd.read_csv("fdb_more_properties.csv")
energy_values = df["energy"]

# Draw energy against row index on a 10x6 figure, using the explicit
# Axes interface rather than the pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(energy_values, color='blue', marker='o', linestyle='-')
ax.set_title("Energy Distribution")
ax.set_xlabel("Index")
ax.set_ylabel("Energy")
ax.set_ylim(0, 3000)  # fixed y-range of the plot
ax.grid(True)
plt.show()


In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the table that carries the 'energy' column
df = pd.read_csv("fdb_more_properties.csv")

# Create the Dash application
app = dash.Dash(__name__)

# Page layout: heading, chart-type selector (histogram by default), and the
# graph that the callback populates.
app.layout = html.Div(children=[
    html.H1(children="Energy Distribution"),
    dcc.Dropdown(
        id='chart-type',
        options=[
            {'label': text, 'value': key}
            for text, key in (('Histogram', 'histogram'), ('Violin Plot', 'violin'))
        ],
        value='histogram',
    ),
    dcc.Graph(id='chart'),
])

# Define callback to update the graph based on dropdown selection
@app.callback(
    Output('chart', 'figure'),
    [Input('chart-type', 'value')]
)
def update_chart(chart_type):
    """Build the energy figure for the selected chart type.

    Args:
        chart_type: Value of the 'chart-type' dropdown; 'histogram' draws a
            30-bin histogram, any other value draws a violin plot.

    Returns:
        A dict with 'data' (a single trace) and 'layout'.
    """
    threshold = 300  # Adjust as needed based on your data
    # Filter out outliers.  Only the 'energy' values are plotted, so select
    # that column directly instead of adding an unused column to a filtered
    # slice of df (which also triggered SettingWithCopyWarning).
    energy = df.loc[df['energy'] < threshold, 'energy']

    if chart_type == 'histogram':
        # Plot the histogram
        trace = go.Histogram(x=energy, nbinsx=30, marker_color='blue', opacity=0.7)
        layout = go.Layout(title='Energy Distribution', xaxis=dict(title='Energy'), yaxis=dict(title='Frequency'))
    else:
        # Plot the violin plot
        trace = go.Violin(y=energy, box_visible=True, line_color='blue', meanline_visible=True, fillcolor='lightblue')
        layout = go.Layout(title='Energy Distribution', yaxis=dict(title='Energy'))

    return {'data': [trace], 'layout': layout}

# Run the app
if __name__ == '__main__':
    # run() is the supported entry point; run_server() is the legacy name and
    # has been removed from recent Dash releases.
    app.run(debug=True)


## Receptors

In [None]:
import pandas as pd
import plotly.express as px

# The 'receptor_name' column holds one receptor per row; rows without a name
# are dropped before counting.
df = pd.read_csv('fdb_receptors.csv')
df = df.dropna(subset=['receptor_name'])

# Work on a string view of the column so .str / isin behave uniformly.
receptor_names = df['receptor_name'].astype(str)

# Define categories based on common prefixes.  'OR' is derived from the data:
# every distinct name that starts with "OR".
categories = {
    'mGluR': ['mGluR4', 'mGluR1'],
    'TAS2R': ['TAS2R1', 'TAS2R3', 'TAS2R4', 'TAS2R5', 'TAS2R7', 'TAS2R8', 'TAS2R9', 'TAS2R10',
              'TAS2R13', 'TAS2R14', 'TAS2R16', 'TAS2R19', 'TAS2R20', 'TAS2R30', 'TAS2R31',
              'TAS2R38', 'TAS2R39', 'TAS2R40', 'TAS2R41', 'TAS2R42', 'TAS2R43', 'TAS2R45',
              'TAS2R46', 'TAS2R50', 'TAS2R60'],
    'TAS1R': ['TAS1R1', 'TAS1R2', 'TAS1R3'],
    'PKD2L1': ['PKD2L1'],
    'HCN': ['HCN1', 'HCN4'],
    'OR': receptor_names[receptor_names.str.startswith('OR')].unique().tolist(),
    'CNGA': ['CNGA2', 'CNCA', 'CNCA1', 'CNCG2'],
    'GNAL': ['GNAL'],
    'GFY': ['GFY'],
    'VN1R': ['VN1R1', 'V1RL1', 'VNR19I1']
}

# Count the rows whose receptor belongs to each category.  isin() does a
# hashed lookup, avoiding a list scan per row (quadratic for the 'OR' list).
category_counts = {category: int(receptor_names.isin(category_receptors).sum())
                   for category, category_receptors in categories.items()}

# Calculate percentages (0.0 everywhere when no row matched any category,
# instead of dividing by zero).
total_receptors = sum(category_counts.values())
percentages = {category: (count / total_receptors * 100 if total_receptors else 0.0)
               for category, count in category_counts.items()}

# Create a DataFrame for Plotly
data = pd.DataFrame({'Category': list(category_counts.keys()),
                     'Count': list(category_counts.values()),
                     'Percentage': list(percentages.values())})

# Create a Plotly bar plot.  Percentage travels as custom data so the hover
# template below can reference it.
fig = px.bar(data, x='Category', y='Count', color='Count', custom_data=['Percentage'],
             labels={'Count': 'Count', 'Category': 'Receptor Categories', 'Percentage': 'Percentage'},
             title='Distribution of Receptor Categories')

# A custom hovertemplate replaces the one Plotly Express generates, so the
# percentage has to be spelled out here or it would not appear at all.
fig.update_traces(
    hovertemplate='<b>%{x}</b><br>Count: %{y}<br>Percentage: %{customdata[0]:.2f}%'
)

# Show the plot
fig.show()

## Entities

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd
import plotly.express as px

# Load the entity table
df = pd.read_csv("fdb_entities.csv")

# Names of the 20 most frequent 'entity_alias' values
top_entities = df['entity_alias'].value_counts().iloc[:20].index.tolist()

# Rows whose alias is one of those 20
df_top_entities = df.loc[df['entity_alias'].isin(top_entities)]

# Create the Dash application
app = dash.Dash(__name__)

# Page layout: heading, chart-type selector (bar by default), and the graph
# that the callback populates.
app.layout = html.Div(children=[
    html.H1(children="Visualization of Top 20 Entities"),
    dcc.Dropdown(
        id='chart-type',
        options=[
            {'label': text, 'value': key}
            for text, key in (
                ('Bar Chart', 'bar'),
                ('Pie Chart', 'pie'),
                ('Histogram', 'histogram'),
                ('Box Plot', 'box'),
                ('Treemap', 'treemap'),
            )
        ],
        value='bar',
    ),
    dcc.Graph(id='chart'),
])

# Redraw the graph whenever the dropdown selection changes
@app.callback(
    Output('chart', 'figure'),
    [Input('chart-type', 'value')]
)
def update_chart(chart_type):
    """Return the entity figure for the selected chart type.

    'box' and 'treemap' return a Plotly Express figure; 'bar', 'histogram'
    and the fallback pie chart return a dict with 'data' and 'layout'.
    """
    # Plotly Express figures are handed back as-is.
    if chart_type == 'box':
        return px.box(df_top_entities, x='entity_alias', title='Box Plot of Top 20 Entities')
    if chart_type == 'treemap':
        return px.treemap(df_top_entities, path=['entity_alias'], title='Treemap of Top 20 Entities')

    aliases = df_top_entities['entity_alias']
    if chart_type == 'histogram':
        trace = go.Histogram(x=aliases)
        title = 'Top 20 Entity Distribution - Histogram'
    else:
        # Bar and pie are both drawn from the per-alias counts.
        alias_counts = aliases.value_counts().head(20)
        if chart_type == 'bar':
            trace = go.Bar(x=alias_counts.index, y=alias_counts.values)
            title = 'Top 20 Entity Distribution - Bar Chart'
        else:
            trace = go.Pie(labels=alias_counts.index, values=alias_counts.values)
            title = 'Top 20 Entity Distribution - Pie Chart'

    return {'data': [trace], 'layout': go.Layout(title=title)}

# Run the app
if __name__ == '__main__':
    # run() is the supported entry point; run_server() is the legacy name and
    # has been removed from recent Dash releases.
    app.run(debug=True)


<IPython.core.display.Javascript object>

## Categories

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd
import plotly.express as px

# Load the entity table
df = pd.read_csv("fdb_entities.csv")

# Names of the 20 most frequent 'category_readable' values
top_categories = df['category_readable'].value_counts().iloc[:20].index.tolist()

# Rows whose category is one of those 20
df_top_categories = df.loc[df['category_readable'].isin(top_categories)]

# Create the Dash application
app = dash.Dash(__name__)

# Page layout: heading, chart-type selector (bar by default), and the graph
# that the callback populates.
app.layout = html.Div(children=[
    html.H1(children="Visualization of Top 20 Category"),
    dcc.Dropdown(
        id='chart-type',
        options=[
            {'label': text, 'value': key}
            for text, key in (
                ('Bar Chart', 'bar'),
                ('Pie Chart', 'pie'),
                ('Histogram', 'histogram'),
                ('Box Plot', 'box'),
                ('Treemap', 'treemap'),
            )
        ],
        value='bar',
    ),
    dcc.Graph(id='chart'),
])

# Redraw the graph whenever the dropdown selection changes
@app.callback(
    Output('chart', 'figure'),
    [Input('chart-type', 'value')]
)
def update_chart(chart_type):
    """Return the category figure for the selected chart type.

    'box' and 'treemap' return a Plotly Express figure; 'bar', 'histogram'
    and the fallback pie chart return a dict with 'data' and 'layout'.
    """
    # Plotly Express figures are handed back as-is.
    if chart_type == 'box':
        return px.box(df_top_categories, x='category_readable', title='Box Plot of Top 20 Category Readable')
    if chart_type == 'treemap':
        return px.treemap(df_top_categories, path=['category_readable'], title='Treemap of Top 20 Categories')

    category_column = df_top_categories['category_readable']
    if chart_type == 'histogram':
        trace = go.Histogram(x=category_column)
        title = 'Top 20 Category Distribution - Histogram'
    else:
        # Bar and pie are both drawn from the per-category counts.
        per_category = category_column.value_counts().head(20)
        if chart_type == 'bar':
            trace = go.Bar(x=per_category.index, y=per_category.values)
            title = 'Top 20 Category Distribution - Bar Chart'
        else:
            trace = go.Pie(labels=per_category.index, values=per_category.values)
            title = 'Top 20 Category Distribution - Pie Chart'

    return {'data': [trace], 'layout': go.Layout(title=title)}

# Run the app
if __name__ == '__main__':
    # run() is the supported entry point; run_server() is the legacy name and
    # has been removed from recent Dash releases.
    app.run(debug=True)


<IPython.core.display.Javascript object>

## Natural Source

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd
import plotly.express as px

# Load the entity table
df = pd.read_csv("fdb_entities.csv")

# Names of the 20 most frequent 'natural_source_name' values
top_sources = df['natural_source_name'].value_counts().iloc[:20].index.tolist()

# Rows whose natural source is one of those 20
df_top_sources = df.loc[df['natural_source_name'].isin(top_sources)]

# Create the Dash application
app = dash.Dash(__name__)

# Page layout: heading, chart-type selector (bar by default), and the graph
# that the callback populates.
app.layout = html.Div(children=[
    html.H1(children="Visualization of Top 20 Natural Sources"),
    dcc.Dropdown(
        id='chart-type',
        options=[
            {'label': text, 'value': key}
            for text, key in (
                ('Bar Chart', 'bar'),
                ('Pie Chart', 'pie'),
                ('Histogram', 'histogram'),
                ('Box Plot', 'box'),
                ('Treemap', 'treemap'),
            )
        ],
        value='bar',
    ),
    dcc.Graph(id='chart'),
])

# Redraw the graph whenever the dropdown selection changes
@app.callback(
    Output('chart', 'figure'),
    [Input('chart-type', 'value')]
)
def update_chart(chart_type):
    """Return the natural-source figure for the selected chart type.

    'box' and 'treemap' return a Plotly Express figure; 'bar', 'histogram'
    and the fallback pie chart return a dict with 'data' and 'layout'.
    """
    # Plotly Express figures are handed back as-is.
    if chart_type == 'box':
        return px.box(df_top_sources, x='natural_source_name', title='Box Plot of Top 20 Natural Sources')
    if chart_type == 'treemap':
        return px.treemap(df_top_sources, path=['natural_source_name'], title='Treemap of Top 20 Natural Sources')

    source_column = df_top_sources['natural_source_name']
    if chart_type == 'histogram':
        trace = go.Histogram(x=source_column)
        title = 'Top 20 Natural Sources Distribution - Histogram'
    else:
        # Bar and pie are both drawn from the per-source counts.
        per_source = source_column.value_counts().head(20)
        if chart_type == 'bar':
            trace = go.Bar(x=per_source.index, y=per_source.values)
            title = 'Top 20 Natural Sources Distribution - Bar Chart'
        else:
            trace = go.Pie(labels=per_source.index, values=per_source.values)
            title = 'Top 20 Natural Sources Distribution - Pie Chart'

    return {'data': [trace], 'layout': go.Layout(title=title)}

# Run the app
if __name__ == '__main__':
    # run() is the supported entry point; run_server() is the legacy name and
    # has been removed from recent Dash releases.
    app.run(debug=True)


<IPython.core.display.Javascript object>

## Flavor Profile

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px

# Load the molecule table
df = pd.read_csv("fdb_molecules.csv")

# Turn each '@'-separated flavor_profile cell into a list, then give every
# list element its own row.
df = df.assign(flavor_profile=df['flavor_profile'].str.split('@')).explode('flavor_profile')

# The 15 most frequent flavor profiles with their counts
top_flavor_profiles = df['flavor_profile'].value_counts().iloc[:15]

# Create the Dash application
app = dash.Dash(__name__)

# Page layout: heading, chart-type selector (pie by default), and the graph
# that the callback populates.
app.layout = html.Div(children=[
    html.H1(children="Visualization of Flavor Profiles"),
    dcc.Dropdown(
        id='chart-type',
        options=[
            {'label': text, 'value': key}
            for text, key in (
                ('Pie Chart', 'pie'),
                ('Bar Chart', 'bar'),
                ('Histogram', 'histogram'),
            )
        ],
        value='pie',
    ),
    dcc.Graph(id='chart'),
])

# Redraw the graph whenever the dropdown selection changes
@app.callback(
    Output('chart', 'figure'),
    [Input('chart-type', 'value')]
)
def update_chart(chart_type):
    """Return the flavor-profile figure for the selected chart type.

    'pie' and 'bar' are drawn from the top-15 counts; any other value
    produces a histogram over every exploded flavor_profile row.
    """
    if chart_type == 'pie':
        return px.pie(names=top_flavor_profiles.index, values=top_flavor_profiles.values, title='Top 15 Flavor Profiles')
    if chart_type == 'bar':
        return px.bar(x=top_flavor_profiles.index, y=top_flavor_profiles.values, title='Top 15 Flavor Profiles')
    # Fallback: histogram
    return px.histogram(x=df['flavor_profile'], title='Flavor Profile Distribution')

# Run the app
if __name__ == '__main__':
    # run() is the supported entry point; run_server() is the legacy name and
    # has been removed from recent Dash releases.
    app.run(debug=True)


<IPython.core.display.Javascript object>

In [16]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px

# Load the molecule table
df = pd.read_csv("fdb_molecules.csv")

# Turn each '@'-separated fooddb_flavor_profile cell into a list, then give
# every list element its own row.
df = df.assign(
    fooddb_flavor_profile=df['fooddb_flavor_profile'].str.split('@')
).explode('fooddb_flavor_profile')

# The 15 most frequent FoodDB flavor profiles with their counts
top_flavor_profiles = df['fooddb_flavor_profile'].value_counts().iloc[:15]

# Create the Dash application
app = dash.Dash(__name__)

# Page layout: heading, chart-type selector (pie by default), and the graph
# that the callback populates.
app.layout = html.Div(children=[
    html.H1(children="Visualization of FoodDB Flavor Profiles"),
    dcc.Dropdown(
        id='chart-type',
        options=[
            {'label': text, 'value': key}
            for text, key in (
                ('Pie Chart', 'pie'),
                ('Bar Chart', 'bar'),
                ('Histogram', 'histogram'),
            )
        ],
        value='pie',
    ),
    dcc.Graph(id='chart'),
])

# Redraw the graph whenever the dropdown selection changes
@app.callback(
    Output('chart', 'figure'),
    [Input('chart-type', 'value')]
)
def update_chart(chart_type):
    """Return the FoodDB flavor-profile figure for the selected chart type.

    'pie' and 'bar' are drawn from the top-15 counts; any other value
    produces a histogram over every exploded fooddb_flavor_profile row.
    """
    if chart_type == 'pie':
        return px.pie(names=top_flavor_profiles.index, values=top_flavor_profiles.values, title='Top 15 FoodDB Flavor Profiles')
    if chart_type == 'bar':
        return px.bar(x=top_flavor_profiles.index, y=top_flavor_profiles.values, title='Top 15 FoodDB Flavor Profiles')
    # Fallback: histogram
    return px.histogram(x=df['fooddb_flavor_profile'], title='FoodDB Flavor Profile Distribution')

# Run the app
if __name__ == '__main__':
    # run() is the supported entry point; run_server() is the legacy name and
    # has been removed from recent Dash releases.
    app.run(debug=True)


<IPython.core.display.Javascript object>

## Sub-Categories

In [18]:
import pandas as pd
import plotly.graph_objects as go

# Step 1: Load Data
data = pd.read_csv('fdb_entities.csv')

# Step 2: Data Preparation
# For every category, count how often each entity alias occurs inside it.
category_counts = data['category_readable'].value_counts()
subcategory_data = {
    category: data.loc[data['category_readable'] == category, 'entity_alias'].value_counts()
    for category in category_counts.index
}

# Step 3: Create one pie chart per category.  Only the first (most frequent)
# category is visible initially; previously every trace was 'legendonly', so
# the figure opened empty.
fig = go.Figure()

for position, category in enumerate(category_counts.index):
    fig.add_trace(go.Pie(labels=subcategory_data[category].index,
                         values=subcategory_data[category].values,
                         name=category,
                         visible=(position == 0)))

# Step 4: Add one button per category; clicking it shows that category's pie
# and hides all the others.
fig.update_layout(updatemenus=[dict(type='buttons',
                                    direction='down',
                                    buttons=[dict(label=category,
                                                  method='update',
                                                  args=[{'visible': [category == trace.name for trace in fig.data]}])
                                             for category in category_counts.index])])

fig.update_layout(title='Top Categories')

# Step 5: Display Interactive Plot
fig.show()


## Sub-Categories (Unique)

In [17]:
import pandas as pd
import plotly.graph_objects as go

# Step 1: Read the entity table from disk
data = pd.read_csv(filepath_or_buffer='fdb_entities.csv')

# Step 2: Data Preparation
# Collapse '-'-separated names onto the part before the first '-'
def split_first_word(x):
    """Return the text before the first '-' in ``x``.

    Strings without a '-' come back unchanged, and so does any value that is
    not a string.
    """
    return x.partition('-')[0] if isinstance(x, str) else x

# Replace each alias by the part before its first '-', so variants of the
# same name are counted together.
data['entity_alias'] = data['entity_alias'].apply(split_first_word)

# For every category, count how often each (shortened) alias occurs inside it.
category_counts = data['category_readable'].value_counts()
subcategory_data = {
    category: data.loc[data['category_readable'] == category, 'entity_alias'].value_counts()
    for category in category_counts.index
}

# Step 3: Create one pie chart per category.  Only the first (most frequent)
# category is visible initially; previously every trace was 'legendonly', so
# the figure opened empty.
fig = go.Figure()

for position, category in enumerate(category_counts.index):
    fig.add_trace(go.Pie(labels=subcategory_data[category].index,
                         values=subcategory_data[category].values,
                         name=category,
                         visible=(position == 0)))

# Step 4: Add one button per category; clicking it shows that category's pie
# and hides all the others.
fig.update_layout(updatemenus=[dict(type='buttons',
                                    direction='down',
                                    buttons=[dict(label=category,
                                                  method='update',
                                                  args=[{'visible': [category == trace.name for trace in fig.data]}])
                                             for category in category_counts.index])])

fig.update_layout(title='Top Categories')

# Step 5: Display Interactive Plot
fig.show()


## Category Tree Map

In [None]:
import pandas as pd
import plotly.express as px

# Step 1: Load Data
data = pd.read_csv('fdb_entities.csv')

# Step 2: Data Preparation
# Per-category totals, and for every category the counts of its aliases.
category_counts = data['category_readable'].value_counts()
subcategory_data = {
    category: data.loc[data['category_readable'] == category, 'entity_alias'].value_counts()
    for category in category_counts.index
}

# One row per (category, alias) pair with its number of occurrences.
# groupby skips rows where either value is missing.
hierarchy_counts = (
    data.groupby(['category_readable', 'entity_alias'])
        .size()
        .reset_index(name='count')
)

# Step 3: Create a two-level treemap (category -> alias).  Clicking a category
# tile zooms into its aliases in the browser itself, so the drill-down works
# with a plain fig.show() and needs no Python callback.
fig = px.treemap(hierarchy_counts,
                 path=['category_readable', 'entity_alias'],
                 values='count',
                 title='Top Categories')

# Step 4: Optional Python-side click handler
def update_treemap(trace, points, selector):
    """Show a separate treemap of the clicked category's aliases.

    Plotly only invokes trace click callbacks when the figure is displayed as
    a FigureWidget; with fig.show() this function is never called.

    Args:
        trace: The trace that received the click.
        points: plotly.callbacks.Points describing the clicked points.
        selector: Unused; part of the callback signature.
    """
    if not points.point_inds:
        return
    # Points exposes indices, not labels; look the label up on the trace.
    category = trace.labels[points.point_inds[0]]
    subcategory_counts = subcategory_data.get(category)
    if subcategory_counts is None:
        # The clicked tile is an alias (or the root), not a category.
        return
    subfig = px.treemap(names=subcategory_counts.index,
                        parents=['']*len(subcategory_counts),
                        values=subcategory_counts.values,
                        hover_data=[subcategory_counts.index, subcategory_counts.values],
                        title=f'Subcategories of {category}')
    subfig.show()

fig.for_each_trace(lambda t: t.on_click(update_treemap))

# Step 5: Display Treemap
fig.show()


In [None]:
import pandas as pd
import plotly.express as px

# Read the CSV file
df = pd.read_csv("fdb_molecules.csv")


def _top_profile_counts(column, limit=30):
    """Count the '@'-separated entries of one column of df.

    Each column is split and exploded on its own Series, so exploding one
    column never duplicates rows for the other.  (Exploding the whole frame
    twice counted every FoodDB entry once per flavor_profile entry of the
    same molecule.)

    Args:
        column: Name of a df column holding '@'-separated text.
        limit: How many of the most frequent entries to keep.

    Returns:
        A Series of counts indexed by entry, most frequent first.
    """
    return df[column].str.split('@').explode().value_counts().head(limit)


# Get the top 30 flavor profiles
top_flavor_profiles = _top_profile_counts('flavor_profile')

# Create treemap for flavor profiles
fig = px.treemap(names=top_flavor_profiles.index,
                 parents=['']*len(top_flavor_profiles),
                 values=top_flavor_profiles.values,
                 title='Top 30 Flavor Profiles Treemap')
fig.show()

# Get the top 30 fooddb_flavor_profiles
top_fooddb_flavor_profiles = _top_profile_counts('fooddb_flavor_profile')

# Create treemap for fooddb_flavor_profiles
fig = px.treemap(names=top_fooddb_flavor_profiles.index,
                 parents=['']*len(top_fooddb_flavor_profiles),
                 values=top_fooddb_flavor_profiles.values,
                 title='Top 30 FoodDB Flavor Profiles Treemap')
fig.show()
