In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import plotly.express as px
from datetime import datetime, timedelta
import plotly.graph_objects as go
import random as rand

## Generating dummy data

In [None]:
# Helper for sampling random datetimes inside a window
def random_dates(start, end, n=1):
    """Draw ``n`` datetimes uniformly at random between ``start`` and ``end``.

    Both bounds are converted to POSIX timestamps, a float is sampled between
    them using NumPy's global random state, and every sample is converted back
    with ``datetime.fromtimestamp``.

    Args:
        start: ``datetime`` giving the lower bound of the sampling window.
        end: ``datetime`` giving the upper bound of the sampling window.
        n: Number of datetimes to draw (defaults to 1).

    Returns:
        A list containing ``n`` ``datetime`` objects.
    """
    lower, upper = start.timestamp(), end.timestamp()
    sampled = []
    for _ in range(n):
        # One scalar draw per date, so the sequence follows np.random.seed().
        sampled.append(datetime.fromtimestamp(np.random.uniform(lower, upper)))
    return sampled

In [None]:
# City -> country lookup, built from (country, member cities) pairs.
# The order of the pairs fixes the dict's insertion order, which later code
# relies on when it enumerates cities and countries.
cities = {
    city: country
    for country, member_cities in (
        ('Utopia', ('Northhaven', 'Southville')),
        ('Novaria', ('Eastford', 'Westfield')),
        ('Eldoria', ('Larkspur', 'Riverton')),
        ('Tranquilia', ('Greenfield', 'Hillcrest')),
        ('Verdantia', ('Fairview', 'Brooktown')),
        ('Aquaria', ('Seaside', 'Mountainview')),
        ('Serenia', ('Meadowland', 'Crystal Falls')),
    )
    for city in member_cities
}

In [None]:
# How many synthetic rows to generate (before same-country rows are filtered out)
num_dp = 1_000

In [None]:
# Build the synthetic importation records
np.random.seed(0)  # fixed seed so every run yields the same dummy data

origin_cities = [*cities]
destination_cities = [*cities]

# The draw order below (dates, case counts, origins, destinations) determines
# which values the seeded generator hands to each column.
dates = random_dates(datetime(2024, 9, 2), datetime(2024, 9, 30), num_dp)
cases = np.random.randint(15, 50, size=num_dp)
origin_draw = np.random.choice(origin_cities, size=num_dp)
destination_draw = np.random.choice(destination_cities, size=num_dp)

importation_data = pd.DataFrame(
    {
        'Origin City': origin_draw,
        'Destination City': destination_draw,
        'Date': dates,
        'Number of Cases': cases,
    }
)

In [None]:
# Two-column lookup table: one row per city with its country
city_to_country = pd.DataFrame(list(cities.items()), columns=['City', 'Country'])

# A city can't export to itself or to another city in its own country, so
# attach the country of both endpoints and keep only cross-country rows.
with_countries = importation_data
for endpoint in ('Origin', 'Destination'):
    with_countries = (
        with_countries
        .merge(city_to_country, left_on=f'{endpoint} City', right_on='City')
        .rename(columns={'Country': f'{endpoint} Country'})
        .drop(columns=['City'])
    )

crosses_border = with_countries['Origin Country'] != with_countries['Destination Country']

# The country columns were only needed for the filter; drop them again.
importation_data = (
    with_countries[crosses_border]
    .drop(columns=['Origin Country', 'Destination Country'])
)

In [None]:
# Scale case counts linearly with time so that later dates carry more cases
base_multiplier = 1.0
growth_rate = .1

start_date = importation_data['Date'].min()

# Whole days elapsed since the earliest record, kept as standalone Series so
# no helper columns have to be added to (and then removed from) the frame.
days_elapsed = (importation_data['Date'] - start_date).dt.days
daily_multiplier = base_multiplier + (days_elapsed * growth_rate)

# astype(int) truncates the scaled values back to whole case counts.
importation_data['Number of Cases'] = (
    importation_data['Number of Cases'] * daily_multiplier
).astype(int)

In [None]:
# Aggregate the records into weekly totals per (origin, destination) pair.
# Make sure 'Date' is a datetime column before using the .dt accessor.
importation_data['Date'] = pd.to_datetime(importation_data['Date'])

# 'Week' holds the start timestamp of the weekly period containing each date.
# The vectorised period accessor replaces a per-row .apply(lambda ...) and
# keeps a datetime dtype even when the frame is empty.
importation_data['Week'] = importation_data['Date'].dt.to_period('W').dt.start_time

# Sum the cases for every Origin City / Destination City / Week combination
weekly_importation = (
    importation_data
    .groupby(['Origin City', 'Destination City', 'Week'])['Number of Cases']
    .sum()
    .reset_index()
)

# Sort chronologically, then by route, for readability
weekly_importation = weekly_importation.sort_values(by=['Week', 'Origin City', 'Destination City'])

In [None]:
# Sanity check: total number of cases across all remaining records
importation_data.loc[:, 'Number of Cases'].sum()

## Area Plot For Country Exportation Volume

In [None]:
# Country whose exports/imports the plots below focus on
country = "Tranquilia"

In [None]:
# Attach each origin city's country, then keep only rows whose origin lies
# in the selected country
country_export = pd.merge(
    weekly_importation,
    city_to_country,
    how='left',
    left_on='Origin City',
    right_on='City',
)
from_selected_country = country_export['Country'] == country
country_export = country_export[from_selected_country]

# Weekly totals per origin city
export_columns = ['Origin City', 'Week', 'Number of Cases']
df_country_exports = (
    country_export[export_columns]
    .groupby(['Origin City', 'Week'])
    .sum()
    .reset_index()
)

# Area plot with one coloured band per origin city
fig = px.area(
    df_country_exports,
    x='Week',
    y='Number of Cases',
    color='Origin City',
    title=f'Number of Cases from {country}',
)
fig.show()

## Sankey Plot For Country Exportation/Importation Connections

In [None]:
# Parameters for the Sankey view
date_range = [datetime(2024, 9, 5), datetime(2024, 9, 12)]  # inclusive bounds applied to 'Date'
origin = True      # True: flows leaving `country`; False: flows arriving in `country`
normalize = True   # True: show every flow as a share of the total instead of raw counts

# Keep only records whose date falls inside the window (both ends inclusive)
in_window = (importation_data['Date'] >= date_range[0]) & (importation_data['Date'] <= date_range[1])
filtered_data = importation_data[in_window]

# Sankey nodes are addressed by integer position, so give every country an index
countries = city_to_country['Country'].unique()
country_indices = {name: position for position, name in enumerate(countries)}

# Attach the origin and destination country to every record in the window
country_att = filtered_data.merge(city_to_country, left_on='Origin City', right_on='City')
country_att = country_att.rename(columns={'Country': 'Origin Country'})
country_att = country_att.drop(columns=['City'])

country_att = country_att.merge(city_to_country, left_on='Destination City', right_on='City')
country_att = country_att.rename(columns={'Country': 'Destination Country'})
country_att = country_att.drop(columns=['City'])

# Keep the flows that start (or end) in the selected country
direction_column = 'Origin Country' if origin else 'Destination Country'
touches_country = country_att[direction_column] == country

# Aggregate only the case counts. Summing the whole frame would also try to
# sum the city names and the datetime columns ('Date', plus 'Week' when it is
# present), which newer pandas versions reject for datetimes.
sankey_data = (
    country_att.loc[touches_country, ['Origin Country', 'Destination Country', 'Number of Cases']]
    .groupby(['Origin Country', 'Destination Country'], as_index=False)
    .sum()
)

# Source, target and value lists for the Sankey links
source = sankey_data['Origin Country'].map(country_indices)
target = sankey_data['Destination Country'].map(country_indices)
value = sankey_data['Number of Cases']

# Normalize values to probabilities (skipped when there is nothing to divide by)
if normalize:
    total_cases = value.sum()
    if total_cases > 0:
        value = value / total_cases

# Create Sankey diagram
fig = go.Figure(go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color='black', width=0.5),
        label=countries
    ),
    link=dict(
        source=source,
        target=target,
        value=value
    )
))

fig.show()

## Bar Chart For Relative Probability of Importation

In [None]:
# Preview the first three records with both country columns attached
country_att.head(n=3)

In [None]:
# Note: uses the same date range as the Sankey plot (it reads `country_att`).
# Total cases arriving in each destination country
grouped_import = (
    country_att[['Destination Country', 'Number of Cases']]
    .groupby('Destination Country')
    .sum()
)

# Normalize so the values add up to 1 across destination countries
norm_import = grouped_import / grouped_import['Number of Cases'].sum()

# Placeholder 'Infected' flags. A dedicated seeded generator keeps the flags
# identical from run to run; the global `random` module is never seeded in
# this notebook, so drawing from it directly gave a different chart each time.
infected_rng = rand.Random(0)
norm_import['Infected'] = [bool(infected_rng.randint(0, 1)) for _ in range(len(norm_import))]
norm_import.columns = ['Relative Probability of Importation', 'Infected']
norm_import = norm_import.sort_values('Relative Probability of Importation', ascending=True)

# Horizontal bars (the y axis is the frame's index), coloured by the flag
fig = px.bar(
    norm_import,
    x='Relative Probability of Importation',
    orientation='h',
    color='Infected',
    color_discrete_map={True: '#de3535', False: '#4266d4'},
    title="Relative Importation Plot",
)
fig.update_layout(yaxis_categoryorder='total ascending', showlegend=False)

fig.show()

### Playing with Dash in my spare time

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go
import dash_bootstrap_components as dbc
from datetime import datetime

In [None]:
# Dash application styled with the Bootstrap theme
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Countries offered in the dropdown, computed once and reused for the default
country_options = city_to_country['Country'].unique()

# Page layout, top to bottom: title, country selector, area plot,
# date-range / direction controls, Sankey diagram.
app.layout = dbc.Container([
    html.H1("Exportation Dashboard", className="text-center my-4"),

    # Country selector
    dbc.Row([
        dbc.Col([
            html.Label("Select Country"),
            dcc.Dropdown(
                id='country-dropdown',
                options=[{'label': name, 'value': name} for name in country_options],
                value=country_options[0],  # default value: first country in the table
                clearable=False
            )
        ], width=12)
    ], className="mb-4"),

    # Area plot
    dbc.Row([
        dbc.Col([
            dcc.Graph(id='export-graph')
        ], width=12)
    ], className="mb-4"),

    # Date picker and toggle switch in one row with cards for both
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.Label("Select Date Range"),
                    dcc.DatePickerRange(
                        id='date-picker-range',
                        start_date=datetime(2024, 9, 5),
                        end_date=datetime(2024, 9, 9),
                        display_format='YYYY-MM-DD'
                    )
                ])
            ], className="p-3")
        ], width=8),

        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.Label("Origin/Destination"),
                    dcc.RadioItems(
                        id='origin-destination-toggle',
                        options=[
                            {'label': 'Origin', 'value': 'origin'},
                            {'label': 'Destination', 'value': 'destination'}
                        ],
                        value='origin',  # default value
                        inline=True,
                        # Style keys are camelCase in Dash; the hyphenated
                        # 'margin-right' form triggers a React warning.
                        labelStyle={'marginRight': '10px'}  # spacing between labels
                    )
                ])
            ], className="p-3")
        ], width=4)
    ], className="mb-4"),

    # Sankey diagram
    dbc.Row([
        dbc.Col([
            dcc.Graph(id='sankey-graph')
        ], width=12)
    ])
])

# Callback: redraw the export area plot whenever the selected country changes
@app.callback(
    Output('export-graph', 'figure'),
    [Input('country-dropdown', 'value')]
)
def update_area_plot(selected_country):
    """Build the weekly export area chart for ``selected_country``.

    Reads the notebook-level ``weekly_importation`` and ``city_to_country``
    frames, keeps the rows whose origin city belongs to the selected country,
    and totals the cases per origin city and week.

    Args:
        selected_country: Current value of the ``country-dropdown`` component.

    Returns:
        A Plotly Express area figure with one band per origin city.
    """
    exports = pd.merge(
        weekly_importation,
        city_to_country,
        how='left',
        left_on='Origin City',
        right_on='City',
    )
    from_selected = exports['Country'] == selected_country
    exports = exports[from_selected]

    weekly_by_city = (
        exports[['Origin City', 'Week', 'Number of Cases']]
        .groupby(['Origin City', 'Week'])
        .sum()
        .reset_index()
    )

    return px.area(
        weekly_by_city,
        x='Week',
        y='Number of Cases',
        color='Origin City',
        title=f'Number of Cases from {selected_country}',
    )

@app.callback(
    Output('sankey-graph', 'figure'),
    [Input('country-dropdown', 'value'),
     Input('date-picker-range', 'start_date'),
     Input('date-picker-range', 'end_date'),
     Input('origin-destination-toggle', 'value')]
)
def update_sankey_diagram(selected_country, start_date, end_date, toggle):
    """Build the country-to-country Sankey diagram for the chosen filters.

    Reads the notebook-level ``importation_data`` and ``city_to_country``
    frames, keeps the records dated within the selected range, maps both
    endpoints to their countries, and totals the cases per
    (origin country, destination country) pair.

    Args:
        selected_country: Current value of the ``country-dropdown`` component.
        start_date: First day of the range as sent by the date picker
            (a date string), or ``None`` when the field is cleared.
        end_date: Last day of the range, same format as ``start_date``.
            The whole end day is included.
        toggle: ``'origin'`` to show flows leaving ``selected_country``;
            any other value shows flows arriving in it.

    Returns:
        A ``go.Figure`` holding the Sankey trace, or ``dash.no_update`` when
        either date is missing so the current figure stays on screen.
    """
    # A cleared date field arrives as None; comparing it against the 'Date'
    # column would raise, so leave the graph untouched instead.
    if not start_date or not end_date:
        return dash.no_update

    # Truncate both bounds to whole days and use an exclusive upper bound one
    # day past the end date, so records dated on the end day are included.
    first_day = pd.to_datetime(start_date).normalize()
    day_after_last = pd.to_datetime(end_date).normalize() + pd.Timedelta(days=1)
    in_range = (importation_data['Date'] >= first_day) & (importation_data['Date'] < day_after_last)
    selected = importation_data.loc[in_range]

    # Keep only what the diagram needs: both endpoint countries and the cases.
    # This avoids summing city names or datetime columns further down.
    country_of = city_to_country.set_index('City')['Country']
    flows = pd.DataFrame({
        'Origin Country': selected['Origin City'].map(country_of),
        'Destination Country': selected['Destination City'].map(country_of),
        'Number of Cases': selected['Number of Cases'],
    })

    role_column = 'Origin Country' if toggle == 'origin' else 'Destination Country'
    sankey_data = (
        flows[flows[role_column] == selected_country]
        .groupby(['Origin Country', 'Destination Country'], as_index=False)['Number of Cases']
        .sum()
    )

    # Sankey links refer to nodes by integer position
    countries = city_to_country['Country'].unique()
    country_indices = {name: position for position, name in enumerate(countries)}

    source = sankey_data['Origin Country'].map(country_indices)
    target = sankey_data['Destination Country'].map(country_indices)
    value = sankey_data['Number of Cases']

    fig = go.Figure(go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color='black', width=0.5),
            label=countries
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    ))

    return fig

if __name__ == '__main__':
    # Newer Dash releases start the dev server with `app.run`; `run_server`
    # is the older spelling. Prefer `run` and fall back only when it is
    # missing. The short-circuit means `run_server` is never touched on
    # releases that provide `run`.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)

In [None]:
# !pip list