# Mexico analysis

## Load libraries

In [None]:
import warnings
from functools import partial

import covid_analysis.utils.paths as path
import janitor
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_flavor as pf
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import prince
import seaborn as sns
from plotly.offline import init_notebook_mode


## Set defaults for plots

In [None]:
# matplotlib: grid style plus a default figure size of 10 x 8
# NOTE(review): the "seaborn-whitegrid" style name was renamed in newer
# matplotlib releases — confirm it exists in the pinned version.
plt.style.use("seaborn-whitegrid")
plt.rcParams["figure.figsize"] = (10, 8)

# seaborn
sns.set_style("whitegrid")

# plotly: enable notebook rendering, use the white template by default and
# make `DataFrame.plot` draw with plotly
init_notebook_mode()
pio.templates.default = "plotly_white"
pd.options.plotting.backend = "plotly"

# Some plot warnings
# NOTE: called without a category, this silences every warning in the
# session, not only the plotting ones.
warnings.filterwarnings("ignore")

## Load data

In [None]:
# Location of the processed CSV, resolved by the project's `path` helper module.
covid_mex_file = path.data_processed_dir("positive_covid_mex.csv")

In [None]:
# Load the processed data set and turn the three date columns into datetimes.
# The conversion runs after loading instead of through `date_parser`, which
# pandas has deprecated; `errors="coerce"` still maps every unparseable
# value to NaT.
covid_mex_df = (
    pd.read_csv(filepath_or_buffer=covid_mex_file)
    .pipe(
        lambda df: df.assign(
            **{
                column: pd.to_datetime(df[column], errors="coerce")
                for column in ["date_admission", "date_symptoms", "date_death"]
            }
        )
    )
)

covid_mex_df.head(1)

## Calculate new data

### Death column indicator

In [None]:
# Outcome label per patient: a missing death date means the patient survived.
covid_mex_df["survived"] = np.where(
    covid_mex_df["date_death"].isna(),
    "Survived",
    "Death",
)

### Binned ages

In [None]:
# Bin edges every four years: 0, 4, 8, ..., 124.
bins = list(range(0, 128, 4))
# One "<lower>-<upper>" label per pair of consecutive edges.
labels = [f"{lower}-{upper}" for lower, upper in zip(bins, bins[1:])]
# `pd.cut` builds right-closed intervals by default; `include_lowest` makes the
# first one also contain its left edge.
covid_mex_df["age_range"] = pd.cut(
    covid_mex_df["age"],
    bins=bins,
    labels=labels,
    include_lowest=True,
)

In [None]:
# Descending order on the outcome label puts the "Survived" rows first.
covid_mex_df = covid_mex_df.sort_values(by="survived", ascending=False)

## Covid involvement by age and sex

In [None]:
# Sunburst of case counts, split by outcome and then by sex.
(
    covid_mex_df
    .groupby(["survived", "sex"])
    .size()
    .reset_index(name="n")
    # The counts frame is handed to plotly as its first argument (`data_frame`).
    .pipe(
        px.sunburst,
        path=["survived", "sex"],
        values="n",
        labels={"sex": "Sex", "n": "Count"},
        hover_name="survived",
        hover_data={"survived": False},
    )
)

In [None]:
# Confirmed cases per age range, coloured by outcome, one facet row per sex.
(
    covid_mex_df
    .groupby(["sex", "age_range", "survived"])
    .size()
    .reset_index(name="count")
    .pipe(
        lambda counts: (
            px.bar(
                data_frame=counts,
                x="age_range",
                y="count",
                color="survived",
                facet_row="sex",
                labels={
                    "age_range": "Age",
                    "count": "Confirmed cases",
                    "survived": "Status",
                },
                category_orders={"survived": ["Survived", "Death"]},
                log_y=True,
            )
            # Facet titles arrive as "sex=<value>"; keep only the value.
            .for_each_annotation(
                lambda note: note.update(text=note.text.split("=")[1])
            )
            .update_xaxes(tickangle=315)
            .update_traces(hovertemplate='Count: %{y} <br> Age: %{x}')
            .update_layout(
                # Horizontal legend anchored above the top-right corner.
                legend={
                    "orientation": "h",
                    "yanchor": "bottom",
                    "y": 1.02,
                    "xanchor": "right",
                    "x": 1,
                },
                title_x=0.5,
            )
        )
    )
)

## Time from symptom onset to death

In [None]:
# Distribution of the days between first symptoms and death, split by sex.
(
    covid_mex_df
    .filter_on("survived == 'Death'")
    .assign(
        life_time=lambda deaths: (
            deaths["date_death"].sub(deaths["date_symptoms"]).dt.days
        )
    )
    # Keep strictly positive spans shorter than 100 days.
    .filter_on("life_time > 0 and life_time < 100")
    .pipe(
        lambda deaths: (
            px.histogram(
                data_frame=deaths,
                x="life_time",
                color="sex",
                marginal="box",
                labels={
                    "life_time": "Number of days",
                    "count": "Count",
                    "sex": "Sex",
                },
            )
            .update_traces(hovertemplate='Count: %{y} <br> Time before dying: %{x}')
            .update_layout(
                # Horizontal legend anchored above the top-right corner.
                legend={
                    "orientation": "h",
                    "yanchor": "bottom",
                    "y": 1.02,
                    "xanchor": "right",
                    "x": 1,
                }
            )
        )
    )
)

In [None]:
# Deaths per age range, one facet row per ICU answer, coloured by the mean
# number of days between first symptoms and death.
(
    covid_mex_df
    .filter_on("survived == 'Death'")
    .assign(
        life_time=lambda deaths: (
            deaths["date_death"].sub(deaths["date_symptoms"]).dt.days
        )
    )
    .filter_on("life_time > 0")
    # Drop every ICU answer other than a plain Yes/No.
    .filter_on("icu in ['Yes', 'No']")
    .select_columns(["age_range", "icu", "life_time"])
    .groupby(["age_range", "icu"])
    ["life_time"]
    .describe()
    .loc[:, ["count", "mean"]]
    .reset_index()
    .pipe(
        lambda summary: (
            px.bar(
                data_frame=summary,
                x="age_range",
                y="count",
                color="mean",
                facet_row="icu",
                labels={
                    "age_range": "Age range",
                    "count": "Count",
                    "mean": "Mean lifetime",
                    "icu": "ICU",
                },
                barmode="group",
            )
            # Give each facet its own y scale and keep its tick labels visible.
            .update_yaxes(matches=None, showticklabels=True)
            .update_xaxes(tickangle=315)
            .update_layout(
                legend={
                    "orientation": "h",
                    "yanchor": "bottom",
                    "y": 1.02,
                    "xanchor": "right",
                    "x": 1,
                }
            )
        )
    )
)

## Number of deaths per state and where they happened

In [None]:
# Treemap of deaths nested by state, origin, sector and patient type.
(
    covid_mex_df
    # Keep fatal cases only: the values are labelled "Number of deaths", so
    # counting every confirmed case would overstate them.
    .filter_on("survived == 'Death'")
    .groupby(["state", "origin", "sector", "patient_type"])
    .size()
    .reset_index(name="n")
    .pipe(
        lambda deaths: (
            px.treemap(
                data_frame=deaths,
                # A constant root node groups all states under "All".
                path=[px.Constant("All"), "state", "origin", "sector", "patient_type"],
                values="n",
                labels=dict(
                    n="Number of deaths"
                )
            )
        )
    )
)

## Patient conditions for survival

In [None]:
# Categorical columns used as input for the correspondence analysis.
yes_or_no = [
    "sex",
    "intubated",
    "pneumonia",
    "pregnancy",
    "diabetes",
    "epoc",
    "asthma",
    "immunosuppressed",
    "hypertension",
    "other_comorbidity",
    "cardiovascular",
    "obesity",
    "chronic_kidney",
    "smoking",
    "icu",
    "survived",
]

In [None]:
def iplot_coordinates(
    mca,
    X,
    x_component=1,
    y_component=2,
    show_row_points=False,
    show_column_points=True,
    highlight=None,
    text_size=10
):
    """Build an interactive scatter plot of two components of a fitted MCA.

    Parameters
    ----------
    mca
        Fitted object exposing ``row_coordinates(X)``,
        ``column_coordinates(X)`` and ``explained_inertia_`` (for example a
        fitted ``prince.MCA``).
    X
        Data frame the coordinates are computed for.
    x_component, y_component : int
        1-based numbers of the components drawn on the x and y axes.
    show_row_points : bool
        When true, plot one point per row of ``X``; otherwise plot one point
        per category of every variable.
    show_column_points : bool
        Accepted for interface compatibility; this function does not read it.
    highlight : str, optional
        Column of ``X`` used to colour the row points. Only used together
        with ``show_row_points=True``.
    text_size : int
        Font size applied to the whole figure.

    Returns
    -------
    The plotly figure, with reference lines drawn through x=0 and y=0.
    """
    # Callers number components from 1, while the coordinate columns and
    # `explained_inertia_` are indexed from 0.
    x, y = x_component - 1, y_component - 1
    color, text = None, None

    if show_row_points:
        # Copy so that adding the highlight column never writes into a view.
        df = mca.row_coordinates(X)[[x, y]].copy()

        if highlight:
            df[highlight] = X[highlight]
            color = highlight
    else:
        # Category labels look like "<variable>_<value>" and variable names
        # may contain underscores themselves (e.g. "chronic_kidney"), so
        # match them against the known columns, longest name first.
        variables = sorted((str(name) for name in X.columns), key=len, reverse=True)

        def split_label(label):
            """Split "<variable>_<value>" into its (variable, value) parts."""
            label = str(label)
            for name in variables:
                if label.startswith(name + "_"):
                    return name, label[len(name) + 1:]
            # Unknown variable: fall back to splitting on the first underscore.
            head, _, tail = label.partition("_")
            return head, tail

        df = (
            mca
            .column_coordinates(X)[[x, y]]
            .reset_index()
            .rename(columns={"index": "full_text"})
        )
        pairs = [split_label(label) for label in df["full_text"]]
        df["Category"] = [category for category, _ in pairs]
        df["Values"] = [value for _, value in pairs]

        color = "Category"
        text = "Values"

    df = df.rename(columns={x: "x", y: "y"})

    fig = px.scatter(
        data_frame=df,
        x="x",
        y="y",
        color=color,
        text=text,
        labels={
            "x": "Component {} ({:.2f}% inertia)".format(x_component, mca.explained_inertia_[x] * 100),
            "y": "Component {} ({:.2f}% inertia)".format(y_component, mca.explained_inertia_[y] * 100)
        }
    )

    fig.update_traces(textposition="top right")
    fig.update_layout(
        font=dict(size=text_size),
        shapes=[
            # Vertical reference line at x = 0, spanning the full plot height.
            dict(
                type="line",
                yref="paper", y0=0, y1=1,
                xref="x", x0=0, x1=0
            ),
            # Horizontal reference line at y = 0, spanning the full plot width.
            dict(
                type="line",
                yref="y", y0=0, y1=0,
                xref="paper", x0=0, x1=1
            ),
        ]
    )
    return fig

In [None]:
# Categorical variables fed to the correspondence analysis.
X = covid_mex_df.select_columns(yes_or_no)

# Four components; the fixed seed keeps repeated runs comparable.
mca = prince.MCA(n_components=4, random_state=42).fit(X)
mca

In [None]:
import functools

In [None]:
# Components shown on the x and y axes.
x_component = 1
y_component = 2

# Pre-bind the fitted model, its data and the component pair so that only the
# display options remain to be passed.
plot_mca = functools.partial(
    iplot_coordinates,
    mca=mca,
    X=X,
    x_component=x_component,
    y_component=y_component,
)
plot_mca(show_row_points=False)

In [None]:
# Parallel-categories diagram of selected conditions among deceased patients.
(
    covid_mex_df
    .filter_on("survived == 'Death'")
    .pipe(
        lambda deaths: (
            px.parallel_categories(
                data_frame=deaths,
                dimensions=[
                    "icu",
                    "chronic_kidney",
                    "intubated",
                    "pneumonia",
                    "epoc",
                    "cardiovascular",
                ],
                labels={
                    "icu": "ICU",
                    "chronic_kidney": "Chronic kidney",
                    "intubated": "Intubated",
                    "pneumonia": "Pneumonia",
                    "epoc": "EPOC",
                    "cardiovascular": "Cardiovascular",
                },
            )
            # Hover over the coloured ribbons and report count and probability.
            .update_traces(hoveron="color", hoverinfo="count+probability")
        )
    )
)