## This notebook will utilize the dynamic and interactive library [Plotly](https://plotly.com/)

-> During website development, [Datapane](https://docs.datapane.com/) will be used to display the interactive plots on the website

In [129]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline
import scipy as sp

import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio
import plotly.offline as pyo

In [130]:
# Load the raw cosmetics dataset and preview its first rows.
raw_cosmetics_path = '../data/raw/cosmetics.csv'
df = pd.read_csv(raw_cosmetics_path)
df.head()

Unnamed: 0,Label,Brand,Name,Price,Rank,Ingredients,Combination,Dry,Normal,Oily,Sensitive
0,Moisturizer,LA MER,Crème de la Mer,175,4.1,"Algae (Seaweed) Extract, Mineral Oil, Petrolat...",1,1,1,1,1
1,Moisturizer,SK-II,Facial Treatment Essence,179,4.1,"Galactomyces Ferment Filtrate (Pitera), Butyle...",1,1,1,1,1
2,Moisturizer,DRUNK ELEPHANT,Protini™ Polypeptide Cream,68,4.4,"Water, Dicaprylyl Carbonate, Glycerin, Ceteary...",1,1,1,1,0
3,Moisturizer,LA MER,The Moisturizing Soft Cream,175,3.8,"Algae (Seaweed) Extract, Cyclopentasiloxane, P...",1,1,1,1,1
4,Moisturizer,IT COSMETICS,Your Skin But Better™ CC+™ Cream with SPF 50+,38,4.1,"Water, Snail Secretion Filtrate, Phenyl Trimet...",1,1,1,1,1


### Circos graph used to visualize the relationship between the high-end brands that utilize toxic ingredients

In [131]:
# Disabled Circos draft, kept for reference only; nothing in this cell executes.
# NOTE(review): `circos` is not among the imports in the notebook's visible
# import cell, so an import would be needed before re-enabling — TODO confirm
# which package was intended.
# c = circos.Circos()
# c.track("data", "../references/clean_products.txt")
# c.plot("circos.png", dpi=150)

### First we need to create a mini dataframe that stores all of the categories (low, moderate, high) using the generated txt [files](../references) 

In [132]:
# LOW hazard: turn the raw reference list into a (Toxin, Product) table.
low_df = pd.read_csv('../references/low_hazard_products.txt', header=None)

# Toxin: first keyword from the fixed list found in the raw line.
# Product: everything after a "<number> - " prefix on the raw line.
low_df['Toxin'] = low_df[0].str.extract(r'(No|Oxybenzone|Resorcinol|Formaldehyde|Diethanol|Silane|Siloxane|Octinaxate)', expand=True)
low_df['Product'] = low_df[0].str.extract(r'\d+ - (.*)', expand=True)
low_df = low_df.drop(0, axis=1)

# Label rows by hard-coded position in the reference file. These assignments
# overwrite whatever the regex extracted for those rows, so they must be kept
# in sync with the file layout.
low_df.loc[0, 'Toxin'] = "Ethanolamine"
low_df.loc[2:13, 'Toxin'] = "Oxybenzone"
low_df.loc[15, 'Toxin'] = "Ethanolamine"
low_df.loc[16, 'Toxin'] = "Formaldehyde"
low_df.loc[17, 'Toxin'] = "Diethanol"
low_df.loc[19:67, 'Toxin'] = "Silane"
low_df.loc[69:71, 'Toxin'] = "Silane"
low_df.loc[72, 'Toxin'] = "Octinaxate"

# Drop the rows that carry neither a toxin heading nor a product.
low_df = low_df.drop([2, 14, 18, 68], axis=0)

# Rows without a parsed product name get an explicit placeholder.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form warns on pandas 2.x and does not update
# the frame under Copy-on-Write.
low_df['Product'] = low_df['Product'].fillna('No products found')

# The index is written on purpose; it is read back later as 'Unnamed: 0'.
low_df.to_csv('../data/interim/low_hazard_data.csv')

low_df.head(10)

Unnamed: 0,Toxin,Product
0,Ethanolamine,No products found
1,Oxybenzone,No products found
3,Oxybenzone,Camera Ready CC Cream Broad Spectrum SPF 30 Da...
4,Oxybenzone,Lingerie de Peau BB Cream
5,Oxybenzone,Exfoliating Scrub
6,Oxybenzone,Hydra Life BB Creme Broad Spectrum SPF 30
7,Oxybenzone,The Broad Spectrum SPF 50 UV Protecting Fluid
8,Oxybenzone,Ultimate Sun Protection Spray Broad Spectrum S...
9,Oxybenzone,Broad Spectrum SPF 50 Sunscreen Face Cream
10,Oxybenzone,DayWear UV Base Advanced Anti-Oxidant & UV Def...


In [133]:
# MODERATE hazard: turn the raw reference list into a (Toxin, Product) table.
mod_df = pd.read_csv('../references/mod_hazard_products.txt', header=None, sep='\t')

# Toxin: first keyword from the fixed list found in the raw line.
# Product: everything after a "<number> - " prefix on the raw line.
mod_df['Toxin'] = mod_df[0].str.extract(r'(No|Fragrance|Octinoxates|Homosalate|Teflon)', expand=True)
mod_df['Product'] = mod_df[0].str.extract(r'\d+ - (.*)', expand=True)
mod_df = mod_df.drop(0, axis=1)

# Label rows by hard-coded position in the reference file. These assignments
# overwrite whatever the regex extracted for those rows.
mod_df.loc[0:132, 'Toxin'] = "Parfum"
mod_df.loc[133:633, 'Toxin'] = "Fragrance"
mod_df.loc[636:639, 'Toxin'] = "Triclosan"
mod_df.loc[640:656, 'Toxin'] = "Homosalate"

# Drop the rows that carry neither a toxin heading nor a product.
mod_df = mod_df.drop([1, 132, 634, 635, 653], axis=0)

# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form warns on pandas 2.x and does not update
# the frame under Copy-on-Write.
mod_df['Product'] = mod_df['Product'].fillna('No products found')

# The index is written on purpose; it is read back later as 'Unnamed: 0'.
mod_df.to_csv('../data/interim/mod_hazard_data.csv')

mod_df.head(10)

Unnamed: 0,Toxin,Product
0,Parfum,No products found
2,Parfum,Benefiance WrinkleResist24 Night Cream
3,Parfum,Goodnight Glow Retin-ALT Sleeping Crème
4,Parfum,Beauty Elixir
5,Parfum,The Silk Cream
6,Parfum,Vinosource Moisturizing Sorbet
7,Parfum,Luminous Dewy Skin Night Concentrate
8,Parfum,Seaberry Moisturizing Face Oil
9,Parfum,Renewed Hope in A Jar Refreshing & Refining Mo...
10,Parfum,Cold Plasma Sub-D Firming Neck Treatment


In [134]:
# HIGH hazard: turn the raw reference list into a (Toxin, Product) table.
high_df = pd.read_csv('../references/high_hazard_products.txt', header=None, sep='\t')

# Toxin: first keyword from the fixed list found in the raw line.
# Product: everything after a "<number> - " prefix on the raw line.
high_df['Toxin'] = high_df[0].str.extract(r'(No|Fragrance|Octinoxates|Homosalate|Teflon)', expand=True)
high_df['Product'] = high_df[0].str.extract(r'\d+ - (.*)', expand=True)
high_df = high_df.drop(0, axis=1)

# Label rows by hard-coded position in the reference file. These assignments
# overwrite whatever the regex extracted for those rows (row 0 is dropped below).
high_df.loc[0:29, 'Toxin'] = "Talc"
high_df.loc[32:188, 'Toxin'] = "Propylene Glycol"

# Drop the rows that carry neither a toxin heading nor a product.
high_df = high_df.drop([0, 30, 31], axis=0)

# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form warns on pandas 2.x and does not update
# the frame under Copy-on-Write.
high_df['Product'] = high_df['Product'].fillna('No products found')

# The index is written on purpose; it is read back later as 'Unnamed: 0'.
high_df.to_csv('../data/interim/high_hazard_data.csv')

high_df.head(10)

Unnamed: 0,Toxin,Product
1,Talc,CC Crème High Definition Radiance Face Cream S...
2,Talc,Bio-Performance Advanced Super Restoring Cream
3,Talc,Skin Perfecting Lotion - Blemish Prone/Oily Skin
4,Talc,Bio-Performance Glow Revival Cream
5,Talc,BB Crème au Ginseng
6,Talc,Black Label Detox BB Beauty Balm
7,Talc,Bio-Performance LiftDynamic Cream
8,Talc,Ibuki Beauty Sleeping Mask
9,Talc,Benefiance Extra Creamy Cleansing Foam
10,Talc,Pureness Deep Cleansing Foam


## Toxic dataframe
Combine all three toxicities to form a combined dataframe with low, moderate, and high hazard products

In [135]:
# Relative paths to the font files, keyed by the short name passed to get_font_prop().
font_paths = {
    "Redaction": '../src/fonts/redaction-cufonfonts/Redaction-mL8P5.otf',
    "Redaction_Italics" : '../src/fonts/redaction-cufonfonts/Redaction50Italic-2ODWo.otf',
    "Redaction_Bold" : '../src/fonts/redaction-cufonfonts/Redaction50Bold-vmXRL.otf',
    "Redaction_Blur": "../src/fonts/redaction-cufonfonts/Redaction50-8Mg4n.otf",
    "Playfair_Display" : '../src/fonts/PlayfairDisplay-Regular.ttf'
}

def get_font_prop(font_name):
    """Return matplotlib ``FontProperties`` for a font registered in ``font_paths``.

    Parameters
    ----------
    font_name : str
        A key of ``font_paths``.

    Returns
    -------
    matplotlib.font_manager.FontProperties
        Font properties built from the file path stored under ``font_name``.

    Raises
    ------
    KeyError
        If ``font_name`` is not a key of ``font_paths``.
    """
    font_path = font_paths[font_name]
    # Imported locally: the notebook's import cell only binds `plt` and `ticker`,
    # so the bare name `matplotlib` was undefined and the original
    # `matplotlib.font_manager...` lookup raised NameError.
    from matplotlib import font_manager
    return font_manager.FontProperties(fname=font_path)

### [Sunburst Plot](https://plotly.com/python/sunburst-charts/#basic-sunburst-plot-with-plotlyexpress) of the three hazard categories
- Low
- Moderate
- High

In [136]:
# Reload the three interim hazard tables written by the parsing cells.
hazard_csv_paths = [
    '../data/interim/low_hazard_data.csv',
    '../data/interim/mod_hazard_data.csv',
    '../data/interim/high_hazard_data.csv',
]
low_hazard_data, mod_hazard_data, high_hazard_data = map(pd.read_csv, hazard_csv_paths)

# Stack them (low, then moderate, then high) into one frame and preview it.
df = pd.concat([low_hazard_data, mod_hazard_data, high_hazard_data])
df.head()
# Sunburst draft (disabled):
# fig = px.sunburst(
#     df,
#     names='toxic_category',
#     parents='toxic_chemical',
#     values='Product'
# )

# fig.show()

Unnamed: 0.1,Unnamed: 0,Toxin,Product
0,0,Ethanolamine,No products found
1,1,Oxybenzone,No products found
2,3,Oxybenzone,Camera Ready CC Cream Broad Spectrum SPF 30 Da...
3,4,Oxybenzone,Lingerie de Peau BB Cream
4,5,Oxybenzone,Exfoliating Scrub


### Clean up the concatenated df

- [ ] rename the `Unnamed: 0` column to `Index Number Ref from Raw Data`
- [ ] re-number the first column to go in increasing numerical order
- [ ] remove "No products found"

In [137]:
# Order by the original row number, discard placeholder rows, renumber the
# index from zero, and give the carried-over index column a descriptive name.
df = (
    df.sort_values(by='Unnamed: 0')
      .loc[lambda frame: frame['Product'].str.contains('No products found').eq(False)]
      .reset_index(drop=True)
      .rename(columns={'Unnamed: 0': 'Index Number Ref from Raw Data'})
)

df.head()

# total_rows = df.shape[0]
# print("Total number of rows:", total_rows)


Unnamed: 0,Index Number Ref from Raw Data,Toxin,Product
0,1,Talc,CC Crème High Definition Radiance Face Cream S...
1,2,Parfum,Benefiance WrinkleResist24 Night Cream
2,2,Talc,Bio-Performance Advanced Super Restoring Cream
3,3,Oxybenzone,Camera Ready CC Cream Broad Spectrum SPF 30 Da...
4,3,Parfum,Goodnight Glow Retin-ALT Sleeping Crème


### [Bar Charts](https://plotly.com/python/bar-charts/) of the three hazard categories
- Low
- Moderate
- High

In [138]:
# Combined bar chart: Product against Toxin for the cleaned, concatenated `df`.
layout = go.Layout(
    title='Toxins and Products',
    template='plotly_dark',
    xaxis=dict(title='Toxin', tickangle=-45, automargin=True),
    yaxis=dict(title='Product'),
    font=dict(family="Redaction")
    # margin=dict(l=50, r=50, t=80, b=50)
)

data = [
    go.Bar(
        x=df['Toxin'],
        y=df['Product'],
        marker=dict(color='#db93b0')
    )
]

fig = go.Figure(data=data, layout=layout)

pio.show(fig)

# Saved under its own file name. This cell previously wrote to
# 'low_hazard_bar.html', which the low-hazard chart cell writes as well, so the
# combined chart was overwritten and never persisted.
pio.write_html(fig, file='../reports/dynamic_visuals/combined_hazard_bar.html', auto_open=False)

# Low Hazard Interactive Chart

In [139]:
# Low-hazard chart: a single bar trace of Product against Toxin from `low_df`.
# `low_df` still contains its 'No products found' placeholder rows here.
data = [
    go.Bar(
        x=low_df['Toxin'],
        y=low_df['Product'],
        marker={'color': '#ebff00'},
    )
]

layout = go.Layout(
    title='Low Hazard Toxins & Products',
    template='plotly_dark',
    xaxis={'title': 'Toxin', 'tickangle': -45, 'automargin': True},
    yaxis={'title': 'Product'},
    font={'family': "Redaction"},
    margin={'l': 50, 'r': 50, 't': 80, 'b': 50},
)

fig = go.Figure(data=data, layout=layout)

# fig.update_layout(width=800, height=600)

# Render inline, then export a standalone HTML copy without opening a browser.
fig.show()
fig.write_html(file='../reports/dynamic_visuals/low_hazard_bar.html', auto_open=False)


# Moderate Hazard Interactive Chart

In [140]:
# Moderate-hazard chart: a single bar trace of Product against Toxin from `mod_df`.
# `mod_df` still contains its 'No products found' placeholder rows here.
data = [
    go.Bar(
        x=mod_df['Toxin'],
        y=mod_df['Product'],
        marker={'color': '#ff8a00'},
    )
]

layout = go.Layout(
    title='Moderate Hazard Toxins & Products',
    template='plotly_dark',
    xaxis={'title': 'Toxin', 'tickangle': -45, 'automargin': True},
    yaxis={'title': 'Product'},
    font={'family': "Redaction"},
    margin={'l': 50, 'r': 50, 't': 80, 'b': 50},
)

fig = go.Figure(data=data, layout=layout)

# fig.update_layout(width=800, height=600)

# Render inline, then export a standalone HTML copy without opening a browser.
fig.show()
fig.write_html(file='../reports/dynamic_visuals/moderate_hazard_bar.html', auto_open=False)

# High Hazard Interactive Chart

In [141]:
# High-hazard chart: a single bar trace of Product against Toxin from `high_df`.
layout = go.Layout(
    title='High Hazard Toxins & Products',
    template='plotly_dark',
    xaxis=dict(title='Toxin', tickangle=-45, automargin=True),
    yaxis=dict(title='Product'),
    font=dict(family="Redaction"),
    margin=dict(l=50, r=50, t=80, b=50)
)

data = [
    go.Bar(
        # Plot the high-hazard frame. This cell previously reused `mod_df`, so
        # the "High Hazard" chart and HTML file showed the moderate-hazard data.
        x=high_df['Toxin'],
        y=high_df['Product'],
        marker=dict(color='#ff0000')
    )
]

fig = go.Figure(data=data, layout=layout)

pio.show(fig)

pio.write_html(fig, file='../reports/dynamic_visuals/high_hazard_bar.html', auto_open=False)

### [Line Chart](https://plotly.com/python/line-charts/) of the variety of the categories in the df
- Product Type
- Skin Type
- Brand

## Do geographic regulations affect the number of toxins in the products?
Use a map to pin each brand's country of origin, and size each bubble (smallest to largest) by the number of toxic products the brand carries.

In [142]:
# Placeholder bubble map. It is built from Plotly's bundled gapminder sample
# (2007 rows), so bubble size currently reflects the `pop` column rather than
# a toxic-product count, even though the hover label reads "Toxic Products".
# `px` comes from the notebook's import cell.

# Stored under its own name so the toxin `df` built earlier is not overwritten.
gapminder_2007 = px.data.gapminder().query("year==2007")

# Reversed greyscale palette (darkest shade first).
color_scale = px.colors.sequential.Greys[::-1]

# `continent` is categorical, so the palette has to be passed as
# `color_discrete_sequence`; `color_continuous_scale` only applies to numeric
# colour columns and was being ignored.
fig = px.scatter_geo(gapminder_2007, locations="iso_alpha", color="continent",
                     hover_name="country", size="pop",
                     projection="natural earth",
                     color_discrete_sequence=color_scale)

fig.update_traces(hovertemplate="<b>%{hovertext}</b><br>Toxic Products: %{marker.size:,}<extra></extra>")

# Dark page and geo background, white land, no frame or coastlines.
fig.update_layout(
    title="Toxin Levels by Geographic Region",
    template="plotly_dark",
    font=dict(family="Redaction"),
    plot_bgcolor="black",
    paper_bgcolor="black",
    geo=dict(
        bgcolor="black",
        showframe=False,
        showcoastlines=False,
        landcolor="white",
        oceancolor="white",
    )
)

# Display the plot
fig.show()


## Brand Rating vs. the Number of Toxins
Map the best brand rating overall with the number of toxins they carry