# Samples of charts using `plotly`

The `plotly` charts can't be displayed in IPython notebook files on GitHub or GitLab. The results of these scripts are shown in a separate [report](./report.md).

### Imports

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio 
import plotly.graph_objects as go
import sys

# Show the Python interpreter version string as the cell output.
sys.version

'3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]'

### Configs

In [2]:
# Input CSV files, given as paths relative to the notebook's working directory.
persons_file = './datasets/persons.csv'
holidays_file = './datasets/holidays.csv'

# Directory that receives the exported chart images.
images_output = './images'

# Column name -> human-readable label, passed to plotly express through `labels=`.
# The holidays dataset has Spanish column names, so they are mapped to English here.
labels_dict = {
    'gender': 'Gender',
    'height': 'Height (meters)',
    'weight_kg': 'Weight (Kg)',
    'age': 'Age (years)',
    # The column holds per-person values in plain USD (e.g. 14100), not millions.
    'Pib per cápita': 'GDP per capita (USD)',
    'Región': 'Region',
    'Dias Festivos/año': 'holidays by year',
    'Ranking felicidad': 'Happiness Ranking'
}

# Default template applied to every figure created after this cell runs.
pio.templates.default = 'plotly_white'


## Persons

### Load data

A datasource with name, height, weight (in kilograms), gender and age. 

In [3]:
# Load the persons table from the CSV configured above.
data = pd.read_csv(persons_file)

# Report the frame's dimensions and its column index.
for caption, value in (("Shape: ", data.shape), ("Fields: ", data.columns)):
    print(caption, value)

Shape:  (7, 5)
Fields:  Index(['name', 'height', 'weight_kg', 'gender', 'age'], dtype='object')


In [4]:
# Summary statistics (count, mean, std, quartiles) for the persons table.
data.describe()

Unnamed: 0,height,weight_kg,gender,age
count,7.0,7.0,7.0,7.0
mean,1.782857,74.0,0.428571,34.714286
std,0.066261,5.066228,0.534522,3.352327
min,1.68,68.0,0.0,30.0
25%,1.745,71.5,0.0,32.5
50%,1.77,73.0,0.0,35.0
75%,1.84,75.0,1.0,37.0
max,1.86,84.0,1.0,39.0


In [5]:
# Print per-column dtype and non-null counts for the persons table.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   name       7 non-null      object 
 1   height     7 non-null      float64
 2   weight_kg  7 non-null      int64  
 3   gender     7 non-null      int64  
 4   age        7 non-null      int64  
dtypes: float64(1), int64(3), object(1)
memory usage: 408.0+ bytes


### Encoding label gender

In [6]:
# Decode the numeric gender flag into readable labels (1 -> Female, 0 -> Male).
# `replace` leaves values that are not keys of the mapping untouched, so running
# this cell a second time keeps the labels; `map` would turn the already-decoded
# strings into NaN.
data['gender'] = data['gender'].replace({1: 'Female', 0: 'Male'})

### Plots

#### Heights and genders

In [7]:
# Box plot of height, one box (and colour) per gender.
fig = px.box(
    data,
    x='gender',
    y='height',
    color='gender',
    title="Heights and genders",
    labels=labels_dict,
)

# Export the figure as a static PNG into the images directory.
output = fig.write_image(f"{images_output}/heights_and_genders.png")

#### Weight by age

In [17]:
# Scatter of weight (x) against age (y), coloured by gender.
fig = px.scatter(
    data,
    x='weight_kg',
    y='age',
    color='gender',
    title= 'Weight by age',
    labels=labels_dict,
)

# Export the figure as a static PNG into the images directory.
output = fig.write_image(f"{images_output}/weight_by_age.png")

#### Age average

Difference of every age from the mean of all ages (`age_diff_mean`)

In [9]:
# Mean age over all persons.
age_mean = data['age'].mean()

# Signed difference between each person's age and the mean, to one decimal place.
data['age_diff_mean'] = data['age'].sub(age_mean).round(1)

data.head()

Unnamed: 0,name,height,weight_kg,gender,age,age_diff_mean
0,Jhon,1.85,76,Male,34,-0.7
1,Sophia,1.68,68,Female,38,3.3
2,Marian,1.74,71,Female,35,0.3
3,Louise,1.75,73,Female,31,-3.7
4,George,1.77,72,Male,30,-4.7


In [18]:
# Horizontal overshoot of the mean line past the first/last bar position.
margin = 0.50

# One bar per person, with the bar height equal to the person's age.
fig = go.Figure(data=[go.Bar(x=data['name'], y=data['age'], name="Ages")])

# Dashed horizontal line at the mean age, running from `-margin` to
# `number of rows - margin` on the x axis.
fig.add_shape(
    type='line',
    x0=-margin,
    x1=len(data) - margin,
    y0=age_mean,
    y1=age_mean,
    line={'color': 'black', 'width': 1, 'dash': 'dash'},
)

fig.update_layout(
    title="Ages relatives to mean [{} years]".format(round(age_mean, 1)),
    xaxis={'title': 'Persons'},
    yaxis={'title': 'Ages'},
    showlegend=False,
)

# Export the figure as a static PNG into the images directory.
output = fig.write_image(f"{images_output}/ages_relatives_to_mean.png")

## Holidays

### Load data

The holidays file is in Spanish (holidays, festivos, feriados).

In [11]:
# Load the holidays table from the CSV configured above.
holidays = pd.read_csv(holidays_file)

# Report the frame's dimensions and its column index.
for caption, value in (("Shape: ", holidays.shape), ("Fields: ", holidays.columns)):
    print(caption, value)

Shape:  (22, 14)
Fields:  Index(['Pais', 'Región', 'Dias Festivos/año', 'Pib per cápita', '% Desempleo',
       'Muertes Vx100.000h', '% Informalidad', 'Ranking felicidad',
       'Puntaje felicidad', 'Ranking competitividad', 'puntaje competitividad',
       'Ranking doing business', 'Puntaje doing business', 'Expectativa vida'],
      dtype='object')


In [12]:
# Preview the first rows of the holidays table.
holidays.head()

Unnamed: 0,Pais,Región,Dias Festivos/año,Pib per cápita,% Desempleo,Muertes Vx100.000h,% Informalidad,Ranking felicidad,Puntaje felicidad,Ranking competitividad,puntaje competitividad,Ranking doing business,Puntaje doing business,Expectativa vida
0,Colombia,Sur America,18,14100,9.2,37.6,64.0,36,6537,61,4.3,54,70.4,74
1,Brasil,Sur America,12,15200,11.5,30.53,37.0,22,6635,81,4.06,116,57.7,75
2,Argentina,Sur America,16,20000,6.6,7.62,47.0,24,6599,104,3.81,121,56.8,76
3,Chile,Sur America,15,24100,6.6,3.88,33.0,20,6652,33,4.64,48,71.5,79
4,Venezuela,Sur America,14,13800,6.9,47.04,48.0,82,5250,130,3.27,186,35.5,74


### GDP _per capita_ by region

It is interesting to see that Europe is the region with the smallest distance between the median and Q3; this means that the typical income is "close" to the top of the box, whereas in all other regions the distance is greater. Unfortunately, we have no data from North America.

In [19]:
# Box plot of GDP per capita, one box (and colour) per region.
fig = px.box(
    holidays,
    x='Región',
    y='Pib per cápita',
    color='Región',
    title='GDP per capita by Region',
    labels=labels_dict,
)

# Render the chart title in the Ubuntu font family.
fig.update_layout(title_font_family='Ubuntu')

# Export the figure as a static PNG into the images directory.
output = fig.write_image(f"{images_output}/gdp_by_region.png")

### Holidays vs GDP/Year, by Region

The red line indicates the mean GDP per capita for each number of holidays. We can see that the best performance is between 14 and 16 holidays per year.

In [14]:

# Mean GDP per capita for each distinct number of holidays per year,
# stored in a single `gdp` column and cast to integers.
gdp_mean = (
    holidays
    .groupby('Dias Festivos/año')
    .agg(gdp=('Pib per cápita', 'mean'))
    .astype({'gdp': int})
)

In [20]:

fig = go.Figure()

# One marker per country: holidays per year (x) against GDP per capita (y),
# with the country name attached as the point text.
fig.add_trace(
    go.Scatter(
        x=holidays['Dias Festivos/año'],
        y=holidays['Pib per cápita'],
        mode='markers',
        hoverinfo='all',
        text=holidays['Pais'],
    )
)

# Dotted red line through the mean GDP per capita of each holiday count.
fig.add_trace(
    go.Scatter(
        x=gdp_mean.index,
        y=gdp_mean.gdp,
        name='GDP mean',
        text=gdp_mean.gdp,
        mode='lines',
        line=dict(color='red', width=1, dash='dot'),
    )
)

fig.update_layout(
    title="GDP of countries by number of holidays",
    xaxis=dict(title='Number of holidays'),
    # The plotted column is per-capita GDP in plain USD (e.g. 14100), not millions.
    yaxis=dict(title='GDP per capita (USD)'),
    showlegend=False
)

# Export the figure as a static PNG into the images directory.
output = fig.write_image(f"{images_output}/gdp_by_numer_regions.png")

### Holidays 3D, GDP/Year, Happiness Ranking, by Region

In [21]:
# 3D scatter: holidays per year (x), GDP per capita (y) and happiness
# ranking (z), with one colour per country.
fig = px.scatter_3d(
    holidays,
    x='Dias Festivos/año',
    y='Pib per cápita',
    z='Ranking felicidad',
    color='Pais',
    labels=labels_dict,
    title='Holidays, GDP/Year, Happiness Ranking, by Region',
)

# Export the figure as a static PNG into the images directory.
output = fig.write_image(f"{images_output}/holidays_happiness_rank.png")