## Import all necessary packages

In [10]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [11]:
## Import data from the CSV file

In [12]:
# Load the raw penguin measurements; the path is relative to the notebook's folder.
penguins = pd.read_csv(
    '../data/penguins.csv',
)

## Check Data

In [13]:
# Quick sanity check of the loaded frame: first rows, column labels, (rows, columns).
for summary in (penguins.head(), penguins.columns, penguins.shape):
    print(summary)

   Unnamed: 0 studyName  Sample Number                              Species  \
0           1   PAL0708              1  Adelie Penguin (Pygoscelis adeliae)   
1           2   PAL0708              2  Adelie Penguin (Pygoscelis adeliae)   
2           3   PAL0708              3  Adelie Penguin (Pygoscelis adeliae)   
3           4   PAL0708              4  Adelie Penguin (Pygoscelis adeliae)   
4           5   PAL0708              5  Adelie Penguin (Pygoscelis adeliae)   

   Region     Island               Stage Individual ID Clutch Completion  \
0  Anvers  Torgersen  Adult, 1 Egg Stage          N1A1               Yes   
1  Anvers  Torgersen  Adult, 1 Egg Stage          N1A2               Yes   
2  Anvers  Torgersen  Adult, 1 Egg Stage          N2A1               Yes   
3  Anvers  Torgersen  Adult, 1 Egg Stage          N2A2               Yes   
4  Anvers  Torgersen  Adult, 1 Egg Stage          N3A1               Yes   

     Date Egg  Culmen Length (mm)  Culmen Depth (mm)  Flipper Length

In [14]:
# Set up the color map.
# Keys must match the values of the 'Species' column exactly, otherwise
# plotly ignores the mapping and falls back to its default color sequence.
# The dataset stores the full species names, not the short common names.
color_map = {
    'Adelie Penguin (Pygoscelis adeliae)': 'rgb(235, 52, 52)',
    'Gentoo penguin (Pygoscelis papua)': 'rgb(235, 149, 52)',
    'Chinstrap penguin (Pygoscelis antarctica)': 'rgb(67, 52, 235)',
}

In [20]:
# Scatterplot of culmen depth against culmen length, one point per penguin.
fig = px.scatter(
    penguins,
    x='Culmen Length (mm)',
    y='Culmen Depth (mm)',
    title="Penguin Culmen Statistics",
    # Color the points by species, using the custom color map
    color='Species',
    color_discrete_map=color_map,
    # Island is shown as the bold heading of each hover box
    hover_name='Island',
    # Extra columns listed in the hover box
    hover_data=['Culmen Length (mm)', 'Culmen Depth (mm)', 'Species'],
)

# Render the figure
fig.show()

## Create a new data frame with attributes Culmen Length (mm), Culmen Depth (mm), Flipper Length (mm) and Body Mass (g)

In [16]:
# Keep only the four numeric body measurements for the correlation analysis.
measurement_columns = [
    'Culmen Length (mm)',
    'Culmen Depth (mm)',
    'Flipper Length (mm)',
    'Body Mass (g)',
]
penguines_modified = penguins[measurement_columns]
print(penguines_modified.head())

   Culmen Length (mm)  Culmen Depth (mm)  Flipper Length (mm)  Body Mass (g)
0                39.1               18.7                181.0         3750.0
1                39.5               17.4                186.0         3800.0
2                40.3               18.0                195.0         3250.0
3                 NaN                NaN                  NaN            NaN
4                36.7               19.3                193.0         3450.0


## Create a correlation table with pandas

In [17]:
# Pairwise Pearson correlation between the selected measurement columns.
penguin_corr = penguines_modified.corr(
    method="pearson",
)
print(penguin_corr)

                     Culmen Length (mm)  Culmen Depth (mm)  \
Culmen Length (mm)             1.000000          -0.235053   
Culmen Depth (mm)             -0.235053           1.000000   
Flipper Length (mm)            0.656181          -0.583851   
Body Mass (g)                  0.595110          -0.471916   

                     Flipper Length (mm)  Body Mass (g)  
Culmen Length (mm)              0.656181       0.595110  
Culmen Depth (mm)              -0.583851      -0.471916  
Flipper Length (mm)             1.000000       0.871202  
Body Mass (g)                   0.871202       1.000000  


In [18]:
# Build the correlation heatmap: the same labels go on both axes because
# the correlation table is square (measurement x measurement).
axis_labels = penguin_corr.columns
correlation_trace = go.Heatmap(
    x=axis_labels,
    y=axis_labels,
    z=penguin_corr.values.tolist(),
    # Red-yellow-green scale pinned to the full [-1, 1] correlation range
    colorscale='rdylgn',
    zmin=-1,
    zmax=1,
)
fig = go.Figure(data=correlation_trace)

# Render the figure
fig.show()

## Create a new dataframe with the average flipper length per species

In [34]:
# Mean flipper length per species, with short column names for plotting.
penguines_avg_flipper = (
    penguins
    .groupby('Species', as_index=False)['Flipper Length (mm)']
    .mean()
    .rename(columns={'Species': 'spec', 'Flipper Length (mm)': 'av_flip_length'})
)
print(penguines_avg_flipper.head())


                                        spec  av_flip_length
0        Adelie Penguin (Pygoscelis adeliae)      189.953642
1  Chinstrap penguin (Pygoscelis antarctica)      195.823529
2          Gentoo penguin (Pygoscelis papua)      217.186992


In [36]:
# Record when the figure is generated so it can be stamped onto the plot
from datetime import datetime
timestamp = datetime.now()

# Bar chart of the average flipper length, one colored bar per species
fig = px.bar(
    penguines_avg_flipper,
    x='spec',
    y='av_flip_length',
    color="spec",
    title='Flipper Length (mm) by Species',
)

# Axis titles and the timestamp annotation are applied in a single layout update.
# The annotation is positioned in paper coordinates: horizontally centered,
# slightly above the plotting area.
generated_note = {
    "text": f"This graph was generated at {timestamp}",
    "showarrow": False,
    "x": 0.5,
    "y": 1.1,
    "xref": "paper",
    "yref": "paper",
}
fig.update_layout(
    xaxis={'title': {'text': 'Species'}},
    yaxis={'title': {'text': 'Average Flipper Length (mm)'}},
    annotations=[generated_note],
)
fig.show()