# Plotly

In [2]:
# Importing libraries
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go


In [3]:
# Now if there are any warning messages, we will just hide them, won't show anything on the screen.
import warnings
warnings.filterwarnings('ignore')


In [6]:
# Load seaborn's "titanic" example dataset into the notebook-wide frame `df`
df = sns.load_dataset(name='titanic')

# Leaving the frame as the last expression displays it below the cell
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [10]:
# (rows, columns) of the frame
print(f'{df.shape}')

# Per-column non-null counts and dtypes, plus memory usage
df.info()

(891, 15)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB


In [14]:
# Remove any row in which every value is missing (NaN).
# how='all' drops a row only when all of its values are missing; rows with
# just some missing values are kept.
# The result is assigned back to `df` instead of using inplace=True, so the
# step reads as an ordinary expression and can be chained or re-run safely.
df = df.dropna(how='all')


## Survival Count

In [24]:
# Histogram of the `survived` column, coloured by the `survived` value.
# color_discrete_sequence lets us supply colours of our own choice.
fig = px.histogram(
    data_frame=df,
    x='survived',
    color='survived',
    color_discrete_sequence=['green', 'blue'],
    title='Survived Distribution',
)

# Render the histogram
fig.show()


In [25]:
# Passing only `color` (without a colour list) names the column to colour by
# and leaves the choice of colours to Plotly's defaults.
fig = px.histogram(
    data_frame=df,
    x='survived',
    color='survived',
    title='Survival Count (0 = No, 1 = Yes)',
)

# Render the histogram
fig.show()


## Gender Distribution

In [26]:
# Pie chart with one slice per value of the `sex` column
fig = px.pie(
    data_frame=df,
    names='sex',
    title='Gender Distribution',
)

# Render the pie chart
fig.show()


## Survival Rate by Gender

In [31]:
# Survival rate per gender.
# `survived` is 0/1, so its mean within each gender is the share that survived.
# histfunc='avg' is required: when y is given, px.histogram sums the y values by
# default, which would plot the number of survivors rather than a rate.
# barmode='group' places multiple bars for the same category side by side
# instead of stacking them.
fig = px.histogram(df, x='sex', y='survived',
                   histfunc='avg',
                   title='Survival Rate by Gender',
                   color='sex',
                   barmode='group')

# Show histogram
fig.show()

## Passenger Class Distribution

In [36]:
# Donut chart of passenger class: a pie chart whose centre is cut out via `hole`
# (0.4 = 40% of the radius). Title typo ("Cass") corrected to "Class".
fig = px.pie(df, names='class', title='Passenger Class Distribution', hole=0.4)

# Show donut chart
fig.show()

## Survival by Passenger Class

In [45]:
# Survival by passenger class: `survived` (0/1) is aggregated per class, with
# one colour per class and bars grouped side by side.
fig = px.histogram(
    data_frame=df,
    x='class',
    y='survived',
    color='class',
    barmode='group',
    title=' Survival by Passenger Class',
)

# Render the histogram
fig.show()


## Age Distribution

In [50]:
# Distribution of passenger ages; nbins caps the number of bins at 35
fig = px.histogram(
    data_frame=df,
    x='age',
    nbins=35,
    title='Age Distribution',
)

# Render the histogram
fig.show()


## Age vs Fare

In [53]:
# Scatter plot of fare against age, with each point coloured by age
fig = px.scatter(
    data_frame=df,
    x='age',
    y='fare',
    color='age',
    title='Age vs Fare',
)

# Render the scatter plot
fig.show()


## Age vs Fare (Color by Survival)

In [56]:
# Scatter plot of fare against age, with each point coloured by `survived`
fig = px.scatter(
    data_frame=df,
    x='age',
    y='fare',
    color='survived',
    title='Age vs Fare By Survival',
)

# Render the scatter plot
fig.show()


## Age vs Fare (Color by Class, Symbol by Gender)

In [59]:
# Scatter plot of fare against age: colour encodes passenger class and the
# marker symbol encodes gender
fig = px.scatter(
    data_frame=df,
    x='age',
    y='fare',
    color='class',
    symbol='sex',
    title='Age vs Fare By Class And Symbol',
)

# Render the scatter plot
fig.show()


## Box Plot Of Fare By Class

In [62]:
# Box plot of fare, one box (and one colour) per passenger class
fig = px.box(
    data_frame=df,
    x='class',
    y='fare',
    color='class',
    title='Box Plot Of Fare By Class',
)

# Render the box plot
fig.show()


## Box Plot Of Age By Gender

In [65]:
# Box plot of age, one box (and one colour) per gender
fig = px.box(
    data_frame=df,
    x='sex',
    y='age',
    color='sex',
    title='Box Plot Of Age By Gender',
)

# Render the box plot
fig.show()


##  Survival By Embark Town

In [70]:
# Survival by embark town: `survived` (0/1) is aggregated per town, with one
# colour per town and bars grouped side by side.
fig = px.histogram(
    data_frame=df,
    x='embark_town',
    y='survived',
    color='embark_town',
    barmode='group',
    title='Survival By Embark Town',
)

# Render the histogram
fig.show()


## Heatmap: Correlation
Correlation is a number between -1 and 1 that shows how related two columns are:

- +1 → very strongly positively related

- -1 → very strongly negatively related

- 0 → no relation

In [74]:
# Select the numeric columns we want to compare against each other.
numeric_cols = ['age', 'fare', 'pclass', 'parch', 'sibsp', 'survived']

# dropna() discards every row that has a missing value in any of those columns.
numeric = df[numeric_cols].dropna()

# .corr() computes the correlation between each pair of columns.
corr_matrix = numeric.corr()

# px.imshow(...) draws the matrix as a heatmap (a grid of coloured squares);
# text_auto=True prints each correlation value inside its square.
fig = px.imshow(corr_matrix, text_auto=True, title='Correlation Heatmap')

# Render the heatmap
fig.show()


## Bar Chart : Average Fare By Class




In [78]:
# Mean fare per passenger class. as_index=False keeps `class` as a regular
# column, so it can be referenced by name in the plot call.
avg = df.groupby('class', as_index=False)['fare'].mean()

# One bar (and one colour) per class, bar height = mean fare
fig = px.bar(
    data_frame=avg,
    x='class',
    y='fare',
    color='class',
    title='Average Fare By Class',
)

# Render the bar chart
fig.show()


## Survival By Age Bins

In [87]:
# Split the ages into ranges and store each range as text in a new `age_bin` column.
age_intervals = pd.cut(df['age'], bins=[0, 12, 20, 40, 60, 80])
df['age_bin'] = age_intervals.astype(str)

# Turning the intervals into text discards their natural order, so the ascending
# bin order is handed to Plotly explicitly; otherwise the x axis would follow the
# order in which the bins first appear in the data.
age_bin_order = list(age_intervals.cat.categories.astype(str))

# Rows whose age is missing (or outside the bins) have no interval and would
# show up as a literal 'nan' bar, so they are left out of the plot.
binned = df[age_intervals.notna()]

fig = px.histogram(binned, x='age_bin',
                   title='Survival By Age Bins',
                   color='survived',
                   barmode='group',
                   category_orders={'age_bin': age_bin_order})

# Show histogram
fig.show()


## Strip plot: Age vs Survival by Gender
A strip plot shows dots (points) for individual data values — usually spread out along a line.

It's like a scatter plot, but used for categories.

In [88]:
# Strip plot: one point per passenger, `survived` on the x axis, age on the
# y axis, coloured by gender
fig = px.strip(
    data_frame=df,
    x='survived',
    y='age',
    color='sex',
    title="Strip plot: Age vs Survival by Gender",
)

# Render the strip plot
fig.show()


## Sunburst Chart
A sunburst chart is a circular chart that shows data in levels — like layers of a tree.

- The center is the main category

- Each ring outside the center shows subcategories

- It looks like a sun with rays or slices

In [89]:
# Sunburst chart: `path` lists the hierarchy from the centre outwards
# (sex -> class -> survived); sectors are coloured by `survived`
hierarchy = ['sex', 'class', 'survived']
fig = px.sunburst(
    data_frame=df,
    path=hierarchy,
    color="survived",
    title="Sunburst Chart",
)

# Render the sunburst chart
fig.show()


## Animated Scatter Plot

In [90]:
# Animated scatter plot of age vs fare.
#   animation_frame='class' -> one animation frame per passenger class
#   size='fare'             -> marker size follows the fare that was paid
#   hover_name='sex'        -> hovering over a marker shows the passenger's gender
#   range_x=[0, 80]         -> x axis (age) fixed from 0 to 80
#   range_y=[0, 550]        -> y axis (fare) fixed from 0 to 550
scatter_options = dict(
    x='age',
    y='fare',
    color='survived',
    size='fare',
    hover_name='sex',
    animation_frame='class',
    range_x=[0, 80],
    range_y=[0, 550],
    title='Age vs Fare Animated by Passenger Class',
)
fig = px.scatter(df, **scatter_options)

# Render the animated scatter plot
fig.show()



## Animated Bar Chart

In [103]:
# Bar chart for Survival by Embark Town Animated by Class.
# px.bar does not aggregate: given only x it draws one mark per input row, so
# the passengers are counted per (class, town, survival outcome) first and the
# count is plotted as the bar height. Rows with a missing embark town are left
# out by groupby.
survival_counts = (
    df.groupby(['class', 'embark_town', 'survived'], observed=True)
      .size()
      .reset_index(name='passengers')
)

# Treat the 0/1 outcome as a category so each outcome gets its own colour and
# barmode='group' can place the two outcomes side by side.
survival_counts['survived'] = survival_counts['survived'].astype(str)

fig = px.bar(
    survival_counts,
    x='embark_town',
    y='passengers',
    color='survived',
    animation_frame='class',
    barmode='group',
    # Fix the y axis across frames so bar heights stay comparable between classes
    range_y=[0, survival_counts['passengers'].max() * 1.1],
    title='Survival by Embark Town Animated by Class',
)

# Show bar chart
fig.show()


##  Mapping Embark Towns to Coordinates

In [124]:
# (latitude, longitude) of each embark town
locations = {
    'Southampton': (50.9097, -1.4044),
    'Cherbourg': (49.6398, -1.6167),
    'Queenstown': (51.85, -8.294)
}

# Build a small table with one row per town and columns embark_town, lat, lon.
# It gets its own name (`towns`) instead of reusing `df`, so the Titanic frame
# the other cells rely on is not overwritten and those cells can be re-run.
towns = pd.DataFrame(
    [(town, lat, lon) for town, (lat, lon) in locations.items()],
    columns=['embark_town', 'lat', 'lon'],
)

# Plot map
fig = px.scatter_geo(towns, lat='lat', lon='lon', hover_name='embark_town',
                     height=400,
                     title="Embark Town with Coordinates",
                     projection='natural earth')

# Show map
fig.show()
