# Lab 5 Data Visualisation on Python with Plotly

## 1. Import data

In [1]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp
import plotly.figure_factory as ff
import pandas as pd
# Colab setup (uncomment both lines to fetch the data when running there):
# !git clone https://github.com/sigord/data_visualisation
# %cd /content/data_visualisation/LR5

# Respondent workbook, resolved relative to the current working directory.
DATA_FILE = 'lab5.xlsx'
df = pd.read_excel(DATA_FILE)

## 2. Using plotly.express, construct a pie chart showing the proportion of respondents with different social status (status field).

In [2]:
# Count respondents per social status. The counts get their own column name so
# that the pie's labels ('status') and values ('respondents') are not both
# called 'status', and px receives a real DataFrame instead of a Series whose
# index and name collide.
status_counts = df.groupby('status').size().reset_index(name='respondents')
px.pie(status_counts, names='status', values='respondents', title='Social status')

## 3. Using plotly.express, construct a scatter plot showing the respondent's income in relation to their age.
3.1. Change the name of the X-axis to "Age, full number of years".

In [3]:
# Income against age, coloured by sex; only the x-axis label is overridden.
px.scatter(
    df,
    x='age',
    y='income',
    color='sex',
    labels={'age': 'Age, full number of years'},
    title='Income in relation to age',
)

## 4. Using plotly.express, construct a box plot showing the respondent's income as a function of their gender.
4.1 Add a name to the chart

In [4]:
# Income distribution per gender: one box per value of 'sex', coloured by the
# same column.
px.box(
    df,
    x='sex',
    y='income',
    color='sex',
    title='Income in relation to gender',
)

## 5. Using plotly.express, construct a histogram showing the distribution of age of respondents.
5.1 Change the number of intervals to 21

In [5]:
# Distribution of respondent age, drawn with nbins=21 as the task requires.
px.histogram(
    df,
    x='age',
    nbins=21,
    title='Age distribution',
)

## 6. Using plotly.express construct a sunburst chart, showing the distribution of the income amount within sex and social status (first, find the sum of income for all respondents within each combination of sex and social status).

In [6]:
# Total income for every (sex, status) combination, computed explicitly as the
# task asks; the sunburst sizes each sector by this sum.
income_by_sex_status = df.groupby(['sex', 'status'], as_index=False)['income'].sum()
px.sunburst(
    income_by_sex_status,
    path=['sex', 'status'],  # inner ring: sex, outer ring: social status
    values='income',
    title='Total income by sex and social status',
)

## 7. Using plotly.graph_objects, construct a heat map showing the average number of flights per year for country and gender of respondent.
7.1. change the colour palette of the map

In [7]:
# Mean number of flights for every (country, sex) pair.
df7 = df.groupby(['geo', 'sex'])['flights'].mean().reset_index()
# Reshape to a matrix: rows are countries, columns are sexes.
# DataFrame.pivot takes keyword-only arguments in pandas >= 2.0, so the
# index/columns/values roles are spelled out instead of passed positionally.
df7 = df7.pivot(index='geo', columns='sex', values='flights')
fig = go.Figure(data=go.Heatmap(z=df7.values, x=df7.columns, y=df7.index,
                                colorscale='Viridis',  # non-default palette (task 7.1)
                                colorbar=dict(title='Average number of flights')))
fig.update_layout(title_text='Average number of flights by country and gender',
                  xaxis_title='Gender', yaxis_title='Country')
fig.show()

## 8. Using plotly.graph_objects, construct a chart showing how the number of flights and the income change as a function of the respondent's age (line chart with two y-axes).

In [8]:
# Mean income and mean number of flights for each age.
flights_income_by_age = df.groupby('age')[['income', 'flights']].mean().reset_index()

# One plot area with two y-axes: flights on the primary (left) axis, income on
# the secondary (right) axis, so both series stay readable despite their
# separate scales.
fig = sp.make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=flights_income_by_age['age'], y=flights_income_by_age['flights'],
                         name='Number of flights'), secondary_y=False)
fig.add_trace(go.Scatter(x=flights_income_by_age['age'], y=flights_income_by_age['income'],
                         name='Income'), secondary_y=True)

# Label every axis so the figure can be read on its own.
fig.update_xaxes(title_text="Age")
fig.update_yaxes(title_text="Flights", secondary_y=False)
fig.update_yaxes(title_text="Income", secondary_y=True)
fig.update_layout(title_text='Number of flights and income as a function of age',
                  width=700, height=400, title_x=0.5)
fig.show()

## 9. Construct a grid of 2 rows × 2 columns and place three graphs in it (the first graph spans both rows of the first column; the other two graphs occupy the two rows of the second column). The first graph is the kernel density estimate of income, the second is a pie chart of the number of respondents of each sex, and the third is a scatter plot of the number of flights against age.

In [9]:
# 2x2 grid: the left cell spans both rows; the right column holds a pie (top)
# and a scatter (bottom). `None` marks the cell covered by the rowspan.
fig = sp.make_subplots(rows=2, cols=2,
                       specs=[[{"rowspan": 2}, {"type": "pie"}],
                              [None, {"type": "scatter"}]],
                       column_widths=[0.6, 0.4])
fig.update_layout(title_text='Income density, gender split and flights by age',
                  width=800, height=400, title_x=0.5)

# Left column: income histogram plus its kernel density curve. create_distplot
# builds both traces; they are copied into the grid rather than shown directly.
income_dist = ff.create_distplot(hist_data=[df['income']], group_labels=['income'],
                                 bin_size=1000, show_rug=False)
fig.add_trace(go.Histogram(income_dist['data'][0], showlegend=False), row=1, col=1)
fig.add_trace(go.Scatter(income_dist['data'][1], line=dict(color='blue', width=1),
                         showlegend=False), row=1, col=1)
fig.update_xaxes(title_text='Income', tickmode='linear', tick0=0, dtick=10000, row=1, col=1)
fig.update_yaxes(title_text='Density', row=1, col=1)

# Top right: number of respondents per sex. size() counts rows, so respondents
# with a missing age are still counted (count() on the 'age' column would
# silently drop them).
gender_counts = df.groupby('sex').size().reset_index(name='gender_count')
fig.add_trace(go.Pie(labels=gender_counts['sex'], values=gender_counts['gender_count'],
                     showlegend=True), row=1, col=2)

# Bottom right: mean number of flights for each age.
flights_by_age = df.groupby('age')['flights'].mean().reset_index()
fig.add_trace(go.Scatter(x=flights_by_age['age'], y=flights_by_age['flights'],
                         name='Number of flights', showlegend=False), row=2, col=2)
fig.update_xaxes(title_text='Age', row=2, col=2)
fig.update_yaxes(title_text='Number of flights', row=2, col=2)

# Display explicitly, like the other graph_objects cells, instead of relying on
# the return value of the last update call.
fig.show()

## 10. Replace the pandas matplotlib plotting backend with plotly, and construct a bar chart of the average income within each social status, using the plotting tools of the pandas DataFrame.

In [10]:
# Route pandas' .plot() through plotly instead of the default backend.
pd.options.plotting.backend = "plotly"

# Mean income per social status, drawn as a bar chart with value labels.
mean_income_by_status = df.groupby('status')['income'].mean()
mean_income_by_status.plot(
    kind='bar',
    title='Average income by social status',
    text_auto=True,
)