## Import all necessary packages

In [1]:
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go

## Import data from csv file

In [45]:
# Load the revenue table and clean it in one pass:
#   1. drop every row that has a missing value,
#   2. drop rows whose Revenue is the literal string 'Unknown',
#   3. cast Revenue to float and divide it by 1000.
revenue = (
    pd.read_csv('../data/revenue_data2.csv')
    .dropna()
    .loc[lambda frame: frame['Revenue'] != 'Unknown']
    .assign(Revenue=lambda frame: frame['Revenue'].astype(float) / 1000)
)

## Create four separate dataframes for the Tech, Professional Services, Retail and Oil industries

In [46]:
# Split the cleaned revenue table into one frame per industry.
# The unpacking order matches the order of the industry labels below.
df_tech, df_prof_serve, df_retail, df_oil = (
    revenue[revenue['Industry'] == label]
    for label in ('Tech', 'Professional Services', 'Retail', 'Oil')
)


In [47]:
# Build a 2x2 grid of subplots, one titled panel per industry
fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=['Tech', 'Professional Services', 'Retail', 'Oil'],
)

# Pair each industry's Revenue column with its (row, col) cell in the grid,
# listed in the same order as the subplot titles
box_panels = [
    (df_tech.Revenue, 1, 1),
    (df_prof_serve.Revenue, 1, 2),
    (df_retail.Revenue, 2, 1),
    (df_oil.Revenue, 2, 2),
]
for panel_revenue, panel_row, panel_col in box_panels:
    # Revenue values go on the x axis; the trace is unnamed and kept out of the legend
    fig.add_trace(
        go.Box(x=panel_revenue, name='', showlegend=False),
        row=panel_row,
        col=panel_col,
    )

# Set the figure title and its position (x=0.5, y=0.9), then render the figure
fig.update_layout(title={'text': 'Box plots of company revenues', 'x': 0.5, 'y': 0.9})
fig.show()

In [48]:
# Three subplot rows in a single column, created with shared_xaxes=True
fig = make_subplots(rows=3, cols=1, shared_xaxes=True)

# One histogram per industry; enumerate(start=1) yields the 1-based row index
industries = ['Tech', 'Retail', 'Professional Services']
for row_num, industry in enumerate(industries, start=1):
    # Revenue values for the rows belonging to the current industry
    industry_revenue = revenue.loc[revenue.Industry == industry, 'Revenue']
    fig.add_trace(
        go.Histogram(x=industry_revenue, name=industry),
        row=row_num,
        col=1,
    )

# Render the figure
fig.show()