In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px

In [5]:
df = pd.read_csv('UKMacroData.csv')
df.head()

Unnamed: 0,Date,GDP (£ m),CPI,Bank Rate,Gross Fixed Capital Formation (Investments)
0,2000 Q1,401242,1.1,5.875,69114.0
1,2000 Q2,404196,1.0,6.0,73074.0
2,2000 Q3,406795,1.2,6.0,68011.0
3,2000 Q4,409411,1.4,6.0,70115.0
4,2001 Q1,413054,1.3,5.75,70186.0


In [6]:
df['Date'] = pd.to_datetime(df['Date'].str[:4] + '-' + df['Date'].str[-2:])
print(df.shape)
df.head()

(96, 5)


  df['Date'] = pd.to_datetime(df['Date'].str[:4] + '-' + df['Date'].str[-2:])


Unnamed: 0,Date,GDP (£ m),CPI,Bank Rate,Gross Fixed Capital Formation (Investments)
0,2000-01-01,401242,1.1,5.875,69114.0
1,2000-04-01,404196,1.0,6.0,73074.0
2,2000-07-01,406795,1.2,6.0,68011.0
3,2000-10-01,409411,1.4,6.0,70115.0
4,2001-01-01,413054,1.3,5.75,70186.0


# Line Plot

In [14]:
fig = px.line(data_frame=df, x='Date', y='GDP (£ m)', title='GDP (£ m) values over time')
fig.update_xaxes(rangeslider_visible=True) # Add range slider 
fig.show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



# Area Plot

In [15]:
df = px.data.stocks().head(50)
df.head(3)

Unnamed: 0,date,GOOG,AAPL,AMZN,FB,NFLX,MSFT
0,2018-01-01,1.0,1.0,1.0,1.0,1.0,1.0
1,2018-01-08,1.018172,1.011943,1.061881,0.959968,1.053526,1.015988
2,2018-01-15,1.032008,1.019771,1.05324,0.970243,1.04986,1.020524


In [19]:
df = df.melt(var_name='Company', value_name='Price',
             value_vars=['GOOG', 'AAPL', 'AMZN', 'FB', 'NFLX', 'MSFT'],
             id_vars='date')

df.head()

Unnamed: 0,date,Company,Price
0,2018-01-01,GOOG,1.0
1,2018-01-08,GOOG,1.018172
2,2018-01-15,GOOG,1.032008
3,2018-01-22,GOOG,1.066783
4,2018-01-29,GOOG,1.008773


In [20]:
fig = px.area(data_frame=df, x='date', y='Price', color='Company')
fig.show()

In [21]:
fig = px.area(data_frame=df, x='date', y='Price', color='Company',
              facet_col='Company', facet_col_wrap=2) # Create facets
fig.show()

#### Stacked area plot
When you have many variables in a stacked area plot, the following two concerns are raised:
* colouring
* the lines being cluttered 

In [22]:
df = px.data.gapminder().query("continent == 'Americas'")
df.head(3)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
48,Argentina,Americas,1952,62.485,17876956,5911.315053,ARG,32
49,Argentina,Americas,1957,64.399,19610538,6856.856212,ARG,32
50,Argentina,Americas,1962,65.142,21283783,7133.166023,ARG,32


In [23]:
fig = px.area(df,x='year',y='pop', color='country')
fig.show()

The graph above has several stacked area plot with repeating colours, which renders it difficult to read by the users. Best option is to manage the colour pallette using `color_discrete_sequence` argument.
* Check possible argument with `px.colors.qualitative.swatches()`  

In [24]:
px.colors.qualitative.swatches()

In [25]:
fig = px.area(df,x='year',y='pop', color='country',
              color_discrete_sequence=px.colors.qualitative.Alphabet,
              )
fig.show()

# Histogram

In [26]:
df = px.data.tips().sample(n=150, random_state=1)
print(df.shape)
df.head()

(150, 7)


Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
67,3.07,1.0,Female,Yes,Sat,Dinner,1
243,18.78,3.0,Female,No,Thur,Dinner,2
206,26.59,3.41,Male,Yes,Sat,Dinner,3
122,14.26,2.5,Male,No,Thur,Lunch,2
89,21.16,3.0,Male,No,Thur,Lunch,2


In [27]:
fig = px.histogram(data_frame=df, x="tip")
fig.show()

In [28]:
"""
Histogram with Boxplot using Marginal Boxplot.
    * Helps to see otliers, Median, Q1, and Q3 at a glance
"""

fig = px.histogram(df, x="tip", marginal="box")
fig.show()

In [29]:
fig = px.histogram(df, x="tip", color="sex", marginal="box")
fig.show()

In [30]:
fig = px.histogram(df, x="tip", color="sex", facet_col='day', facet_row='time')
fig.show()

In [32]:
df_practice = pd.read_csv('insurance.csv').sample(n=150)
print(df_practice.shape)
df_practice.head()

(150, 8)


Unnamed: 0.1,Unnamed: 0,age,sex,bmi,children,smoker,region,charges
574,574,57,female,34.295,2,no,northeast,13224.05705
731,731,53,male,21.4,1,no,southwest,10065.413
661,661,57,female,23.98,1,no,southeast,22192.43711
976,976,48,male,40.15,0,no,southeast,7804.1605
434,434,31,male,28.595,1,no,northwest,4243.59005


In [36]:
df_practice.drop(columns=['Unnamed: 0'], inplace=True)
df_practice.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
574,57,female,34.295,2,no,northeast,13224.05705
731,53,male,21.4,1,no,southwest,10065.413
661,57,female,23.98,1,no,southeast,22192.43711
976,48,male,40.15,0,no,southeast,7804.1605
434,31,male,28.595,1,no,northwest,4243.59005


In [40]:
fig = px.histogram(df_practice, x='charges', color='smoker', marginal='box')
fig.show()

# Boxplot

In [41]:
df = px.data.tips().sample(n=150, random_state=1)
print(df.shape)
df.head()

(150, 7)


Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
67,3.07,1.0,Female,Yes,Sat,Dinner,1
243,18.78,3.0,Female,No,Thur,Dinner,2
206,26.59,3.41,Male,Yes,Sat,Dinner,3
122,14.26,2.5,Male,No,Thur,Lunch,2
89,21.16,3.0,Male,No,Thur,Lunch,2


In [42]:
# In arguments, use x for group or categories and y for the level you intereste in plotting
fig = px.box(df, x="day", y="tip")
fig.show()

In [43]:
# Use colour argument to divide plot further
fig = px.box(df, x="day", y="tip", color="smoker")
fig.show()

A box plot helps with descriptive statistics figures, but a box plot itself doesn't give the full information on how much data there is in a group and its distribution shape.

* You can add `points='all'` to give you a better sense of the distribution
* Note you have fewer datapoints on Friday and Thursday



In [44]:
fig = px.box(df, x="day", y="total_bill", points='all')
fig.show()

In Plotly Express, the arguments `facet_col` and `facet_row` are used to split your data into multiple smaller plots (called facets), arranged in a grid.
* facet_col → splits plots horizontally into columns.
* facet_row → splits plots vertically into rows.

So instead of having one big plot with everything mixed together, you get small plots side by side (or stacked), each showing data for a subgroup.

In [45]:
fig = px.box(df, x="day", y="total_bill", points='all', facet_col='sex')
fig.show()

In [46]:
fig = px.scatter(df, x="total_bill", y="tip", facet_row="day")
fig.show()

In [47]:
fig = px.scatter(df, x="total_bill", y="tip", facet_row="day", facet_col="sex")
fig.show()

In [50]:
df_practice = pd.read_csv("BankChurners.csv").sample(n=200)
df_practice = df_practice[['Attrition_Flag','Customer_Age','Gender','Education_Level','Months_on_book',
                           'Credit_Limit','Total_Trans_Amt']]
print(df_practice.shape)
df_practice.head()

(200, 7)


Unnamed: 0,Attrition_Flag,Customer_Age,Gender,Education_Level,Months_on_book,Credit_Limit,Total_Trans_Amt
6424,Existing Customer,47,F,Graduate,34,2190.0,4416
9487,Existing Customer,29,M,Uneducated,23,8147.0,14912
6902,Existing Customer,46,M,Graduate,39,3299.0,4904
2529,Existing Customer,37,F,Uneducated,29,4671.0,2468
5806,Existing Customer,41,M,High School,30,1614.0,4293


In [52]:
fig = px.box(df_practice, x='Education_Level', y='Total_Trans_Amt', color='Gender', points='all')
fig.show()