# Plotly

In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go 

In [2]:
import warnings

# Silence every warning category for the rest of the session so library
# deprecation chatter does not clutter the rendered cells.
warnings.simplefilter("ignore")

In [3]:
# Read the full table once and keep it under its own name, so the sample
# below can be redrawn without going back to disk.
df_raw = pd.read_csv("diamonds.csv")
print(df_raw.shape)

# Work with a reproducible 10% sample to keep the interactive figures light.
df = df_raw.sample(frac=0.1, random_state=10)
print(df.shape)

(53940, 10)
(5394, 10)


In [4]:
# Preview the first five sampled rows.
df.head(n=5)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
8018,1.01,Premium,E,SI1,61.7,56.0,4330,6.44,6.39,3.96
1583,0.7,Very Good,D,VS1,60.4,58.0,3008,5.71,5.78,3.47
9138,1.13,Very Good,H,SI2,59.8,59.0,4537,6.75,6.82,4.06
2787,0.76,Ideal,F,VS2,61.0,55.0,3257,5.89,5.92,3.6
52429,0.7,Premium,I,VVS1,61.2,59.0,2513,5.65,5.69,3.47


In [5]:
# Sampling keeps the original row labels, so they come out shuffled.
# Swap them for a fresh 0..n-1 index; rebinding instead of inplace=True
# means re-running the cell starts from a well-defined frame.
df = df.reset_index(drop=True)
df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,1.01,Premium,E,SI1,61.7,56.0,4330,6.44,6.39,3.96
1,0.7,Very Good,D,VS1,60.4,58.0,3008,5.71,5.78,3.47
2,1.13,Very Good,H,SI2,59.8,59.0,4537,6.75,6.82,4.06
3,0.76,Ideal,F,VS2,61.0,55.0,3257,5.89,5.92,3.6
4,0.7,Premium,I,VVS1,61.2,59.0,2513,5.65,5.69,3.47


In [None]:
# Scatter plot: price against carat weight
fig = px.scatter(
    data_frame=df,
    x="carat",
    y="price",
)
fig.show()

In [10]:
# Scatter plot with Plotly Express: price against carat, one colour per cut
fig = px.scatter(
    df,
    x="carat",
    y="price",
    color="cut",
)
fig.show()

In [13]:
# Same scatter, now with a figure title and readable axis labels.
fig = px.scatter(
    df,
    x="carat",
    y="price",
    color="cut",
    labels={"carat": "Carat Weight", "price": "Price ($)"},
    title="Diamond price vs Carat Weight",
)
fig.show()

In [14]:
# Line chart (example: mean price for each distinct carat value)
df_mean = df.groupby("carat", as_index=False)["price"].mean()
fig = px.line(
    df_mean,
    x="carat",
    y="price",
    title="Mean Price by Carat",
)
fig.show()

In [15]:
# Line chart (example: mean price by carat)
# NOTE(review): this cell repeats the previous one verbatim; one of the two
# can probably be dropped.
df_mean = (
    df.groupby("carat")["price"]
    .agg("mean")
    .reset_index()
)
fig = px.line(df_mean, title="Mean Price by Carat", x="carat", y="price")
fig.show()

In [17]:
# Mean price per clarity grade, with one bar per cut inside each grade.
# Handing the raw rows to px.bar stacks every diamond's price into the bar,
# which shows totals rather than averages, so aggregate before plotting.
price_by_clarity_cut = df.groupby(["clarity", "cut"], as_index=False)["price"].mean()

fig = px.bar(
    price_by_clarity_cut,
    x="clarity",
    y="price",
    color="cut",
    barmode="group",  # side-by-side bars; stacking means would be meaningless
    title="Average Price by Clarity and Cut",
    labels={"clarity": "Clarity", "price": "Average Price ($)", "cut": "Cut"},
)
fig.show()

In [18]:
# Per-cut summary of price: mean, median and standard deviation.
df_stats = (
    df.groupby("cut")["price"]
    .agg(["mean", "median", "std"])
    .reset_index()
)

In [19]:
# Show the per-cut summary table.
df_stats.head(n=5)

Unnamed: 0,cut,mean,median,std
0,Fair,4721.005952,3661.0,3952.69272
1,Good,4011.286652,2863.0,3775.099642
2,Ideal,3530.320639,1822.0,3858.748131
3,Premium,4495.760194,2890.0,4342.828339
4,Very Good,3987.675314,2763.0,3929.76732


In [23]:
# Mean price per cut; the error bars show one standard deviation.
fig = px.bar(
    df_stats,
    x="cut",
    y="mean",
    color="cut",
    error_y="std",
    title="Average Price by Cut",
)
fig.show()

In [30]:
# Histogram of price in 50 bins, with a separate colour for each cut.
fig = px.histogram(
    df,
    x="price",
    color="cut",
    nbins=50,
    title="Distribution",
)
fig.show()

In [31]:
# Density contour plot of price against carat
fig = px.density_contour(
    df,
    x="carat",
    y="price",
    title="Density Contour Plot",
)
fig.show()

In [32]:
# Box plot: spread of price within each cut
fig = px.box(
    df,
    x="cut",
    y="price",
    title="Price Distribution by Cut",
)
fig.show()

In [44]:
# Box plot of price per cut, split further by clarity; only outlier points
# are drawn individually.
fig = px.box(
    df,
    x="cut",
    y="price",
    color="clarity",
    points="outliers",
    labels={"cut": "Cut", "price": "Price in ($)", "clarity": "Clarity"},
    title="Price Distribution by Cut",
)
fig.show()

In [45]:
# Violin plot using Plotly Express: price distribution within each cut,
# with outlier points drawn individually.
fig = px.violin(
    df,
    x="cut",
    y="price",
    points="outliers",
    labels={"cut": "Cut", "price": "Price in ($)"},
    title="Price Distribution by Cut",
)

fig.show()

In [55]:
# Price vs. carat, one panel per clarity grade, animated over cut, with
# marker size scaled by price.
# Plotly's animation guidance is to pin the axis ranges so every frame's
# points stay in view; a 10% margin leaves room for the largest markers.
carat_range = [0, df["carat"].max() * 1.1]
price_range = [0, df["price"].max() * 1.1]

fig = px.scatter(
    df,
    x="carat",
    y="price",
    color="cut",
    facet_col="clarity",
    size="price",
    animation_frame="cut",
    range_x=carat_range,
    range_y=price_range,
    title="price vs carat by cut and clarity",
)
fig.show()

In [56]:
# 2-D density heatmap of price against carat
fig = px.density_heatmap(
    df,
    x="carat",
    y="price",
    title="Density Heatmap",
)
fig.show()

# Subplots

In [57]:
from plotly.subplots import make_subplots

In [60]:
# Side-by-side panels: raw scatter on the left, 2-D density on the right.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('Carat Vs Price', 'Density Heatmap'),
)

# Plotly Express returns a whole figure; only its first trace is reused and
# placed into the matching subplot cell.
scatter = px.scatter(df, x='carat', y='price').data[0]
fig.add_trace(scatter, row=1, col=1)

heatmap = px.density_heatmap(df, x='carat', y='price').data[0]
fig.add_trace(heatmap, row=1, col=2)

# Axis titles live in the layout of the px figures, which is not copied with
# the traces, so label both panels explicitly.
fig.update_xaxes(title_text='carat')
fig.update_yaxes(title_text='price')

fig.update_layout(title_text='Carat vs Price and Density heatmap of carat vs price')
fig.show()

In [63]:
# Heatmap of the correlation matrix of the numerical columns only.
numeric_cols = ['carat', 'depth', 'table', 'price', 'x', 'y', 'z']
corr = df[numeric_cols].corr()

fig = go.Figure(
    data=go.Heatmap(
        z=corr.values,
        # z[i][j] is drawn at (x[j], y[i]): columns go on x, index on y.
        x=corr.columns.values,
        y=corr.index.values,
        colorscale='Picnic',
        # Anchor the diverging scale to the full correlation range so that
        # zero correlation lands on the midpoint colour.
        zmin=-1,
        zmax=1,
    )
)
fig.update_layout(title_text='Correlation Matrix')
fig.show()