In [1]:
import plotly
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.figure_factory as ff

# Enable Plotly's notebook mode so the iplot() calls below render figures inline.
# NOTE(review): per the Plotly docs, connected=True loads plotly.js from an online CDN
# (smaller notebook, but needs internet access); connected=False embeds the library
# for fully offline rendering — confirm which one is intended here.
plotly.offline.init_notebook_mode(connected=True)

In [6]:
# Load the heart-disease CSV into the DataFrame used by the chest-pain charts below.
# The path is relative to the notebook's working directory.
heart_df = pd.read_csv("../input/heart-disease-uci/heart.csv")

In [7]:
# List the distinct chest-pain codes (`cp`) present in the data; the charts
# below hard-code one category per code.
heart_df.cp.unique()

array([3, 2, 1, 0])

In [8]:
# Sanity check: how many rows have chest-pain type 0 together with target 0.
cp0_target0 = (heart_df["cp"] == 0) & (heart_df["target"] == 0)
int(cp0_target0.sum())

104

In [9]:
# Grouped bar chart: number of patients per chest-pain type, one bar series per target value.

def _count_by_cp(target_value):
    """Return the row counts for chest-pain codes 0-3 restricted to one target value."""
    return [
        len(heart_df[(heart_df["cp"] == cp_code) & (heart_df["target"] == target_value)])
        for cp_code in range(4)
    ]


def _count_bar(counts, label, fill_rgba, outline_rgba):
    """Build one bar series that prints each count on its bar."""
    return go.Bar(
        x=x,
        y=counts,
        text=counts,
        textposition='auto',
        name=label,
        marker=dict(
            color=fill_rgba,
            line=dict(color=outline_rgba, width=1.5),
        ),
        opacity=1,
    )


x = ['Chest pain type 0', 'Chest pain type 1', 'Chest pain type 2', 'Chest pain type 3']
y = _count_by_cp(0)
y2 = _count_by_cp(1)

trace1 = _count_bar(y, 'target 0', 'rgba(255, 135, 141,0.7)', 'rgba(255, 135, 141,1)')
trace2 = _count_bar(y2, 'target 1', 'rgba(50, 171, 96, 0.7)', 'rgba(50, 171, 96, 1.0)')

data = [trace1, trace2]

plotly.offline.iplot(data, filename='bar-chart')

## Pie Chart

In [10]:
# Pie chart of the overall chest-pain-type distribution (all rows, regardless of target).
labels = ['Chest Pain Type 0','Chest Pain Type 1','Chest Pain Type 2','Chest Pain Type 3']
values = [len(heart_df[heart_df["cp"] == cp_code]) for cp_code in range(4)]
colors = ['#FEBFBB', '#E13966', '#96D388', '#D0F9BB']

# Slice fill colours plus a black outline around every slice.
slice_style = dict(
    colors=colors,
    line=dict(color='#000000', width=2),
)

trace = go.Pie(
    labels=labels,
    values=values,
    hoverinfo='label+percent',
    textinfo='value',
    textfont=dict(size=20),
    marker=slice_style,
)

plotly.offline.iplot([trace], filename='styled_pie_chart')

In [11]:
# Two side-by-side donut charts of the chest-pain-type split: target 0 on the left, target 1 on the right.
cp_labels = ['Chest Pain Type 0','Chest Pain Type 1','Chest Pain Type 2','Chest Pain Type 3']
has_target_0 = heart_df["target"] == 0
has_target_1 = heart_df["target"] == 1


def _center_label(caption, x_pos):
    """Annotation placed at mid-height of the figure at the given x position."""
    return {
        "font": {"size": 20},
        "showarrow": False,
        "text": caption,
        "x": x_pos,
        "y": 0.5,
    }


target_0_donut = {
    "type": "pie",
    "name": "target 0",
    "hole": .4,
    "hoverinfo": "label+percent+name",
    "domain": {"x": [0, .48]},
    "labels": list(cp_labels),
    "values": [len(heart_df[(heart_df["cp"] == cp_code) & has_target_0]) for cp_code in range(4)],
    "marker": {'colors': ['rgba(127, 177, 38, 0.6)',
                          'rgba(152, 205, 36, 0.6)',
                          'rgba(79, 99,  37, 0.6)',
                          'rgb(100, 119,  69)']},
}

target_1_donut = {
    "type": "pie",
    "name": "target 1",
    "hole": .4,
    "hoverinfo": "label+percent+name",
    "domain": {"x": [.52, 1]},
    "labels": list(cp_labels),
    "values": [len(heart_df[(heart_df["cp"] == cp_code) & has_target_1]) for cp_code in range(4)],
    "marker": {'colors': ['rgba(255, 135, 141, 0.6)',
                          'rgba(239, 91, 98, 0.9)',
                          'rgba(255, 38, 63, 1)',
                          'rgba(242, 94, 111,0.7)']},
    # Only this trace carries explicit slice text, positioned inside the slice.
    "text": ["target 1"],
    "textposition": "inside",
}

fig = {
    "data": [target_0_donut, target_1_donut],
    "layout": {
        "title": "Patients Chest Pain Type",
        "annotations": [
            _center_label("target 0", 0.20),
            _center_label("target 1", 0.8),
        ],
    },
}

plotly.offline.iplot(fig, filename='donut')

In [12]:
# Overlaid histograms and density curves for three numeric columns of heart_df.
group_labels = ['age', 'trestbps', 'thalach']
x1, x2, x3 = (heart_df[column].values for column in group_labels)
hist_data = [x1, x2, x3]

colors = ['#A56CC1', '#A6ACEC', '#63F5EF']

# NOTE(review): bin_size is expressed in data units; .2 looks very narrow if these
# columns hold integer-scale measurements — confirm the intended bin width.
fig = ff.create_distplot(
    hist_data,
    group_labels,
    colors=colors,
    bin_size=.2,
    show_rug=False,
)
fig['layout'].update(title='Hist and Curve Plot')

plotly.offline.iplot(fig, filename='Hist and Curve')

## Violin Plot

In [13]:
# One violin of patient age for every chest-pain code that occurs in the data.
cp_values = pd.unique(heart_df['cp'])  # computed once, in order of first appearance

data = []
for cp_value in cp_values:
    in_group = heart_df['cp'] == cp_value
    trace = {
        "type": 'violin',
        "x": heart_df['cp'][in_group],
        "y": heart_df['age'][in_group],
        "name": cp_value,
        "box": {"visible": True},
        "meanline": {"visible": True},
    }
    data.append(trace)

layout = {
    "title": "Chest Pain Type",
    "yaxis": {"zeroline": False},
}
fig = {"data": data, "layout": layout}

plotly.offline.iplot(fig, filename='violin/multiple', validate = False)

In [14]:
# Grouped violins: age distribution per chest-pain type, one violin series per target value.

def _age_violin(target_value, line_color):
    """Build the violin trace dict for the rows whose target equals target_value."""
    selected = heart_df['target'] == target_value
    group_id = str(target_value)  # same string is used as legend group, scale group and name
    return {
        "type": 'violin',
        "x": heart_df['cp'][selected],
        "y": heart_df['age'][selected],
        "legendgroup": group_id,
        "scalegroup": group_id,
        "name": group_id,
        "box": {"visible": True},
        "meanline": {"visible": True},
        "line": {"color": line_color},
    }


fig = {
    "data": [
        _age_violin(0, 'rgb(79, 198, 99)'),
        _age_violin(1, 'rgb(255, 99, 99)'),
    ],
    "layout": {
        "yaxis": {"zeroline": False},
        "violinmode": "group",
    },
}

plotly.offline.iplot(fig, filename='violin/grouped', validate = False)

## Scatter plot

In [15]:
# Basic scatter plot: N points whose x and y are independent standard-normal draws.
N = 2000

random_x = np.random.randn(N)
random_y = np.random.randn(N)

# Markers only, no connecting lines.
trace = go.Scatter(mode='markers', x=random_x, y=random_y)
data = [trace]

plotly.offline.iplot(data, filename='basic-scatter')

### Line and Scatter plot in Plotly

In [16]:
# Shared x grid on [0, 1] plus three noisy series centred at +10, 0 and -10.
n_samples = 200

random_x = np.linspace(0, 1, n_samples)
random_y0 = np.random.randn(n_samples) + 10
random_y1 = np.random.randn(n_samples)
random_y2 = np.random.randn(n_samples) - 10

random_x

array([0.        , 0.00502513, 0.01005025, 0.01507538, 0.0201005 ,
       0.02512563, 0.03015075, 0.03517588, 0.04020101, 0.04522613,
       0.05025126, 0.05527638, 0.06030151, 0.06532663, 0.07035176,
       0.07537688, 0.08040201, 0.08542714, 0.09045226, 0.09547739,
       0.10050251, 0.10552764, 0.11055276, 0.11557789, 0.12060302,
       0.12562814, 0.13065327, 0.13567839, 0.14070352, 0.14572864,
       0.15075377, 0.15577889, 0.16080402, 0.16582915, 0.17085427,
       0.1758794 , 0.18090452, 0.18592965, 0.19095477, 0.1959799 ,
       0.20100503, 0.20603015, 0.21105528, 0.2160804 , 0.22110553,
       0.22613065, 0.23115578, 0.2361809 , 0.24120603, 0.24623116,
       0.25125628, 0.25628141, 0.26130653, 0.26633166, 0.27135678,
       0.27638191, 0.28140704, 0.28643216, 0.29145729, 0.29648241,
       0.30150754, 0.30653266, 0.31155779, 0.31658291, 0.32160804,
       0.32663317, 0.33165829, 0.33668342, 0.34170854, 0.34673367,
       0.35175879, 0.35678392, 0.36180905, 0.36683417, 0.37185

In [17]:
# Plot the three series with a different drawing mode each; the legend name is the mode itself.
mode_series = [
    (random_y0, 'markers'),
    (random_y1, 'lines+markers'),
    (random_y2, 'lines'),
]

trace0, trace1, trace2 = (
    go.Scatter(x=random_x, y=series, mode=draw_mode, name=draw_mode)
    for series, draw_mode in mode_series
)

data = [trace0, trace1, trace2]
plotly.offline.iplot(data, filename='scatter-mode')

In [18]:
# Load the life-expectancy CSV (path relative to the notebook's working directory)
# and preview its first rows.
df = pd.read_csv("../input/life-expectancy-who/Life Expectancy Data.csv")
df.head()

Unnamed: 0,Country,Year,Status,Life expectancy,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,BMI,under-five deaths,Polio,Total expenditure,Diphtheria,HIV/AIDS,GDP,Population,thinness 1-19 years,thinness 5-9 years,Income composition of resources,Schooling
0,Afghanistan,2015,Developing,65.0,263.0,62,0.01,71.279624,65.0,1154,19.1,83,6.0,8.16,65.0,0.1,584.25921,33736494.0,17.2,17.3,0.479,10.1
1,Afghanistan,2014,Developing,59.9,271.0,64,0.01,73.523582,62.0,492,18.6,86,58.0,8.18,62.0,0.1,612.696514,327582.0,17.5,17.5,0.476,10.0
2,Afghanistan,2013,Developing,59.9,268.0,66,0.01,73.219243,64.0,430,18.1,89,62.0,8.13,64.0,0.1,631.744976,31731688.0,17.7,17.7,0.47,9.9
3,Afghanistan,2012,Developing,59.5,272.0,69,0.01,78.184215,67.0,2787,17.6,93,67.0,8.52,67.0,0.1,669.959,3696958.0,17.9,18.0,0.463,9.8
4,Afghanistan,2011,Developing,59.2,275.0,71,0.01,7.097109,68.0,3013,17.2,97,68.0,7.87,68.0,0.1,63.537231,2978599.0,18.2,18.2,0.454,9.5


## Life expectancy vs. adult mortality across all countries

In [19]:
# List the distinct country names; the per-country filters further down match on these exact strings.
df.Country.unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia (Plurinational State of)', 'Bosnia and Herzegovina',
       'Botswana', 'Brazil', 'Brunei Darussalam', 'Bulgaria',
       'Burkina Faso', 'Burundi', "Côte d'Ivoire", 'Cabo Verde',
       'Cambodia', 'Cameroon', 'Canada', 'Central African Republic',
       'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo',
       'Cook Islands', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus',
       'Czechia', "Democratic People's Republic of Korea",
       'Democratic Republic of the Congo', 'Denmark', 'Djibouti',
       'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon', 'Gambia',
       'Georgia', 'Germany'

In [20]:
# Scatter of adult mortality against life expectancy, one marker per row of df.
x_data = df["Life expectancy "]  # the CSV header has a trailing space in this column name
y_data = df["Adult Mortality"]

# Per-point marker styling must have exactly one entry per plotted row, so both
# arrays are sized from the data itself rather than from a hard-coded row count
# or a constant left over from an unrelated cell.
n_points = len(df)
colors = np.random.rand(n_points)    # random value per point, mapped through the colorscale
sz = np.random.rand(n_points) * 30   # random marker size in [0, 30)

fig = go.Figure()
fig.add_scatter(x = x_data,
                y = y_data,
                mode = 'markers',
                marker = {'size': sz,
                         'color': colors,
                         'opacity': 0.6,
                         'colorscale': 'Portland'
                       })
plotly.offline.iplot(fig)

In [21]:
# Row subsets of df for Bangladesh and four neighbouring countries.
country = df["Country"]

my_country_bangladesh = df[country == "Bangladesh"]
neighbours_india = df[country == "India"]
neighbours_nepal = df[country == "Nepal"]
neighbours_bhutan = df[country == "Bhutan"]
neighbours_pakistan = df[country == "Pakistan"]

In [22]:
# GDP column of the Bangladesh slice
my_country_bangladesh["GDP"]

192    121.158120
193    184.565430
194    951.889454
195    856.342857
196    835.789341
197    757.671757
198    681.125368
199    615.777541
200    541.651484
201    494.514660
202    484.155471
203     46.757917
204    432.738897
205      4.613575
206     42.598115
207     45.633710
Name: GDP, dtype: float64

In [23]:
# Inspect the exact column labels: several carry leading/trailing spaces
# (e.g. 'Life expectancy '), and column lookups must match them exactly.
my_country_bangladesh.columns

Index(['Country', 'Year', 'Status', 'Life expectancy ', 'Adult Mortality',
       'infant deaths', 'Alcohol', 'percentage expenditure', 'Hepatitis B',
       'Measles ', ' BMI ', 'under-five deaths ', 'Polio', 'Total expenditure',
       'Diphtheria ', ' HIV/AIDS', 'GDP', 'Population',
       ' thinness  1-19 years', ' thinness 5-9 years',
       'Income composition of resources', 'Schooling'],
      dtype='object')

In [24]:
# GDP vs. life expectancy for Bangladesh and its neighbours, one marker series per country.

# Short aliases for the per-country slices built earlier.
bangladesh = my_country_bangladesh
india = neighbours_india
nepal = neighbours_nepal  # must be the Nepal slice, otherwise the 'Nepal' series repeats India's rows
bhutan = neighbours_bhutan
pakistan = neighbours_pakistan


def _country_trace(frame, label):
    """Build a marker-only trace dict for one country's rows.

    frame -- DataFrame slice holding the country's rows
    label -- legend name for the series
    """
    return {
        'x': frame.GDP,
        'y': frame["Life expectancy "],  # column name includes a trailing space
        'text': frame.Country,           # hover text: the country name on every point
        'mode': 'markers',
        'name': label,
    }


fig = {
    'data': [
        _country_trace(bangladesh, "Bangladesh"),
        _country_trace(india, 'India'),
        _country_trace(nepal, 'Nepal'),
        _country_trace(bhutan, 'Bhutan'),
        _country_trace(pakistan, 'Pakistan'),
    ],
    'layout': {
        # Log-scaled x axis for GDP.
        'xaxis': {'title': 'GDP per Capita', 'type': 'log'},
        'yaxis': {'title': "Life Expectancy"}
    }
}

plotly.offline.iplot(fig)

## Boxplot with Plotly

In [25]:
# Four evenly spaced 100-point samples, each range shifted up by one unit from the previous.
sample_bounds = [(0.1, 10), (1.1, 11), (2.1, 12), (3.1, 13)]
y0, y1, y2, y3 = (np.linspace(low, high, 100) for low, high in sample_bounds)

In [26]:
# Display the first synthetic sample to check its range and spacing.
y0

array([ 0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,  1.1,
        1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,  2.2,
        2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,  3.3,
        3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,  4.4,
        4.5,  4.6,  4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,  5.4,  5.5,
        5.6,  5.7,  5.8,  5.9,  6. ,  6.1,  6.2,  6.3,  6.4,  6.5,  6.6,
        6.7,  6.8,  6.9,  7. ,  7.1,  7.2,  7.3,  7.4,  7.5,  7.6,  7.7,
        7.8,  7.9,  8. ,  8.1,  8.2,  8.3,  8.4,  8.5,  8.6,  8.7,  8.8,
        8.9,  9. ,  9.1,  9.2,  9.3,  9.4,  9.5,  9.6,  9.7,  9.8,  9.9,
       10. ])

In [27]:
# One box per synthetic sample, each with its own marker colour.
sample_specs = [
    (y0, 'Sample A', 'navy'),
    (y1, 'Sample B', 'darkcyan'),
    (y2, 'Sample C', 'purple'),
    (y3, 'Sample D', 'brown'),
]

trace0, trace1, trace2, trace3 = (
    go.Box(y=sample, name=label, marker=dict(color=shade))
    for sample, label, shade in sample_specs
)

data = [trace0, trace1, trace2, trace3]
plotly.offline.iplot(data)

## Boxplot for Outlier Detection
__Let's detect outliers in a real dataset__

In [28]:
# Load the diabetes CSV used by the outlier box plots below (path relative to the working directory).
diabetes_df = pd.read_csv("../input/pima-indians-diabetes-database/diabetes.csv")

In [29]:
# Preview the first rows to see which columns are available for plotting.
diabetes_df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [30]:
# Box plot of every column of diabetes_df so that outliers stand out per feature.

def _column_box(column):
    """Build a box trace for one diabetes_df column, named after that column."""
    return go.Box(name=column, y=diabetes_df[column])


trace0 = _column_box('Pregnancies')
trace1 = _column_box("Glucose")
trace2 = _column_box("BloodPressure")
trace3 = _column_box("SkinThickness")
trace4 = _column_box("Insulin")
# BMI is a column of the frame like the others; without this trace it would be
# the only feature missing from the outlier overview.
trace_bmi = _column_box("BMI")
trace5 = _column_box("DiabetesPedigreeFunction")
trace6 = _column_box("Age")
trace7 = _column_box("Outcome")

# Boxes appear in the same order as the DataFrame's columns.
data = [trace0, trace1, trace2, trace3, trace4, trace_bmi, trace5, trace6, trace7]
plotly.offline.iplot(data)

### An efficient way to generate box plots

In [31]:
# Build one box trace per DataFrame column in a loop instead of spelling each trace out.
column_names = diabetes_df.columns

# .values is laid out as (rows, columns); transpose it so that iterating yields
# one array per COLUMN. Without the transpose each box would receive a single
# row (one record's values across all features) under a column's name.
y_data = diabetes_df[diabetes_df.columns].values.T

# One fill colour per column; zip() below stops at the shortest of the three
# sequences, so this list needs at least as many entries as there are columns.
colors = ['rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)',
          'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)',
          'rgba(255, 140, 184, 0.5)', 'rgba(79, 90, 117, 0.5)', 'rgba(222, 223, 0, 0.5)']

traces = []

for col_name, yd, color in zip(column_names, y_data, colors):
        traces.append(go.Box(
            y = yd,
            name = col_name,
            boxpoints = 'all',   # draw every observation next to its box
            jitter = 0.5,
            whiskerwidth = 0.2,
            fillcolor = color,
            marker = dict(
                size = 2,
            ),
            line = dict(width = 1),
        ))

data=traces
plotly.offline.iplot(data)