In [8]:
from sklearn import datasets
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import LabelEncoder 
import plotly.graph_objs as go 

# Location of the iris CSV that is loaded as the demo DataFrame.
IRIS_CSV_URL = "https://raw.githubusercontent.com/bcdunbar/datasets/master/iris.csv"

# Name of the target column (same meaning as the `y_name` parameter of pcp2).
y_name = 'species'
df = pd.read_csv(IRIS_CSV_URL)

In [2]:

def pcp2(df, y_name, kind="co", clsnum=10):
    """Build a Plotly parallel-coordinates style figure coloured by `y_name`.

    Every column of `df` becomes one axis. The input frame is only read;
    all encoded helper values are kept local, so calling the function
    repeatedly on the same frame gives the same figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot, one axis per column.
    y_name : str
        Column whose encoded values colour the lines.
    kind : {"co", "le", "ca"}, default "co"
        "co" -> ``go.Parcoords`` on the (label-encoded) raw values,
        "le" -> ``go.Parcoords`` where numeric columns are replaced by the
                codes produced by ``LabelEncoder``,
        "ca" -> ``go.Parcats`` where numeric columns with more than
                `clsnum` distinct values are binned with ``pd.qcut``.
    clsnum : int, default 10
        Threshold / number of quantile bins for numeric columns (see
        "ca" above; the same binned values are used for the line colour).

    Returns
    -------
    plotly.graph_objs.Figure

    Raises
    ------
    KeyError
        If `y_name` is not a column of `df`.
    ValueError
        If `kind` is not one of the supported values.
    """
    # Fail early and clearly instead of after all columns were encoded.
    if y_name not in df.columns:
        raise KeyError(y_name)
    if kind not in ("co", "le", "ca"):
        raise ValueError("kind must be 'co', 'le' or 'ca', got %r" % (kind,))

    colorscale = [[0, '#00868B'], [0.5, 'gray'], [1, 'red']]

    dimensions = []
    line_color = None
    for col in df.columns:
        axes, codes = _pcp2_encode_column(df[col], col, clsnum)
        dimensions.append(axes[kind])
        if col == y_name:
            line_color = codes

    if kind == "ca":
        trace = go.Parcats(
            line=dict(color=line_color, colorscale=colorscale, shape='hspline'),
            dimensions=dimensions,
        )
        margin = {"r": 50, "t": 20, "l": 50, "b": 10}
    else:
        is_basic = kind == "co"
        trace = go.Parcoords(
            line=dict(color=line_color, colorscale=colorscale),
            dimensions=dimensions,
            labelfont=dict(size=14),
            tickfont=dict(size=12),
            unselected=dict(
                line=dict(color='lightgray' if is_basic else 'gray', opacity=0.3)
            ),
        )
        margin = {"r": 100, "t": 50 if is_basic else 10, "l": 100, "b": 10}

    return go.Figure(data=trace, layout=go.Layout(height=300, margin=margin))


def _pcp2_is_label_column(series):
    """Return True when `series` has object, string or category dtype."""
    dtype = series.dtype
    return (
        isinstance(dtype, pd.CategoricalDtype)
        or pd.api.types.is_object_dtype(dtype)
        or pd.api.types.is_string_dtype(dtype)
    )


def _pcp2_encode_column(series, label, clsnum):
    """Return ({"co": dim, "le": dim, "ca": dim}, colour_values) for one column."""
    if _pcp2_is_label_column(series):
        # Integer codes in order of first appearance; missing values get -1.
        codes, uniques = pd.factorize(series)
        tickvals = list(range(len(uniques)))
        ticktext = list(uniques)
        numeric_axis = dict(label=label, tickvals=tickvals,
                            ticktext=ticktext, values=codes)
        axes = {
            "co": numeric_axis,
            # Labels have no rank information, so this axis equals the basic one.
            "le": dict(numeric_axis),
            # `categoryarray` is supplied so that `categoryorder='array'` and
            # `ticktext` have the category list they refer to.
            "ca": dict(label=label, categoryarray=tickvals, ticktext=ticktext,
                       values=codes, categoryorder='array'),
        }
        return axes, codes

    # Numeric column.
    ranked = LabelEncoder().fit_transform(series)
    if len(series.unique()) > clsnum:
        # Too many distinct values: bin into quantile intervals.
        grouped = pd.qcut(series, clsnum, labels=False, duplicates='drop')
    else:
        # Few repeated values: use them directly as categories.
        grouped = series.astype('category')
    axes = {
        "co": dict(label=label, values=series),
        "le": dict(label=label, values=ranked),
        "ca": dict(label=label, values=grouped,
                   categoryorder='category descending'),
    }
    return axes, grouped

In [9]:
# Build the figure coloured by the target column configured in `y_name`.
a = pcp2(df, y_name)

In [10]:
# Render the figure returned by pcp2.
a.show()