In [298]:
import plotly.figure_factory as ff
import plotly.graph_objs as go
import pandas as pd
import numpy as np

In [299]:
data = pd.read_csv('../data/bubble_chart.csv')

In [300]:
data["genre"] = data["genre"].str.split()

In [301]:
data2 = data.explode('genre')

In [302]:
def move_particle(x):
    if x[-1] == ',':
        return x[:-1]
    else:
        return x

In [303]:
data2['genre'] = data2['genre'].apply(move_particle)

In [304]:
data2['genre'].unique()

array(['Action', 'Adventure', 'Fantasy', 'Drama', 'Comedy', 'Sci-Fi',
       'Mystery', 'Thriller', 'Crime', 'Animation', 'Family', 'Horror',
       'Romance', 'Western', 'History', 'Biography', 'Music', 'Musical',
       'Sport', 'Documentary', 'War', 'Short', 'News', 'Reality-TV',
       'Game-Show', 'Talk-Show', 'Film-Noir'], dtype=object)

In [305]:
s = pd.crosstab(data2['title'],data2['genre'])
s = s.T.dot(s).astype(float)
s.values[np.triu_indices(len(s))]=np.nan
films_intersection = s.stack()

In [307]:
genre_list = data2.groupby('genre')['title'].count().sort_values(ascending=False)[:10].index.tolist()

In [308]:
data3 = data2[data2['genre'].isin(genre_list)]


In [310]:
male = data3[data3['star_index']<15]
female = data3[data3['star_index']>=15]

In [311]:
s = pd.crosstab(male[
    'title'], male['genre'])
s = s.T.dot(s).astype(float)
s.values[np.triu_indices(len(s))] = np.nan
male_intersection = s.stack()


In [313]:
number_of_genres= len(male['genre'].unique())
genre_matrix = np.zeros((number_of_genres,number_of_genres))
# genre_list = male['genre'].unique().tolist()

In [314]:
print(genre_list)

['Drama', 'Comedy', 'Action', 'Romance', 'Adventure', 'Crime', 'Thriller', 'Horror', 'Mystery', 'Sci-Fi']


In [315]:
for i, v in male_intersection.iteritems():
    to = genre_list.index(i[0])
    frm = genre_list.index(i[1])
    value = v
    genre_matrix[to,frm] = v
    genre_matrix[frm,to] = v
    
genre_matrix = genre_matrix.astype(int)

In [316]:
print(genre_matrix.shape)

(10, 10)


In [317]:
# np.savetxt('../data/male_matrix.csv', genre_matrix, delimiter = ',')

In [318]:
#make ideogram
PI=np.pi


def check_data(data_matrix):
    L, M = data_matrix.shape
    if L != M:
        raise ValueError('Data array must have (n,n) shape')
    return L


L = check_data(genre_matrix)

def moduloAB(x, a, b): #maps a real number onto the unit circle identified with 
                       #the interval [a,b), b-a=2*PI
        if a>=b:
            raise ValueError('Incorrect interval ends')
        y=(x-a)%(b-a)
        return y+b if y<0 else y+a

def test_2PI(x):
    return 0<= x <2*PI

In [319]:
row_sum=[np.sum(genre_matrix[k,:]) for k in range(L)]

#set the gap between two consecutive ideograms
gap=2*PI*0.005
ideogram_length=2*PI*np.asarray(row_sum)/sum(row_sum)-gap*np.ones(L)
print(ideogram_length)

[1.46559079 0.60501946 0.80332904 0.37903878 0.64714103 0.65267525
 0.36858525 0.53307459 0.26036049 0.25421136]


In [320]:
def get_ideogram_ends(ideogram_len, gap):
    ideo_ends = []
    left = 0
    for k in range(len(ideogram_len)):
        right = left+ideogram_len[k]
        ideo_ends.append([left, right])
        left = right+gap
    return ideo_ends


ideo_ends = get_ideogram_ends(ideogram_length, gap)
print(ideo_ends)


def make_ideogram_arc(R, phi, a=50):
    # R is the circle radius
    # phi is the list of ends angle coordinates of an arc
    # a is a parameter that controls the number of points to be evaluated on an arc
    if not test_2PI(phi[0]) or not test_2PI(phi[1]):
        phi = [moduloAB(t, 0, 2*PI) for t in phi]
    length = (phi[1]-phi[0]) % 2*PI
    nr = 5 if length <= PI/4 else int(a*length/PI)

    if phi[0] < phi[1]:
        theta = np.linspace(phi[0], phi[1], nr)
    else:
        phi = [moduloAB(t, -PI, PI) for t in phi]
        theta = np.linspace(phi[0], phi[1], nr)
    return R*np.exp(1j*theta)


z = make_ideogram_arc(1.3, [11*PI/6, PI/17])
print(z)


[[0, 1.465590790074858], [1.497006716610756, 2.1020261779130722], [2.13344210444897, 2.9367711431211645], [2.9681870696570622, 3.3472258497704472], [3.378641776306345, 4.0257828067554415], [4.057198733291339, 4.709873984504246], [4.741289911040144, 5.109875163044109], [5.141291089580006, 5.674365680952786], [5.705781607488683, 5.966142097889242], [5.99755802442514, 6.251769380643687]]
[1.12583302-0.65j       1.14814501-0.60972373j 1.16901672-0.5686826j
 1.18842197-0.5269281j  1.20633642-0.48451259j 1.22273759-0.44148929j
 1.23760491-0.39791217j 1.25091973-0.3538359j  1.26266534-0.30931575j
 1.27282702-0.26440759j 1.28139202-0.21916775j 1.28834958-0.17365297j
 1.29369099-0.12792036j 1.29740954-0.08202728j 1.29950058-0.0360313j
 1.29996146+0.01000988j 1.29879163+0.0560385j  1.29599253+0.10199682j
 1.2915677 +0.1478272j  1.28552267+0.19347214j 1.27786503+0.23887437j]


In [321]:
ideo_colors=['rgba(187, 255, 255, 0.75)',
             'rgba(127, 255, 212, 0.75)',
             'rgba(155, 205, 155, 0.75)',
             'rgba(217, 239, 139, 0.75)',
             'rgba(255, 246, 143, 0.75)',
             'rgba(255, 193, 193, 0.75)',
             'rgba(255, 130, 71, 0.75)',
             'rgba(255, 48, 48, 0.75)',
             'rgba(255, 20, 147, 0.75)',
             'rgba(72, 118, 255, 0.75)',
             'rgba(176, 226, 255, 0.75)',
             'rgba(224, 255, 255, 0.75)',
             'rgba(144, 238, 144, 0.75)',
             'rgba(106, 90, 205, 0.75)',
             'rgba(47, 79, 79, 0.75)']#brewe

In [322]:
def map_data(data_matrix, row_value, ideogram_length):
    mapped = np.zeros(data_matrix.shape)
    for j in range(L):
        mapped[:, j] = ideogram_length*data_matrix[:, j]/row_value
    return mapped


mapped_data = map_data(genre_matrix, row_sum, ideogram_length)
mapped_data


array([[0.        , 0.16464945, 0.17277657, 0.18240871, 0.15050224,
        0.25615481, 0.09511741, 0.32689086, 0.08428125, 0.03280949],
       [0.15987712, 0.        , 0.08183838, 0.10814358, 0.10960497,
        0.0757005 , 0.00759928, 0.02601291, 0.01519856, 0.02104416],
       [0.16983826, 0.08284793, 0.        , 0.01952844, 0.23670837,
        0.1159871 , 0.07693022, 0.02219141, 0.01124365, 0.06805366],
       [0.17205805, 0.10505195, 0.018739  , 0.        , 0.03094774,
        0.02328178, 0.00738203, 0.00851773, 0.0070981 , 0.00596241],
       [0.14661102, 0.10995826, 0.23457763, 0.0319612 , 0.        ,
        0.02463065, 0.02551032, 0.01407466, 0.00850344, 0.05131386],
       [0.24962995, 0.07597433, 0.11498818, 0.02405365, 0.02464032,
        0.        , 0.07714768, 0.01554687, 0.06394751, 0.00674676],
       [0.0895257 , 0.00736604, 0.07366039, 0.00736604, 0.0246479 ,
        0.07451032, 0.        , 0.03654689, 0.0371135 , 0.01784848],
       [0.31531536, 0.02584076, 0.0217759

In [323]:
idx_sort = np.argsort(mapped_data, axis=1)
idx_sort


array([[0, 9, 8, 6, 4, 1, 2, 3, 5, 7],
       [1, 6, 8, 9, 7, 5, 2, 3, 4, 0],
       [2, 8, 3, 7, 9, 6, 1, 5, 0, 4],
       [3, 9, 8, 6, 7, 2, 5, 4, 1, 0],
       [4, 8, 7, 5, 6, 3, 9, 1, 0, 2],
       [5, 9, 7, 3, 4, 8, 1, 6, 2, 0],
       [6, 1, 3, 9, 4, 7, 8, 2, 5, 0],
       [7, 3, 4, 5, 2, 1, 6, 8, 9, 0],
       [8, 3, 4, 2, 9, 1, 6, 7, 5, 0],
       [9, 3, 5, 8, 6, 1, 0, 4, 7, 2]])

In [324]:
def make_ribbon_ends(mapped_data, ideo_ends,  idx_sort):
    L = mapped_data.shape[0]
    ribbon_boundary = np.zeros((L, L+1))
    for k in range(L):
        start = ideo_ends[k][0]
        ribbon_boundary[k][0] = start
        for j in range(1, L+1):
            J = idx_sort[k][j-1]
            ribbon_boundary[k][j] = start+mapped_data[k][J]
            start = ribbon_boundary[k][j]
    return [[(ribbon_boundary[k][j], ribbon_boundary[k][j+1]) for j in range(L)] for k in range(L)]


ribbon_ends = make_ribbon_ends(mapped_data, ideo_ends,  idx_sort)
print('ribbon ends starting from the ideogram[2]\n', ribbon_ends[2])


ribbon ends starting from the ideogram[2]
 [(2.13344210444897, 2.13344210444897), (2.13344210444897, 2.144685752135726), (2.144685752135726, 2.1642141928548293), (2.1642141928548293, 2.186405602762901), (2.186405602762901, 2.254459259814321), (2.254459259814321, 2.3313894808289692), (2.3313894808289692, 2.414237411152437), (2.414237411152437, 2.5302245136052917), (2.5302245136052917, 2.7000627707684), (2.7000627707684, 2.9367711431211645)]


In [325]:
def control_pts(angle, radius):
    #angle is a  3-list containing angular coordinates of the control points b0, b1, b2
    #radius is the distance from b1 to the  origin O(0,0)

    if len(angle) != 3:
        raise InvalidInputError('angle must have len =3')
    b_cplx = np.array([np.exp(1j*angle[k]) for k in range(3)])
    b_cplx[1] = radius*b_cplx[1]
    # print(b_cplx.real)
    # print(b_cplx.imag)
    # print(zip(b_cplx.real, b_cplx.imag))
    return zip(b_cplx.real, b_cplx.imag)


In [326]:
def ctrl_rib_chords(l, r, radius):
    # this function returns a 2-list containing control poligons of the two quadratic Bezier
    #curves that are opposite sides in a ribbon
    #l (r) the list of angular variables of the ribbon arc ends defining
    #the ribbon starting (ending) arc
    # radius is a common parameter for both control polygons
    if len(l) != 2 or len(r) != 2:
        raise ValueError('the arc ends must be elements in a list of len 2')
    return [control_pts([l[j], (l[j]+r[j])/2, r[j]], radius) for j in range(2)]

In [327]:
ribbon_color = [L*[ideo_colors[k]] for k in range(L)]


In [328]:
def make_q_bezier(b):  # defines the Plotly SVG path for a quadratic Bezier curve defined by the
    #list of its control points
    # if len(b) != 3:
    #     raise valueError('control poligon must have 3 points')
    A, B, C = b
    return 'M '+str(A[0])+',' + str(A[1])+' '+'Q ' +\
        str(B[0])+', '+str(B[1]) + ' ' +\
        str(C[0])+', '+str(C[1])


# b = [(1, 4), (-0.5, 2.35), (3.745, 1.47)]

# make_q_bezier(b)


'M 1,4 Q -0.5, 2.35 3.745, 1.47'

In [329]:
def make_ribbon_arc(theta0, theta1):
    
    if test_2PI(theta0) and test_2PI(theta1):
        if theta0 < theta1:
            theta0= moduloAB(theta0, -PI, PI)
            theta1= moduloAB(theta1, -PI, PI)
            if theta0*theta1>0:
                raise ValueError('incorrect angle coordinates for ribbon')

        nr=int(40*(theta0-theta1)/PI)
        if nr<=2: nr=3
        theta=np.linspace(theta0, theta1, nr)
        pts=np.exp(1j*theta)# points on arc in polar complex form

        string_arc=''
        for k in range(len(theta)):
            string_arc+='L '+str(pts.real[k])+', '+str(pts.imag[k])+' '
        return   string_arc
    else:
        raise ValueError('the angle coordinates for an arc side of a ribbon must be in [0, 2*pi]')

make_ribbon_arc(np.pi/3, np.pi/6)

'L 0.5000000000000001, 0.8660254037844386 L 0.5877852522924732, 0.8090169943749473 L 0.6691306063588583, 0.7431448254773941 L 0.7431448254773942, 0.6691306063588581 L 0.8090169943749475, 0.5877852522924731 L 0.8660254037844387, 0.49999999999999994 '

In [348]:
def make_layout(title, plot_size):
    axis = dict(showline=False,  # hide axis line, grid, ticklabels and  title
                zeroline=False,
                showgrid=False,
                showticklabels=False,
                title=''
                )

    return go.Layout(title=dict(
                    text=title, font = dict(
                        family = 'Droid',
                        size = 20)),
                     xaxis=dict(axis),
                     yaxis=dict(axis),
                     showlegend=True,
                     width=plot_size+75,
                    #  paper_bgcolor='#FFFFFF	',
                     plot_bgcolor='#FFFFFF',
                     height=plot_size,
                     margin=dict(t=25, b=25, l=25, r=25),
                     hovermode='closest'  # to this list one appends below the dicts defining the ribbon,
                     #respectively the ideogram shapes
                     )


In [349]:
def make_ideo_shape(path, line_color, fill_color):
    #line_color is the color of the shape boundary
    #fill_collor is the color assigned to an ideogram
    return dict(
        line=dict(
            color=line_color,
            width= 1
        ),

        path=path,
        type='path',
        fillcolor=fill_color,
        layer='below'
    )


In [350]:
def make_ribbon(l, r, line_color, fill_color, radius=0.2):
    #l=[l[0], l[1]], r=[r[0], r[1]]  represent the opposite arcs in the ribbon 
    #line_color is the color of the shape boundary
    #fill_color is the fill color for the ribbon shape
    poligon=ctrl_rib_chords(l,r, radius)
    b,c =poligon
    b = list(b)
    c = list(c)
    # print(b,c)

    return  dict(
                line=dict(
                color=line_color, width=0.5
            ),
            path=  make_q_bezier(b)+make_ribbon_arc(r[0], r[1])+
                   make_q_bezier(c[::-1])+make_ribbon_arc(l[1], l[0]),
            type='path',
            fillcolor=fill_color,
            layer='below'
        )

def make_self_rel(l, line_color, fill_color, radius):
    #radius is the radius of Bezier control point b_1
    b=control_pts([l[0], (l[0]+l[1])/2, l[1]], radius)
    return  dict(
                line=dict(
                color=line_color, width=0.5
            ),
            path=  make_q_bezier(b)+make_ribbon_arc(l[1], l[0]),
            type='path',
            fillcolor=fill_color,
            layer='below'
        )

def invPerm(perm):
    # function that returns the inverse of a permutation, perm
    inv = [0] * len(perm)
    for i, s in enumerate(perm):
        inv[s] = i
    return inv

layout=make_layout('Chord diagram of genres', 800)
# layout['shapes']

In [351]:
# these value are set after a few trials
radii_sribb = [0.4, 0.30, 0.35, 0.39, 0.12]


In [352]:
ribbon_info = []
for k in range(L):

    sigma = idx_sort[k]
    sigma_inv = invPerm(sigma)
    for j in range(k, L):
        if genre_matrix[k][j] == 0 and genre_matrix[j][k] == 0:
            continue
        eta = idx_sort[j]
        eta_inv = invPerm(eta)
        l = ribbon_ends[k][sigma_inv[j]]

        if j == k:
            # print(make_self_rel(l, 'rgb(175,175,175)',
            #                    ideo_colors[k], radius=radii_sribb[k]))
            layout['shapes'] = layout['shapes'] + (make_self_rel(l, 'rgb(139,137,137)',
                                                                ideo_colors[k], radius=radii_sribb[k]),)
            z = 0.9*np.exp(1j*(l[0]+l[1])/2)
            #the text below will be displayed when hovering the mouse over the ribbon
            text = genre_list[k]+' has ' + \
                '{:d}'.format(genre_matrix[k][k])+' number of coocurence with itself ',
            ribbon_info.append(go.Scatter(x=[z.real],
                                          y=[z.imag],
                                          mode='markers',
                                          showlenged = False,
                                          marker=dict(
                                              size=0.5, color=ideo_colors[k]),
                                          text=text,
                                          hoverinfo='text'
                                          )
                               )
        else:
            r = ribbon_ends[j][eta_inv[k]]
            zi = 0.9*np.exp(1j*(l[0]+l[1])/2)
            zf = 0.9*np.exp(1j*(r[0]+r[1])/2)
            #texti and textf are the strings that will be displayed when hovering the mouse
            #over the two ribbon ends
            texti = genre_list[k]+' genre has ' + '{:d}'.format(genre_matrix[k][j])+' occurences with ' +\
                genre_list[j] + ' genre',

            textf = genre_list[j]+' genre has ' + '{:d}'.format(genre_matrix[j][k])+' occurences with ' +\
                genre_list[k] + ' genre',
            ribbon_info.append(go.Scatter(x=[zi.real],
                                          y=[zi.imag],
                                          mode='markers',
                                          showlegend= False,
                                          marker=dict(
                                              size=0.5, color=ribbon_color[k][j]),
                                          text=texti,
                                          hoverinfo='text'
                                          )
                               ),
            ribbon_info.append(go.Scatter(x=[zf.real],
                                          y=[zf.imag],
                                          mode='markers',
                                          showlegend = False,
                                          marker=dict(
                                              size=0.5, color=ribbon_color[k][j]),
                                          text=textf,
                                          hoverinfo='text'
                                          )
                               )
            # IMPORTANT!!!  Reverse these arc ends because otherwise you get
            r = (r[1], r[0])
            # a twisted ribbon
            # print(make_ribbon(
            #     l, r, 'rgb(175,175,175)', ribbon_color[k][j]))
            #append the ribbon shape
            layout['shapes'] = layout['shapes'] + (make_ribbon(
                l, r, 'rgb(139,137,137)', ribbon_color[k][j]),)


In [355]:
# import plotly.offline as off
ideograms = []
for k in range(len(ideo_ends)):
    z = make_ideogram_arc(1.1, ideo_ends[k])
    zi = make_ideogram_arc(1.0, ideo_ends[k])
    m = len(z)
    n = len(zi)
    ideograms.append(go.Scatter(x=z.real,
                                y=z.imag,
                                mode='lines',
                                showlegend = True,
                                name = genre_list[k],
                                line=dict(
                                    color=ideo_colors[k], shape='spline', width=1),
                                text=genre_list[k]+'<br>' +
                                '{:d}'.format(row_sum[k]),
                                hoverinfo='text'
                                )
                     )

    path = 'M '
    for s in range(m):
        path += str(z.real[s])+', '+str(z.imag[s])+' L '

    Zi = np.array(zi.tolist()[::-1])

    for s in range(m):
        path += str(Zi.real[s])+', '+str(Zi.imag[s])+' L '
    path += str(z.real[0])+' ,'+str(z.imag[0])

    layout['shapes']= layout['shapes'] + ((make_ideo_shape(
        path, 'rgb(139,137,137)', ideo_colors[k])),)

data = go.Data(ideograms+ribbon_info)
fig = go.Figure(data=data, layout=layout)

fig.show()


plotly.graph_objs.Data is deprecated.
Please replace it with a list or tuple of instances of the following types
  - plotly.graph_objs.Scatter
  - plotly.graph_objs.Bar
  - plotly.graph_objs.Area
  - plotly.graph_objs.Histogram
  - etc.


