In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.feature_extraction.text import CountVectorizer 
from sklearn.preprocessing import Normalizer
from sklearn import metrics
import re

In [2]:
artsy = pd.read_csv('/Users/meghajain/Desktop/full_art.csv', encoding = "ISO-8859-1")

In [3]:
artsy.head()

Unnamed: 0,artist,brief_descript,dates,full_descript,image,link,location,title,venue,COLOR_PROFILE_RGB
0,Luis Barragan,'Architecture of Color: The Legacy of Luis Bar...,Sep 23rd _ëÔëÊ_ Nov 19th,Luis BarragaéÎn (1902-1988) was one of the g...,http://www.timothytaylor.com/site/assets/files...,https://www.artsy.net/show/timothy-taylor-arch...,"New York, Timothy Taylor 16Õ_ëÕ_ÕÎ34 515 West...",Architecture of Color: The Legacy of Luis Barr...,TIMOTHY TAYLOR,"[[151, 115, 108], [98, 59, 47], [190, 185, 183]]"
1,IL LEE,IL LEE: New Paintings / 40 Years in New York i...,Sep 22nd _ëÔëÊ_ Nov 19th,IL LEE: New Paintings / 40 Years in New York\r...,https://d32dm0rphc51dk.cloudfront.net/ToAAoBrw...,https://www.artsy.net/show/art-projects-intern...,"New York, 434 Greenwich Street Ground Floor",IL LEE: New Paintings / 40 Years in New York,ART PROJECTS INTERNATIONAL,"[[55, 37, 137], [205, 195, 190], [126, 122, 173]]"
2,"CHRISTOPHE AVELLA-BAGUR, LAUREN MARSOLIER",Created from many photographs captured in a va...,Oct 12th _ëÔëÊ_ Nov 20th,"Galerie Richard, New York is pleased to presen...",https://d32dm0rphc51dk.cloudfront.net/O1zHrSEn...,https://www.artsy.net/show/galerie-richard-lau...,"New York, 121 Orchard Street",Lauren Marsolier : Dislocation,GALERIE RICHARD,"[[95, 99, 72], [210, 222, 233], [243, 243, 243]]"
3,BERNIE TAUPIN,Bernie Taupin is a British-born American artis...,Nov 4th _ëÔëÊ_ 30th,There is some obvious synergy between Waterhou...,https://d32dm0rphc51dk.cloudfront.net/tATPB1TH...,https://www.artsy.net/show/waterhouse-and-dodd...,"New York, 960 Madison Avenue 2nd Floor",Bernie Taupin: Antiphona,WATERHOUSE & DODD,"[[136, 120, 117], [33, 31, 35], [235, 235, 235]]"
4,Multiple,Rehs Contemporary is proud to work side by sid...,Oct 22nd _ëÔëÊ_ Nov 18th,"NEW YORK CITY, 6/9/16 - Rehs Contemporary is p...",https://d32dm0rphc51dk.cloudfront.net/PAPHBMNZ...,https://www.artsy.net/show/rehs-contemporary-g...,"New York, 5 E 57th St",ARC Select 2016,REHS CONTEMPORARY GALLERIES,"[[186, 185, 175], [75, 50, 39], [146, 124, 102]]"


## Cleaning

In [4]:
artsy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 230 entries, 0 to 229
Data columns (total 10 columns):
artist               229 non-null object
brief_descript       213 non-null object
dates                230 non-null object
full_descript        230 non-null object
image                229 non-null object
link                 181 non-null object
location             230 non-null object
title                230 non-null object
venue                229 non-null object
COLOR_PROFILE_RGB    230 non-null object
dtypes: object(10)
memory usage: 18.0+ KB


In [5]:
#based on researching missing values from line 4
artsy.loc[artsy['artist'].isnull(), 'artist'] = 'Multiple'

In [6]:
#based on researching missing value from line 4
artsy.loc[artsy['venue'].isnull(), 'venue'] = 'Eykyn Maclean'

In [7]:
#standardize formatting
artsy['artist'] = artsy['artist'].apply(lambda x: x.title())
artsy['venue'] = artsy['venue'].apply(lambda x: x.title())
artsy['title'] = artsy['title'].apply(lambda x: x.title())

### Clean exhibit end date

In [8]:
def end_date(s):
    if ' _ëÔëÊ_ ' in s:
        return s.split(" _ëÔëÊ_ ")[1]
    elif 'to ' in s:
        if '\r' in s:
            extra = s.split(" to ")[1]
            return extra.split('\r')[0]
        else:
            return s.split(" to ")[1]
    elif 'Õ_ë¢ëÔÕ_ëÔÕ±' in s:
        return s.split("Õ_ë¢ëÔÕ_ëÔÕ±")

In [9]:
artsy['end_date'] = artsy['dates'].apply(end_date)

## Textblob preprocessing

In [10]:
from textblob import TextBlob
from textblob import Word

In [11]:
import re
months = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december']
days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']

def split_into_lemmas(message):
    message = re.sub(r'[^a-zA-Z0-9\s\p{P}]', "", message)
    message = message.lower()
    words = list(TextBlob(message).words) + list(TextBlob(message).noun_phrases)
    cleaned_words = []
    for word in words:
        if re.search(r'^art|exhibit|collection[a-zA-Z]*|museum[a-zA-Z]*|county|gallery|opening|present[a-zA-Z]*|york|ny[c]*|department|fund|partnership|foundation|event|^work|commission|award|university|college|center|bfa|mfa|exhibition|recent|solo|currently|pleased', word):
            continue
        elif word in months or word in days or word == 'pm' or word == "New York":
            continue
        else:
            try: 
                if int(word) > 2000 or int(word) < 32: 
                    continue
                else:
                    cleaned_words.append(word)
            except:
                cleaned_words.append(word)
    # for each word, take its "base form" = lemm
    words = TextBlob(' '.join(cleaned_words)).words
    return [word.lemma for word in words]

In [12]:
vectorizer_lemm = TfidfVectorizer(strip_accents = "unicode", tokenizer = split_into_lemmas, ngram_range = (1,2), stop_words = 'english', min_df = 0.05, max_df = 0.5)

In [13]:
dtm_lemm = vectorizer_lemm.fit_transform(artsy['full_descript'])



In [14]:
vectorizer_lemm.get_feature_names()

['1960s',
 '20th',
 'abstract',
 'abstraction',
 'accompanied',
 'act',
 'action',
 'addition',
 'address',
 'aesthetic',
 'age',
 'alongside',
 'american',
 'ancient',
 'animal',
 'announce',
 'appear',
 'appears',
 'approach',
 'architectural',
 'architecture',
 'archive',
 'art',
 'artist',
 'artwork',
 'aspect',
 'associated',
 'attempt',
 'attention',
 'b',
 'based',
 'beauty',
 'began',
 'beginning',
 'best',
 'black',
 'body',
 'bold',
 'book',
 'born',
 'boundary',
 'box',
 'bring',
 'brooklyn',
 'build',
 'building',
 'california',
 'came',
 'canvas',
 'capture',
 'captured',
 'career',
 'catalogue',
 'celebrated',
 'celebrates',
 'central',
 'century',
 'change',
 'character',
 'city',
 'collaboration',
 'collage',
 'collective',
 'color',
 'combine',
 'come',
 'commitment',
 'common',
 'community',
 'complete',
 'complex',
 'composition',
 'comprehensive',
 'conceptual',
 'condition',
 'connection',
 'contemporary',
 'contemporary art',
 'content',
 'context',
 'continue',
 

### Looking at what the top words are

In [15]:
vocab = pd.DataFrame(dtm_lemm.toarray(), index=artsy['full_descript'], columns=vectorizer_lemm.get_feature_names()).head(10)

In [16]:
vocab.loc['Total']= vocab.sum()

In [17]:
total = vocab.loc['Total']

In [18]:
total.sort_values(ascending=False).head(10)

painting        0.951811
color           0.824969
practice        0.794917
space           0.761907
visual          0.748372
human           0.711177
landscape       0.675289
wall            0.673884
form            0.637751
contemporary    0.629627
Name: Total, dtype: float64

# NMF

In [19]:
from sklearn.decomposition import NMF

In [20]:
nmf_model = NMF(10)
dtm_nmf = nmf_model.fit_transform(dtm_lemm)
dtm_nmf = Normalizer(copy=False).fit_transform(dtm_nmf)

In [21]:
### create similarity matrix to find 'related' exhibits
similarity = np.asarray(np.asmatrix(dtm_nmf) * np.asmatrix(dtm_nmf).T) 
sim_df = pd.DataFrame(similarity.round(6),index=artsy[['title', 'venue']], columns=artsy[['title', 'venue']])

In [22]:
### rename index and columns with venue and exhibit title 
sim_df.index = sim_df.index.map(lambda x:(x[0].title(), x[1].title()))
sim_df.columns = sim_df.columns.map(lambda x:(x[0].title(), x[1].title()))

In [23]:
sim_df.to_csv('/Users/meghajain/Desktop/final_sim.csv')

### Find the top 3 related shows

In [24]:
sim_df.head()

Unnamed: 0_level_0,"('Architecture Of Color: The Legacy Of Luis Barragaõ_Õ_Ëôë\x81N', 'Timothy Taylor')","('Il Lee: New Paintings / 40 Years In New York', 'Art Projects International')","('Lauren Marsolier : Dislocation', 'Galerie Richard')","('Bernie Taupin: Antiphona', 'Waterhouse & Dodd')","('Arc Select 2016', 'Rehs Contemporary Galleries')","('Elliott Erwitt: Kolor', 'Edwynn Houk Gallery')","('Jane Lee: Wall Matters', 'Sundaram Tagore Gallery')","('Jeremy Penn La Petite Mort', ""Galleria Ca' D'Oro"")","('Raising A Riot | Jc Lenochan', 'Project For Empty Space')","('Nathan Chantob Temporary Exhibition', ""Art D'Aurelle"")",...,"('John Mclaughlin Ëôë_Õ__Õ_Ë\x8fMarvelous Voidëôë_Õ__Ëôë\x9d', 'Van Doren Waxter')","('Laura Mcclanahan Ëôë_Õ__Õ_Ë\x8fCosmic Consciousnessëôë_Õ__Ëôë\x9d', 'Ashok Jain Gallery')","('Annie Kevans Exhibition', 'Danziger Gallery')","('Yurim Cha Ëôë_Õ__Õ_Ë\x8fMy Essayëôë_Õ__Ëôë\x9d', 'Gallery Dëôë_Õ__Ëôõ_Arte')","('Conrad Ventur Ëôë_Õ__Õ_Ë\x8fIvyëôë_Õ__Ëôë\x9d', 'Baxter Street/ The Camera Club Of Ny')","('Ëôë_Õ__Õ_Ë\x8fThe Animal Mirrorëôë_Õ__Ëôë\x9d Exhibition', 'The International Studio & Curatorial Program')","('Liu Wei Exhibition', 'Lehmann Maupin (536 W 22Nd Street)')","('Alison Knowles & Jessica Dickinson Exhibition', 'James Fuentes Llc')","('Ëôë_Õ__Õ_Ë\x8f12Th Annual National Alternative Processes Competitionëôë_Õ__Ëôë\x9d Exhibition', 'Soho Photo Gallery')","('Ëôë_Õ__Õ_Ë\x8fParanoia Man In A Rat Fink Roomëôë_Õ__Ëôë\x9d Exhibition', 'Storefront For Art And Architecture')"
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"('Architecture Of Color: The Legacy Of Luis Barragaõ_Õ_Ëôë\x81N', 'Timothy Taylor')",1.0,0.44305,0.199221,0.219994,0.665943,0.11159,0.472758,0.432411,0.753973,0.54186,...,0.065453,0.085435,0.319474,0.002106,0.218794,0.840978,0.020006,0.809239,0.022967,0.701119
"('Il Lee: New Paintings / 40 Years In New York', 'Art Projects International')",0.44305,1.0,0.034619,0.113063,0.54035,0.231403,0.562195,0.447933,0.116426,0.208587,...,0.306327,0.759119,0.323509,0.215445,0.137023,0.146807,0.173438,0.267155,0.023895,0.17744
"('Lauren Marsolier : Dislocation', 'Galerie Richard')",0.199221,0.034619,1.0,0.425901,0.411019,0.865876,0.040655,0.758703,0.004235,0.496716,...,0.107292,0.253998,0.68078,0.002324,0.440804,0.21579,0.0,0.475732,0.549574,0.075079
"('Bernie Taupin: Antiphona', 'Waterhouse & Dodd')",0.219994,0.113063,0.425901,1.0,0.521016,0.257284,0.092707,0.399154,0.093755,0.471687,...,0.558381,0.171792,0.242703,0.005438,0.075568,0.232628,0.000797,0.469124,0.644437,0.051966
"('Arc Select 2016', 'Rehs Contemporary Galleries')",0.665943,0.54035,0.411019,0.521016,1.0,0.181197,0.100905,0.464869,0.151227,0.724177,...,0.176804,0.199275,0.352251,0.025679,0.140512,0.41312,0.018073,0.750515,0.088115,0.168416


In [27]:
top_3 = []
for index,item in enumerate(sim_df["('Shozo Shimamoto | Do Something Interesting, See Something Odd', 'De Buck Gallery')"].sort_values(ascending=False).head(4).index):
    print (index, item)
    if index > 0:
        top_3.append(item)

0 ('Shozo Shimamoto | Do Something Interesting, See Something Odd', 'De Buck Gallery')
1 ('Francis Picabia: Our Heads Are Round So Our Thoughts Can Change Direction', 'The Museum Of Modern Art')
2 ('The Indian Summer Show', 'Dm Contemporary')
3 ('Voulkos: The Breakthrough Years', 'Museum Of Arts And Design')


In [28]:
top_3

["('Francis Picabia: Our Heads Are Round So Our Thoughts Can Change Direction', 'The Museum Of Modern Art')",
 "('The Indian Summer Show', 'Dm Contemporary')",
 "('Voulkos: The Breakthrough Years', 'Museum Of Arts And Design')"]

### Find extracted topics and top exhibits for each topic

In [30]:
def print_top_words(model, feature_names, n_top_words):
    for topic_idx, topic in enumerate(model.components_):
        print("Topic #%d:" % topic_idx)
        print(", ".join([feature_names[i]
                        for i in topic.argsort()[:-n_top_words - 1:-1]]))
    print()

In [31]:
def nmf_mod(corp):
    max_val = .25
    min_val = 0.05
    ngram_range_val = (1,2)
    n_topics = 10
    n_top_words = 30
    
    # Use tf-idf features for NMF.
    print("Extracting tf-idf features for NMF...")
    vectorizer_nmf = TfidfVectorizer(tokenizer = split_into_lemmas, ngram_range = ngram_range_val, stop_words = 'english', min_df = min_val, max_df = max_val)
    dtm_nmf = vectorizer_nmf.fit_transform(corp)


    # Fit the NMF model
    print("Fitting the NMF model with tf-idf features")

    nmf = NMF(n_components=n_topics, random_state=1).fit(dtm_nmf)
    
    print(nmf.components_)
    
    print("\nTopics in NMF model:")
    tfidf_feature_names = vectorizer_nmf.get_feature_names()
    return print_top_words(nmf, tfidf_feature_names, n_top_words), nmf.components_

In [32]:
artsy['full_descript'].head(1)

0    Luis BarragaéÎn (1902-1988) was one of the g...
Name: full_descript, dtype: object

In [33]:
nmf_mod(artsy['full_descript'])

Extracting tf-idf features for NMF...
Fitting the NMF model with tf-idf features
[[ 0.          0.01786784  0.03687963 ...,  0.03597128  0.          0.02598221]
 [ 0.03015685  0.01715875  0.01485527 ...,  0.          0.          0.        ]
 [ 0.01638094  0.00673816  0.         ...,  0.04246458  0.00184799  0.        ]
 ..., 
 [ 0.00125136  0.          0.         ...,  0.04114987  0.01419915
   0.01155647]
 [ 0.02083824  0.0197261   0.00440316 ...,  0.10396882  0.07201462
   0.0090064 ]
 [ 0.00023899  0.01803756  0.05147248 ...,  0.00857881  0.00766181
   0.01549872]]

Topics in NMF model:
Topic #0:
installation, video, wall, research, social, b, film, dream, project, technology, culture, experience, medium, popular, viewer, floor, political, visual, date, cultural, critical, collective, sculptural, process, box, common, environment, practice, particular, piece
Topic #1:
american, painter, visual, culture, john, black, gallery, expressionism, america, history, abstraction, watercolor, 

(None, array([[ 0.        ,  0.01786784,  0.03687963, ...,  0.03597128,
          0.        ,  0.02598221],
        [ 0.03015685,  0.01715875,  0.01485527, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.01638094,  0.00673816,  0.        , ...,  0.04246458,
          0.00184799,  0.        ],
        ..., 
        [ 0.00125136,  0.        ,  0.        , ...,  0.04114987,
          0.01419915,  0.01155647],
        [ 0.02083824,  0.0197261 ,  0.00440316, ...,  0.10396882,
          0.07201462,  0.0090064 ],
        [ 0.00023899,  0.01803756,  0.05147248, ...,  0.00857881,
          0.00766181,  0.01549872]]))

In [34]:
topics, components = nmf_mod(artsy['full_descript'])

Extracting tf-idf features for NMF...
Fitting the NMF model with tf-idf features
[[ 0.          0.01786784  0.03687963 ...,  0.03597128  0.          0.02598221]
 [ 0.03015685  0.01715875  0.01485527 ...,  0.          0.          0.        ]
 [ 0.01638094  0.00673816  0.         ...,  0.04246458  0.00184799  0.        ]
 ..., 
 [ 0.00125136  0.          0.         ...,  0.04114987  0.01419915
   0.01155647]
 [ 0.02083824  0.0197261   0.00440316 ...,  0.10396882  0.07201462
   0.0090064 ]
 [ 0.00023899  0.01803756  0.05147248 ...,  0.00857881  0.00766181
   0.01549872]]

Topics in NMF model:
Topic #0:
installation, video, wall, research, social, b, film, dream, project, technology, culture, experience, medium, popular, viewer, floor, political, visual, date, cultural, critical, collective, sculptural, process, box, common, environment, practice, particular, piece
Topic #1:
american, painter, visual, culture, john, black, gallery, expressionism, america, history, abstraction, watercolor, 

In [42]:
topic_4= [[x,y] for x,y in zip(artsy.title, components[4])]
topic_4.sort(key=lambda x:x[1], reverse=True)
topic_4

[['John Aslanidis: Sonic New Wave', 0.14171098585423333],
 ['Arc Select 2016', 0.12791228564308424],
 ['Ieva Epnere: Sea Of Living Memories', 0.1207934587172468],
 ['City Abstractions', 0.11875277602509862],
 ['To Have And Not To Hold', 0.095592578257888319],
 ['Fahamu Pecou: #Blackmatterlives', 0.094287086443430895],
 ['Memory And Identity', 0.088722338109600105],
 ['Hiding In Plain Sight', 0.085847933424417447],
 ['Jan Van Der Ploeg: Lyrics', 0.081301693484601117],
 ['Observer', 0.079603994459849864],
 ['Elmgreen And Dragset, Changing Subjects', 0.077161133580827046],
 ['Max Beckmann', 0.074795934171074357],
 ['Diane Arbus', 0.073561379386969758],
 ['Slim Aarons: Photographer', 0.070351657421796401],
 ['R. Luke Dubois: "The Choice Is Yours"', 0.068575738373029951],
 ['Wanderlust', 0.064912917106419485],
 ['Conrad Ventur Ëôë_Õ__Õ_Ë\x8fIvyëôë_Õ__Ëôë\x9d', 0.06052074908087874],
 ['Ivan Navarro: Mute Parade', 0.060166487539417023],
 ['Adolfo Doring Posted 162 Grams', 0.058797604022493476

In [43]:
topic_1 = [[x,y] for x,y in zip(artsy.title, components[4])]
topic_1.sort(key=lambda x:x[1], reverse=True)
topic_1

[['John Aslanidis: Sonic New Wave', 0.14171098585423333],
 ['Arc Select 2016', 0.12791228564308424],
 ['Ieva Epnere: Sea Of Living Memories', 0.1207934587172468],
 ['City Abstractions', 0.11875277602509862],
 ['To Have And Not To Hold', 0.095592578257888319],
 ['Fahamu Pecou: #Blackmatterlives', 0.094287086443430895],
 ['Memory And Identity', 0.088722338109600105],
 ['Hiding In Plain Sight', 0.085847933424417447],
 ['Jan Van Der Ploeg: Lyrics', 0.081301693484601117],
 ['Observer', 0.079603994459849864],
 ['Elmgreen And Dragset, Changing Subjects', 0.077161133580827046],
 ['Max Beckmann', 0.074795934171074357],
 ['Diane Arbus', 0.073561379386969758],
 ['Slim Aarons: Photographer', 0.070351657421796401],
 ['R. Luke Dubois: "The Choice Is Yours"', 0.068575738373029951],
 ['Wanderlust', 0.064912917106419485],
 ['Conrad Ventur Ëôë_Õ__Õ_Ë\x8fIvyëôë_Õ__Ëôë\x9d', 0.06052074908087874],
 ['Ivan Navarro: Mute Parade', 0.060166487539417023],
 ['Adolfo Doring Posted 162 Grams', 0.058797604022493476

In [44]:
topic_5 = [[x,y] for x,y in zip(artsy.title, components[5])]
topic_5.sort(key=lambda x:x[1], reverse=True)
topic_5

[['Cecily Brown: Rehearsal', 0.24308787175601915],
 ['Pipilotti Rist: Pixel Forest', 0.16272419434449509],
 ['Just What Is It, Part 1', 0.15250875950437598],
 ['True Colors', 0.15068317708137052],
 ['Gioielli In Fermento', 0.13838684121292261],
 ['Sons Of Capital', 0.12791389485088495],
 ['Gcc: Positive Pathways (+)', 0.12080465929539874],
 ["Take Me (I'M Yours)", 0.10890807057510195],
 ['Popel', 0.085614745921551105],
 ["Women! Women! (Of The 50'S)", 0.083013895096034393],
 ['Pipilotti Rist: Pixel Forest', 0.078698934570632845],
 ['April Gornik - Recent Paintings And Drawings', 0.077139468218196811],
 ['Ivan Navarro: Mute Parade', 0.07233551015667021],
 ['A Portrait Of The Noguchi Museum: Tina Barney And Stephen Shore',
  0.071261565201436589],
 ['Code', 0.065286912091790592],
 ['Dimorestudio + Callidus Guild', 0.065263225039347728],
 ['Depend On The Morning Sun', 0.06442702514555082],
 ['Claude Lalanne: Bijoux', 0.064384993171551788],
 ['Ena Swansea: New Paintings', 0.063687632400806

In [45]:
topic_9 = [[x,y] for x,y in zip(artsy.title, components[9])]
topic_9.sort(key=lambda x:x[1], reverse=True)
topic_9

[['About New York', 1.0190885328755552],
 ['Pipilotti Rist: Pixel Forest', 0.15018952474236646],
 ['Hurvin Anderson Ëôë_Õ__Õ_Ë\x8fForeign Bodyëôë_Õ__Ëôë\x9d',
  0.13978113907436168],
 ['Ëôë_Õ__Õ_Ë\x8fThe Animal Mirrorëôë_Õ__Ëôë\x9d Exhibition',
  0.11577711539615151],
 ['Gustav Klimt And The Women Of Viennaõ_Ë¢Ëôõ_Ëôë_S Golden Age, 1900-1918',
  0.10911465501625048],
 ['Popel', 0.10830035695393875],
 ['Cecily Brown: Rehearsal', 0.092441219633882715],
 ['Ëôë_Õ__Õ_Ë\x8f12Th Annual National Alternative Processes Competitionëôë_Õ__Ëôë\x9d Exhibition',
  0.084513370720099401],
 ['Matt Mullican Ëôë_Õ__Õ_Ë\x8fPantagraphëôë_Õ__Ëôë\x9d',
  0.084387316120545994],
 ['Ieva Epnere: Sea Of Living Memories', 0.077132945116406101],
 ['Mike Kelley Ëôë_Õ__Õ_Ë\x8fMemory Wareëôë_Õ__Ëôë\x9d', 0.076839607831913195],
 ['Kai Althoff: And Then Leave Me To The Common Swifts', 0.076222865525509026],
 ['Agnes Martin', 0.075011575185421051],
 ['Christophe Von Hohenberg: Another Planet: Photographs Of The New York 

In [46]:
topic_2 = [[x,y] for x,y in zip(artsy.title, components[2])]
topic_2.sort(key=lambda x:x[1], reverse=True)
topic_2
topic_2[:5]

[['Carmen Herrera: Lines Of Sight', 0.85455397583032588],
 ['Beverly Buchanan_Ëôëêõ_Ëîruins And Rituals', 0.14285111713513401],
 ['In And Out Of Context: Asia Society Celebrates The Collections At 60',
  0.078583672055392947],
 ['Who Shot Sports: A Photographic History, 1843 To The Present',
  0.066876937947300749],
 ['Fdic Insured', 0.059180882515638966]]