In [1]:
from gensim import corpora, models, similarities
import pandas as pd

In [2]:
# Load the saved gensim dictionary and LSI model for the fabric data.
# Both paths are relative, so they resolve against the notebook's working directory.
dict_fabric = corpora.Dictionary.load("../lsaModels/dictionary_fabric")
lsi_fabric = models.LsiModel.load("../lsaModels/lsi_300_fabric")

In [3]:
# Show topics as weighted-term strings. No arguments are passed, so how many
# topics and terms appear is left to gensim's defaults.
lsi_fabric.print_topics()

[u'-0.493*"change" + -0.337*"color" + -0.289*"clothing" + -0.271*"clothes" + -0.198*"fabric" + -0.182*"make" + -0.181*"display" + -0.161*"colors" + -0.147*"mood" + -0.133*"changing"',
 u'-0.470*"movies" + -0.354*"watch" + 0.317*"change" + -0.266*"show" + 0.245*"color" + -0.218*"screen" + 0.206*"clothes" + -0.171*"fabric" + -0.160*"movie" + -0.155*"display"',
 u'0.538*"clothing" + -0.375*"movies" + 0.237*"display" + -0.235*"watch" + -0.221*"clothes" + -0.212*"change" + -0.201*"show" + 0.198*"advertising" + -0.196*"color" + 0.194*"interactive"',
 u'0.561*"clothing" + 0.352*"movies" + 0.273*"show" + -0.264*"movie" + -0.228*"screen" + -0.224*"make" + 0.170*"watch" + -0.155*"fabric" + -0.147*"wall" + -0.138*"portable"',
 u'0.424*"screen" + 0.418*"movie" + -0.335*"show" + 0.311*"clothing" + -0.251*"interactive" + -0.205*"games" + -0.194*"make" + -0.164*"game" + 0.156*"tv" + 0.138*"color"']

In [4]:
# Number of topics in the loaded model.
# NOTE(review): the model file is named "lsi_300_fabric" but the value reported
# here is 200 -- confirm the intended model was loaded.
lsi_fabric.num_topics

200

In [5]:
# Method 1: pool all the idea text into one big bag of words and use that as
# the query; the corpus being searched is the set of paths loaded here.
paths = pd.read_csv("../topicWords/fabric_paths_smaller.csv")

In [6]:
# Re-encode every 'allwords' entry as UTF-8 (errors='ignore') and store the
# result back on the same column.
paths['allwords'] = paths['allwords'].map(lambda text: text.encode('utf-8', 'ignore'))

In [7]:
# Turn each path's word string into a bag-of-words vector: split on whitespace,
# then let the dictionary map the tokens to (token id, count) pairs.
path_corpus = []
for words in paths['allwords']:
    path_corpus.append(dict_fabric.doc2bow(words.split()))
path_corpus[0]

[(9, 1),
 (28, 1),
 (32, 1),
 (173, 2),
 (385, 1),
 (481, 1),
 (505, 1),
 (586, 1),
 (602, 1),
 (664, 1),
 (1289, 1),
 (1432, 2),
 (1433, 1),
 (1434, 1),
 (1443, 1),
 (1632, 1),
 (1891, 1)]

In [8]:
# Project the bag-of-words path corpus through the LSI model, then build the
# similarity index over the projected paths.
lsi_paths = lsi_fabric[path_corpus]
index = similarities.MatrixSimilarity(lsi_paths)
index



<gensim.similarities.docsim.MatrixSimilarity at 0x11178a390>

In [9]:
# Peek at the raw matrix stored on the similarity index.
index.index

array([[-0.29538864,  0.03366122,  0.0257112 , ..., -0.02557454,
         0.00967514, -0.02686297],
       [-0.12921008, -0.09690642,  0.15108968, ...,  0.09126988,
        -0.0397026 ,  0.04123164],
       [-0.1019632 , -0.17350984, -0.03546882, ...,  0.02731007,
         0.05460421,  0.00503987],
       [-0.17824575,  0.05087417, -0.04662385, ...,  0.01324272,
        -0.00159962,  0.01131967],
       [-0.24498549, -0.16139869,  0.29300216, ..., -0.03072298,
        -0.01128992, -0.00553972],
       [-0.17061396, -0.13212545,  0.1837122 , ...,  0.01545442,
         0.02593173, -0.00515088]], dtype=float32)

In [10]:
# helper: rank the candidate paths by LSI similarity to a bag of idea words
def choose_best_path(ideaBag, dictionary, lsimodel, simIndex, paths):
    """Rank every path by its similarity to a bag of idea words.

    Parameters
    ----------
    ideaBag : str
        Whitespace-separated words making up the ideas. The text is
        lower-cased and split on whitespace before dictionary lookup.
    dictionary : object
        Must provide ``doc2bow(tokens)`` (e.g. a gensim Dictionary).
    lsimodel : object
        Must support ``lsimodel[bow]`` to project a bag of words.
    simIndex : object
        Must support ``simIndex[vec]`` and yield one similarity score per
        indexed document, in index order.
    paths : pandas.DataFrame
        One row per indexed document. Scores are matched to rows by label:
        the score at position ``k`` is written to the row labelled ``k``, so
        the frame is expected to carry integer labels 0..n-1 in the order
        the documents were indexed.

    Returns
    -------
    pandas.DataFrame
        The *same* ``paths`` object, modified in place: ``rank`` (0 = most
        similar) and ``sim`` columns are (re)written and the rows are sorted
        by ``rank`` ascending.
    """
    # Preprocess the query and project it into LSI space.
    vec_bow = dictionary.doc2bow(ideaBag.lower().split())
    vec_lsi = lsimodel[vec_bow]

    # Score the query against every indexed document, best match first.
    scores = simIndex[vec_lsi]
    ranked = sorted(enumerate(scores), key=lambda item: -item[1])

    # Reset both columns so values from a previous call never linger.
    paths['rank'] = 0
    paths['sim'] = 0.0

    # .at is the label-based scalar setter; it replaces DataFrame.set_value,
    # which newer pandas releases no longer provide.
    for rank, (doc_label, score) in enumerate(ranked):
        paths.at[doc_label, 'rank'] = rank
        paths.at[doc_label, 'sim'] = score

    # Row labels survive the in-place sort, so later calls on the same frame
    # still address the right rows.
    paths.sort_values("rank", inplace=True)

    return paths

In [11]:
# Sanity check: query with the 'allwords' text of the path at row label 1.
# choose_best_path returns the very DataFrame it was handed, so sim_paths and
# paths are the same object from here on.
sim_paths = choose_best_path(paths.loc[1, 'allwords'], dict_fabric, lsi_fabric, index, paths)

In [12]:
# Display the ranking. choose_best_path already sorts by "rank" before
# returning, so this sort leaves the row order unchanged.
sim_paths.sort_values("rank", inplace=True)
sim_paths

Unnamed: 0,id,path,ex1,ex2,allwords,rank,sim
1,p03,enhance the customer experience at restaurants,chef coats could display food images of the me...,Interactive menus at restaurants - press to le...,enhance the customer experience at restaurants...,0,0.941487
4,p06,make and display more effective advertisements...,Airplane draggable banner that can display any...,Personalized billboards from companies that re...,make and display more effective advertisements...,1,0.230224
5,p10,make physically interactive games for the ente...,Make soft interactive baby toys that are safe ...,Dance floor carpet that flashes the next foot ...,make physically interactive games for the ente...,2,0.21027
0,p01,create novel ways for visual artists to expres...,"Use it as a canvas for art, as it reacts to to...",It would be great for making a canvas for arti...,create novel ways for visual artists to expres...,3,0.128236
2,p04,enhance the educational experience of students...,It can be used in school as a way to show kids...,Interactive text books for students,enhance the educational experience of students...,4,0.005833
3,p05,increase productivity/efficiency in everyday life,a shirt that you can load data into the sleeve...,ability to write notes on clothes with touch s...,increase productivity/efficiency in everyday l...,5,-0.006527


In [13]:
# Emit the ranking as a JSON array of {id, rank, sim} records. The parenthesized
# single-argument form prints the same text under Python 2 and is valid Python 3.
print(paths[['id', 'rank', 'sim']].to_json(orient="records"))

[{"id":"p03","rank":0,"sim":0.9414870739},{"id":"p06","rank":1,"sim":0.2302244902},{"id":"p10","rank":2,"sim":0.2102696896},{"id":"p01","rank":3,"sim":0.1282364875},{"id":"p04","rank":4,"sim":0.0058327429},{"id":"p05","rank":5,"sim":-0.0065265894}]


In [14]:
# NOTE(review): potential_matches is first assigned in the next cell, so on a
# fresh kernel this line raises the NameError recorded below.
potential_matches.head()

NameError: name 'potential_matches' is not defined

In [15]:
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# that Google Drive folder exists.
potential_matches = pd.read_csv("/Users/jchan/Google Drive/Research Docs/IdeaGens/Productive-Fixation/prototyping automated matching.csv")

# Start every match_<pathID> column from the value in the corresponding
# <pathID> column, converted to float so similarities can be added to it.
for pathID in ['p01', 'p03', 'p04', 'p05', 'p06', 'p10']:
    potential_matches["match_%s" % pathID] = potential_matches[pathID].astype(float)

for i, row in potential_matches.iterrows():
    # Pool the row's six idea texts into a single query string.
    ideaBag = " ".join(row['idea %d' % n] for n in range(1, 7))
    best_paths = choose_best_path(ideaBag, dict_fabric, lsi_fabric, index, paths)

    # Add each path's similarity to this row's match_<pathID> cell. Walking the
    # id and sim columns together avoids a boolean-mask lookup per path, and
    # .at replaces DataFrame.set_value, which newer pandas no longer provides.
    for pathID, sim in zip(best_paths['id'], best_paths['sim']):
        column = "match_%s" % pathID
        potential_matches.at[i, column] = float(potential_matches.at[i, column] + sim)

In [16]:
# Inspect the frame with the accumulated match_* columns alongside the
# original p* columns.
potential_matches

Unnamed: 0,idea 1,idea 2,idea 3,idea 4,idea 5,idea 6,p01,p03,p04,p05,p06,p10,match_p01,match_p03,match_p04,match_p05,match_p06,match_p10
0,"Using the fabric on jackets, to help with cold...","Using it on camping gears, like tents. Allow i...",Using the material on cars. Maybe on the car s...,Airbags on car. Maybe the material can be bett...,Use it on blankets for little children. It can...,Public transportation. Use it as a better seat.,4,3,1,3,3,1,4.080323,3.015587,1.03786,2.978731,3.15587,0.994629
1,Hang on the wall and display family pictures t...,Use as a stealth device for soldiers to get be...,"Hang on the side of a house, use as a projecto...","Dress your children in them, let them keep eac...","Be able to use your computer on the go, on any...",Use for small children to play video games on ...,2,3,4,3,3,3,2.048248,3.072212,3.987101,3.006807,3.131394,3.215973
2,Use the fabric as a projector to watch movies,Use the fabric for office presentations.,Hang the fabric on a wall as artwork.,Use the fabric as an interactive play set for ...,Use the fabric to support your team during spo...,Use the fabric as a virtual drawing board.,1,3,4,3,3,1,1.01551,3.011634,3.996046,2.999689,2.985805,1.074702
3,An electronic shirt that connects to a compute...,A kids' backpack with changeable images and co...,Wearable picture frames that change pictures t...,WiFi connectivity which could be used to searc...,Built in tools such as calculators and organiz...,Post-It Notes that would remind you of importa...,1,1,3,3,1,1,1.123065,1.216565,3.040478,3.263555,1.134559,1.002665
4,Having your laptop on the displayed on your co...,Being able to change the patterns on your clot...,Watching movies or using a tablet on the fabri...,Changing the color of your carpet or curtains ...,Using this at work to present things on your c...,To create patterns on diapers,3,2,3,1,1,1,3.29693,2.023031,3.090648,0.990136,0.979459,1.066662
5,Create an entire wardrobe form a single suit. ...,"Have walking billboards. Have commercials, and...",Use for warnings and public announcements duri...,Synchronization of clothing for public display...,Mobile social displays. Wearers can choose to ...,Emergency applications that will cause the fab...,3,4,4,1,4,4,3.177287,4.159947,4.019974,1.003061,4.409213,4.087617
6,put it on a wall as a poster that can change t...,put a picture of your spouse on it and was you...,put a picture of the family on it and use it f...,put weird pictures on it to make a great costu...,"using the touch feature, have it change pictur...",you can have different colored shirts or pants...,1,1,4,4,4,3,1.119689,1.026203,4.024745,4.116953,4.103632,3.182248
7,"When purchasing something on line, this featur...","scratch that last idea, i just realized the de...","People can make clothes, or bags with detailed...",it would be easier for people to make designs ...,people could be very creative and make things ...,the clothing industry would be able to use thi...,4,1,4,3,3,1,4.146161,1.222403,4.024818,3.047048,3.173672,1.138935
8,Wear your big work presentation on your work c...,Change the color of your clothes if you're out...,A rug shaped like a piano that plays in tune w...,Personalized billboards from companies that re...,Help advertise your new movie by wearing the t...,Have an entire room in the house be a movie pr...,1,1,1,2,4,2,1.168519,0.995969,1.029374,2.210205,4.122792,2.004479
9,It could be used as a cheaper means of televis...,It could be used to make underwater screens wh...,There are many possible military applications....,It could be used for cheap and makeshift educa...,It could usher in a new age of digital interio...,It could introduce many new possibilities for ...,4,1,2,4,2,4,4.148058,1.196253,2.032621,4.041125,2.231599,4.10801


In [17]:
# Persist the scored table next to the input file (absolute, machine-specific path).
# NOTE(review): to_csv is called without index=..., so the row index is
# presumably written as an extra leading column -- confirm that is wanted.
potential_matches.to_csv("/Users/jchan/Google Drive/Research Docs/IdeaGens/Productive-Fixation/prototyping automated matching (algo).csv")