In [76]:
import pandas as pd
import numpy as np
import nltk 
from nltk import pos_tag
import re
import itertools
from apriori_python import apriori

clusters30 = pd.read_csv('clusters_30.csv')
clusters25 = pd.read_csv('clusters25.csv')
clusters20 = pd.read_csv('clusters_20.csv')

In [77]:
def get_clust_description(df):
    clust = np.transpose(df['cluster'].value_counts().index)
    freq = np.transpose(np.array(df['cluster'].value_counts()))
    df_freq = pd.DataFrame({"cluster" : clust, "frequency" : freq}).sort_values(by = ['frequency'], ascending=False)
    df_freq['percent'] = df_freq['frequency'] / df_freq['frequency'].sum() * 100
    df_freq_ = df_freq.head(10)
    df_freq_['percent_sum_top_n'] = df_freq_['percent'].sum()
    return df_freq_

In [78]:
def get_tokenize_noun(text):
    text_clean = text.split(', ')
    texts = []
    filters = ['NN','NNS','NNP']
    nouns = []
    for element in text_clean:
        if element is not '':
            texts.append(re.sub('[^a-zA-Z]+','',element))
    try:
        pos_texts = pos_tag(texts)
        for element in pos_texts:
            if element[1] in filters:
                nouns.append(element[0])
    except:
        nouns.append('noNoun')
    return nouns

In [79]:
def get_tokenize_adj(text):
    text_clean = text.split(', ')
    texts = []
    filters = ['JJ','JJR','JJS']
    adj = []
    for element in text_clean:
        if element is not '':
            texts.append(re.sub('[^a-zA-Z]+','',element))
    try:
        pos_texts = pos_tag(texts)
        for element in pos_texts:
            if element[1] in filters:
                adj.append(element[0])
    except:
        adj.append('noAdj')
    return adj

In [80]:
def create_adj_nouns(df):
    df['tokenize_adj'] = list(map(get_tokenize_adj,df['text']))
    df['tokenize_noun'] = list(map(get_tokenize_noun,df['text']))
    return df

In [81]:
def get_dict_desc_top(pandas_column,n):
    noun_adj = list(itertools.chain(*pandas_column))
    noun_adj = pd.DataFrame({'adj': noun_adj})
    noun_adj = pd.DataFrame(noun_adj['adj'].value_counts())
    noun_adj = noun_adj.reset_index()
    noun_adj_dict = dict(noun_adj.head(n).values)
    return noun_adj_dict

In [82]:
def get_clust_desc(df):
    noun = get_dict_desc_top(df['tokenize_noun'],20)
    adj = get_dict_desc_top(df['tokenize_adj'],20)
    return noun, adj

In [83]:
def get_tokens(text):
    try:
        tokens = text.split(' ')
    except:
        tokens = 'no, Object'
    return tokens

In [84]:
def get_rules_properties(rules):
    df = pd.DataFrame(rules)
    df = df[df[2] != 1.0]
    df = df.sort_values(by = [2], ascending=False)
    return df

In [85]:
def get_cluster_properties(cluster_df, n_cluster, min_sup, min_conf):
    df = cluster_df[cluster_df['cluster']==n_cluster]
    df = create_adj_nouns(df)
    df['tokens'] = list(map(get_tokens,df['tokens']))
    df_noun, df_adj = get_clust_desc(df)
    freqItems, rules = apriori(df['tokens'], minSup=min_sup, minConf=min_conf)
    try:
        rules_df = get_rules_properties(rules)
    except:
        rules_df = 'no rules found'
    return df_noun, df_adj, df, rules_df

In [86]:
clust20 = get_clust_description(clusters20)
clust20

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


Unnamed: 0,cluster,frequency,percent,percent_sum_top_n
0,12,58755,10.842226,72.22172
1,1,52684,9.721927,72.22172
2,19,42086,7.766249,72.22172
3,6,40380,7.451436,72.22172
4,8,36250,6.689315,72.22172
5,7,34886,6.437612,72.22172
6,3,34768,6.415837,72.22172
7,16,33066,6.101762,72.22172
8,11,31932,5.892502,72.22172
9,13,26569,4.902853,72.22172


In [87]:
noun20_12, adj20_12, clust20_12, rules20_12_df = get_cluster_properties(clusters20,12,0.02,0.02)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [88]:
rules20_12_df

Unnamed: 0,0,1,2
20,{reel},{ribbon},0.99858
19,{doll},{feltcraft},0.967914
18,{herb},{marker},0.868217
17,{rack},{coat},0.839111
16,{marker},{herb},0.798246
15,{block},"{building, word}",0.765892
14,{block},{building},0.765892
13,{block},{word},0.765892
12,{rose},{english},0.670244
11,{key},{fob},0.598909


In [89]:
noun20_12

{'feltcraft': 4773,
 'cushion': 3177,
 'cover': 3013,
 'ribbon': 2526,
 'block': 2281,
 'pink': 2250,
 'doll': 2244,
 'postage': 1961,
 'marker': 1824,
 'garden': 1817,
 'word': 1747,
 'building': 1747,
 'flower': 1729,
 'herb': 1677,
 'home': 1662,
 'rack': 1529,
 'nan': 1454,
 'reel': 1408,
 'doormat': 1391,
 'mat': 1376}

In [15]:
adj20_12

{'key': 2017,
 'doormat': 1707,
 'black': 1660,
 'edwardian': 1216,
 'blue': 1184,
 'poppy': 1050,
 'wooden': 781,
 'new': 697,
 'white': 649,
 'magic': 617,
 'purple': 615,
 'green': 574,
 'manual': 572,
 'natural': 558,
 'decorative': 507,
 'bundle': 500,
 'french': 464,
 'garden': 464,
 'floral': 424,
 'yellow': 348}

In [16]:
noun20_1, adj20_1, clust20_1, rules20_1_df = get_cluster_properties(clusters20,1,0.05,0.05)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [17]:
rules20_1_df

Unnamed: 0,0,1,2
55,{woodland},{bag},0.967963
54,{pink},{bag},0.958996
53,{skull},{bag},0.951664
52,{apple},{bag},0.946364
51,{lunch},{bag},0.937215
50,{suki},{bag},0.91501
49,{jumbo},{bag},0.897393
48,{vintage},{bag},0.845494
47,"{bag, red}",{retrospot},0.818197
46,{red},{bag},0.809971


In [18]:
noun20_1

{'bag': 47808,
 'lunch': 14717,
 'design': 9532,
 'vintage': 8744,
 'retrospot': 6031,
 'paisley': 5953,
 'charlotte': 5882,
 'suki': 3845,
 'apple': 3300,
 'woodland': 3215,
 'polkadot': 3165,
 'pink': 2968,
 'spaceboy': 2818,
 'skull': 2442,
 'strawberry': 2271,
 'alphabet': 2190,
 'blue': 2028,
 'storage': 1934,
 'leaf': 1624,
 'shoulder': 1610}

In [19]:
adj20_1

{'jumbo': 21139,
 'black': 2310,
 'red': 1745,
 'girl': 1585,
 'scandinavian': 1320,
 'shopper': 1202,
 'white': 1086,
 'marble': 702,
 'floral': 661,
 'suki': 532,
 'skull': 413,
 'blue': 363,
 'cosmetic': 189,
 'overnight': 186,
 'save': 177,
 'soft': 103,
 'brown': 97,
 'grey': 84,
 'circular': 60,
 'lolita': 55}

In [20]:
noun20_19, adj20_19, clust20_19, rules20_19_df = get_cluster_properties(clusters20,19,0.05,0.05)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [21]:
noun20_19

{'heart': 27228,
 'decoration': 8322,
 'wicker': 7032,
 'ivory': 2693,
 'bell': 2466,
 'chalkboard': 2218,
 'zinc': 2188,
 'star': 2026,
 'butterfly': 1708,
 'mirror': 1587,
 'pot': 1403,
 'wood': 1364,
 'pink': 1356,
 'trinket': 1269,
 'hanging': 1148,
 'bird': 1127,
 'basket': 1110,
 'slate': 1104,
 'wreath': 965,
 'picnic': 943}

In [22]:
adj20_19

{'small': 6470,
 'large': 6089,
 'white': 4045,
 'natural': 2643,
 'slate': 1280,
 'metal': 1113,
 'red': 1028,
 'crystal': 777,
 'decorative': 608,
 'doormat': 599,
 'green': 565,
 'cream': 536,
 'rustic': 533,
 'ivory': 487,
 'sleigh': 476,
 'hottie': 449,
 'gingham': 427,
 'single': 390,
 'filigree': 347,
 'zinc': 317}

In [23]:
rules20_19_df 

Unnamed: 0,0,1,2
37,"{slate, natural}",{chalkboard},0.930369
36,{slate},"{natural, chalkboard}",0.930369
35,{slate},{chalkboard},0.930369
34,{natural},{slate},0.902005
33,{natural},"{slate, chalkboard}",0.839198
32,{natural},{chalkboard},0.839198
31,"{small, wicker}",{heart},0.780772
30,"{large, wicker}",{heart},0.746827
29,{white},{heart},0.724598
28,{ivory},{heart},0.718239


In [68]:
noun20_6, adj20_6, clust20_6, rules20_6_df = get_cluster_properties(clusters20,6,0.05,0.05)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [69]:
noun20_6

{'tin': 14350,
 'design': 11550,
 'pantry': 6212,
 'mug': 4993,
 'vintage': 4555,
 'plaster': 3733,
 'cake': 3580,
 'childrens': 3559,
 'spaceboy': 2519,
 'coffee': 2221,
 'pack': 1921,
 'wrap': 1898,
 'london': 1601,
 'apple': 1396,
 'circus': 1373,
 'biscuit': 1249,
 'knack': 1243,
 'mini': 1160,
 'tea': 1156,
 'parade': 1155}

In [70]:
adj20_6

{'girl': 4955,
 'knick': 1243,
 'magnetic': 1071,
 'magic': 884,
 'spaceboy': 776,
 'napkin': 709,
 'green': 524,
 'tea': 509,
 'ceramic': 463,
 'skull': 413,
 'save': 374,
 'poppy': 355,
 'tonic': 304,
 'queen': 279,
 'cat': 270,
 'boy': 247,
 'doormat': 235,
 'lola': 219,
 'mitt': 218,
 'slate': 203}

In [71]:
rules20_6_df

Unnamed: 0,0,1,2
62,{dolly},{girl},0.985029
61,"{set, design}",{pantry},0.977639
60,"{tin, cake}",{set},0.923182
59,{girl},{dolly},0.907997
58,"{tin, design}","{pantry, set}",0.860764
57,"{tin, design}",{pantry},0.860764
56,"{tin, design}",{set},0.860764
55,"{pantry, set}",{design},0.853820
54,"{pantry, tin, set}",{design},0.821529
53,"{pantry, tin}","{set, design}",0.821529


In [72]:
noun20_8, adj20_8, clust20_8, rules20_8_df = get_cluster_properties(clusters20,8,0.05,0.05)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [73]:
noun20_8

{'christmas': 15635,
 'decoration': 5968,
 'light': 3690,
 'ribbon': 3373,
 'star': 3321,
 'tree': 3012,
 'garland': 2746,
 'heart': 2569,
 'vintage': 1967,
 'craft': 1917,
 'bell': 1455,
 'scandinavian': 1443,
 'wrap': 1264,
 'rabbit': 1260,
 'wooden': 1236,
 'sticker': 1203,
 'roll': 1201,
 'night': 1051,
 'charm': 1021,
 'top': 971}

In [74]:
adj20_8

{'wooden': 4486,
 'traditional': 3519,
 'white': 1344,
 'happy': 1065,
 'rustic': 986,
 'tree': 884,
 'light': 684,
 'green': 651,
 'd': 635,
 'little': 565,
 'red': 496,
 'table': 486,
 'angel': 402,
 'magic': 376,
 'bunny': 371,
 'bauble': 355,
 'giant': 346,
 'lantern': 341,
 'sleigh': 318,
 'wish': 274}

In [75]:
rules20_8_df

Unnamed: 0,0,1,2
11,{tree},{christmas},0.651065
10,{star},{christmas},0.649763
9,{ribbon},{christmas},0.566854
8,{decoration},{wooden},0.394605
7,{wooden},{decoration},0.333003
6,{decoration},{christmas},0.31937
5,{wooden},{christmas},0.292845
4,{christmas},{tree},0.194716
3,{christmas},{star},0.138155
2,{christmas},{wooden},0.130588


In [64]:
noun20_13, adj20_13, clust20_13, rules20_13_df = get_cluster_properties(clusters20,13,0.05,0.05)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [65]:
noun20_13

{'box': 24398,
 'lunch': 2927,
 'trinket': 2810,
 'recipe': 2570,
 'book': 2196,
 'design': 1970,
 'cutlery': 1876,
 'snack': 1721,
 'round': 1690,
 'tissue': 1573,
 'retrospot': 1521,
 'pink': 1439,
 'gingham': 1372,
 'harmonica': 1291,
 'pack': 1230,
 'pantry': 1180,
 'craft': 1171,
 'woodland': 1166,
 'blue': 1153,
 'vintage': 1093}

In [66]:
adj20_13

{'red': 1877,
 'ceramic': 1648,
 'yellow': 1400,
 'victorian': 943,
 'strawberry': 770,
 'small': 697,
 'wooden': 620,
 'blue': 620,
 'sweetheart': 543,
 'green': 467,
 'bundle': 359,
 'candle': 291,
 'large': 287,
 'abc': 239,
 'pen': 223,
 'gymkhana': 215,
 'ocean': 207,
 'picnic': 203,
 'swallow': 166,
 'cosy': 159}

In [67]:
rules20_13_df

Unnamed: 0,0,1,2
71,{set},{box},0.995775
70,"{round, set, box}",{snack},0.994675
69,"{set, round}","{box, snack}",0.994675
68,"{set, round}",{snack},0.994675
64,{snack},"{set, round}",0.976758
61,{snack},{set},0.976758
62,{snack},{round},0.976758
63,{snack},{box},0.976758
65,{snack},"{round, box}",0.976758
66,{snack},"{set, box}",0.976758


In [24]:
clust25 = get_clust_description(clusters25)
clust25

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


Unnamed: 0,cluster,frequency,percent,percent_sum_top_n
0,24,46460,8.573395,56.505059
1,3,35426,6.53726,56.505059
2,13,34293,6.328184,56.505059
3,11,33769,6.231489,56.505059
4,9,31149,5.748013,56.505059
5,22,29412,5.42748,56.505059
6,2,26854,4.955445,56.505059
7,21,24302,4.484517,56.505059
8,23,22454,4.1435,56.505059
9,14,22087,4.075777,56.505059


In [27]:
noun25_24, adj25_24, clust25_24, rules25_24_df = get_cluster_properties(clusters25,24,0.05,0.05)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [28]:
noun25_24

{'bag': 39656,
 'lunch': 15699,
 'design': 9532,
 'vintage': 7493,
 'charlotte': 4814,
 'paisley': 4652,
 'suki': 3845,
 'box': 3545,
 'woodland': 3215,
 'polkadot': 3165,
 'apple': 3123,
 'strawberry': 2869,
 'spaceboy': 2818,
 'pink': 2749,
 'skull': 2715,
 'alphabet': 2190,
 'storage': 1934,
 'blue': 1872,
 'cutlery': 1740,
 'leaf': 1624}

In [29]:
adj25_24

{'jumbo': 18980,
 'girl': 2578,
 'black': 2310,
 'scandinavian': 1280,
 'shopper': 1202,
 'white': 960,
 'marble': 702,
 'suki': 532,
 'skull': 413,
 'blue': 257,
 'save': 177,
 'red': 134,
 'soft': 103,
 'modern': 99,
 'circular': 60,
 'lolita': 55,
 'du': 53,
 'sud': 53,
 'cosmetic': 53,
 'metalic': 37}

In [30]:
rules25_24_df

Unnamed: 0,0,1,2
43,{girl},{dolly},0.99533
42,{woodland},{bag},0.967963
41,{suki},{bag},0.91501
40,{jumbo},{bag},0.885722
39,{vintage},{bag},0.827439
38,{skull},{bag},0.819373
37,{lunch},{bag},0.774189
36,{paisley},{bag},0.734738
35,"{jumbo, vintage}",{bag},0.732354
34,{paisley},{jumbo},0.701849


In [31]:
noun25_3, adj25_3, clust25_3, rules25_3_df = get_cluster_properties(clusters25,3,0.05,0.05)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [32]:
noun25_3

{'paper': 8366,
 'set': 7490,
 'christmas': 4000,
 'chain': 3420,
 'vintage': 3094,
 'jam': 2663,
 'retrospot': 2650,
 'cutter': 2575,
 'heart': 2554,
 'making': 2411,
 'cookie': 2335,
 'piece': 1666,
 'jar': 1481,
 'sticker': 1328,
 'tlights': 1208,
 'magnet': 1132,
 'stationery': 1071,
 'gift': 1053,
 'napkin': 1022,
 'kid': 1000}

In [33]:
adj25_3

{'white': 939,
 'traditional': 895,
 'kit': 850,
 'floral': 821,
 'lantern': 713,
 'd': 669,
 'light': 629,
 'napkin': 534,
 'pink': 515,
 'black': 487,
 'retrospot': 462,
 'soldier': 455,
 'skittle': 455,
 'collage': 408,
 'modern': 376,
 'colour': 376,
 'retro': 367,
 'teatime': 339,
 'girl': 283,
 'lucky': 260}

In [34]:
rules25_3_df

Unnamed: 0,0,1,2
57,{chain},"{paper, kit}",0.97193
56,{chain},{kit},0.97193
55,{chain},{paper},0.97193
54,{heart},{set},0.951449
53,"{cutter, set}",{cookie},0.906796
52,{cutter},"{cookie, set}",0.906796
51,{cutter},{cookie},0.906796
50,"{set, jam}",{making},0.90537
49,{jam},"{making, set}",0.90537
48,{jam},{making},0.90537


In [35]:
noun25_13, adj25_13, clust25_13, rules25_13_df = get_cluster_properties(clusters25,13,0.05,0.05)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [36]:
noun25_13

{'heart': 19945,
 'wicker': 5995,
 'ivory': 2947,
 'photo': 2848,
 'chalkboard': 2218,
 'frame': 2142,
 'hook': 1977,
 'butterfly': 1640,
 'sweetheart': 1611,
 'mirror': 1566,
 'pink': 1559,
 'cream': 1326,
 'wood': 1320,
 'egg': 1300,
 'mini': 1290,
 'wire': 1183,
 'slate': 1104,
 'shape': 1037,
 'hanging': 888,
 'antique': 886}

In [37]:
adj25_13

{'small': 5676,
 'large': 5598,
 'white': 5018,
 'natural': 2712,
 'red': 1602,
 'slate': 1280,
 'crystal': 745,
 'ivory': 702,
 'decorative': 614,
 'doormat': 599,
 'hottie': 449,
 'magic': 409,
 'single': 390,
 'classic': 381,
 'antique': 379,
 'filigree': 346,
 'green': 324,
 'triple': 313,
 'victorian': 294,
 'cupid': 293}

In [38]:
rules25_13_df

Unnamed: 0,0,1,2
39,"{small, wicker}",{heart},0.94491
38,"{large, wicker}",{heart},0.93058
37,"{slate, natural}",{chalkboard},0.930369
36,{slate},"{natural, chalkboard}",0.930369
35,{slate},{chalkboard},0.930369
34,{frame},{photo},0.89169
33,{natural},{slate},0.879056
32,{love},{heart},0.860417
31,{wicker},{heart},0.831359
30,{red},{heart},0.82574


In [39]:
clust30 = get_clust_description(clusters30)
clust30

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


Unnamed: 0,cluster,frequency,percent,percent_sum_top_n
0,3,36516,6.738401,54.258741
1,2,36044,6.651301,54.258741
2,1,35259,6.506443,54.258741
3,12,30822,5.687671,54.258741
4,7,30338,5.598357,54.258741
5,9,27509,5.076314,54.258741
6,0,25698,4.742125,54.258741
7,4,25680,4.738803,54.258741
8,20,23476,4.332093,54.258741
9,10,22691,4.187234,54.258741


In [52]:
noun30_3, adj30_3, clust30_3, rules30_3_df = get_cluster_properties(clusters30,3,0.025,0.025)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [53]:
noun30_3

{'ribbon': 4693,
 'reel': 2346,
 'postage': 1961,
 'rack': 1548,
 'notebook': 1504,
 'nan': 1454,
 'pink': 1286,
 'coat': 1283,
 'charm': 1272,
 'block': 1190,
 'magnet': 1186,
 'letter': 1172,
 'ring': 1164,
 'fob': 1069,
 'babushka': 1010,
 'parasol': 1009,
 'tray': 910,
 'heart': 905,
 'purse': 855,
 'gumball': 842}

In [54]:
adj30_3

{'key': 2116,
 'black': 1797,
 'edwardian': 1216,
 'rustic': 995,
 'purple': 721,
 'white': 677,
 'yellow': 628,
 'traditional': 622,
 'blue': 620,
 'small': 607,
 'manual': 572,
 'decorative': 569,
 'natural': 558,
 'wooden': 534,
 'red': 454,
 'large': 410,
 'acrylic': 344,
 'grand': 303,
 'brown': 300,
 'snowy': 283}

In [55]:
rules30_3_df

Unnamed: 0,0,1,2
16,{reel},{ribbon},0.999254
15,{rustic},"{ribbon, charm}",0.990955
14,{rustic},{ribbon},0.990955
13,{rustic},{charm},0.990955
12,{fob},{key},0.934261
11,{parasol},{edwardian},0.930624
10,{rack},{coat},0.828811
9,{charm},"{rustic, ribbon}",0.775157
8,{charm},{ribbon},0.775157
7,{charm},{rustic},0.775157


In [56]:
noun30_2, adj30_2, clust30_2, rules30_2_df = get_cluster_properties(clusters30,2,0.03,0.03)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [57]:
noun30_2

{'set': 8993,
 'retrospot': 3142,
 'vintage': 3070,
 'cutter': 2575,
 'paper': 2363,
 'cookie': 2335,
 'heart': 2284,
 'tea': 2146,
 'pantry': 1888,
 'piece': 1799,
 'spaceboy': 1794,
 'pencil': 1539,
 'stationery': 1524,
 'case': 1385,
 'design': 1267,
 'tlights': 1217,
 'london': 1198,
 'christmas': 1198,
 'colour': 1187,
 'jam': 1182}

In [58]:
adj30_2

{'wooden': 1530,
 'ceramic': 1080,
 'floral': 821,
 'skittle': 608,
 'white': 597,
 'english': 588,
 'napkin': 534,
 'girl': 523,
 'pink': 515,
 'black': 487,
 'retrospot': 462,
 'soldier': 455,
 'modern': 400,
 'light': 393,
 'tea': 389,
 'colour': 376,
 'retro': 367,
 'teatime': 339,
 'lucky': 260,
 'traditional': 244}

In [59]:
rules30_2_df

Unnamed: 0,0,1,2
108,"{set, printed}","{making, jam}",0.996627
107,{printed},"{making, set, jam}",0.996627
106,"{set, printed}",{making},0.996627
105,{printed},"{making, set}",0.996627
104,{printed},"{making, jam}",0.996627
103,"{set, printed}",{jam},0.996627
102,{printed},"{set, jam}",0.996627
101,{printed},{making},0.996627
100,{printed},{jam},0.996627
99,{vintage},{set},0.992182


In [60]:
noun30_1, adj30_1, clust30_1, rules30_1_df = get_cluster_properties(clusters30,1,0.03,0.03)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [61]:
noun30_1

{'bag': 32274,
 'vintage': 8281,
 'paisley': 5829,
 'lunch': 5003,
 'polkadot': 3165,
 'woodland': 2982,
 'charlotte': 2862,
 'pink': 2749,
 'strawberry': 2271,
 'retrospot': 2231,
 'storage': 1934,
 'blue': 1872,
 'shopper': 1461,
 'alphabet': 1388,
 'shoulder': 1260,
 'christmas': 1235,
 'suki': 1214,
 'car': 1197,
 'picnic': 1143,
 'airline': 1096}

In [62]:
adj30_1

{'jumbo': 19983,
 'scandinavian': 1320,
 'shopper': 1202,
 'white': 1086,
 'black': 960,
 'marble': 702,
 'red': 677,
 'suki': 532,
 'blue': 445,
 'skull': 413,
 'floral': 311,
 'cosmetic': 189,
 'overnight': 186,
 'sud': 119,
 'du': 119,
 'soft': 103,
 'brown': 97,
 'circular': 60,
 'metalic': 37,
 'paperweight': 33}

In [63]:
rules30_1_df

Unnamed: 0,0,1,2
379,"{scandinavian, paisley}",{bag},0.969697
378,{scandinavian},"{bag, paisley}",0.969697
377,{scandinavian},{bag},0.969697
372,"{bag, retrospot}",{red},0.967727
368,{retrospot},{jumbo},0.967727
369,{retrospot},{red},0.967727
370,{retrospot},"{jumbo, red}",0.967727
371,{retrospot},"{bag, red}",0.967727
373,{retrospot},"{bag, jumbo}",0.967727
374,"{bag, retrospot}",{jumbo},0.967727
