<a href="https://colab.research.google.com/github/pastrop/kaggle/blob/master/In_Conjunction_modified.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# In Conjunction With
## Also can be called: In Conjunction With

This notebook finds concepts that are not synonyms of a given concept but are uniquely associated with it. For instance, "disappointed" would not return the concepts normally associated with it (frustrated, upset, sad), but would return things like "screen" or "camera".

In [None]:
from luminoso_api import V5LuminosoClient as LuminosoClient
import csv
import numpy as np
import pack64
import statistics

In [None]:
# URL of the Daylight project to analyse; split_url() reads the workspace id
# and project id from its path segments.
project_url = "https://daylight.luminoso.com/app/projects/p87t862f/prt38rfw/highlights" # kf-demo


In [None]:
def split_url(project_url):
    """Derive the ids and API endpoints from a Daylight project URL.

    The URL is trimmed of surrounding slashes and split on '/'. Segment 5 is
    taken as the workspace id and segment 6 as the project id; the first three
    segments (scheme, empty segment, host) form the base of the API URL.

    Returns:
        A tuple ``(workspace_id, project_id, api_url, proj_api)``.

    Raises:
        IndexError: if the URL has fewer than seven '/'-separated segments.
    """
    segments = project_url.strip('/').split('/')
    workspace_id, project_id = segments[5], segments[6]
    api_url = '/'.join(segments[:3]).strip('/') + '/api/v5'
    proj_api = '{}/projects/{}'.format(api_url, project_id)
    return (workspace_id, project_id, api_url, proj_api)


# Break the project URL apart and give each piece its own notebook-level name.
url_parts = split_url(project_url)
workspace_id, project_id, api_url, proj_apiv5 = url_parts

# Echo the derived values, one per line, so the cell output records which
# project the rest of the notebook talks to.
print(
    "api_url=" + api_url,
    "url=" + proj_apiv5,
    "prjid=" + project_id,
    "workspace_id=" + workspace_id,
    sep="\n",
)

api_url=https://daylight.luminoso.com/api/v5
url=https://daylight.luminoso.com/api/v5/projects/prt38rfw
prjid=prt38rfw
workspace_id=p87t862f


In [None]:
# Create the API client for the project endpoint derived above; ending the
# cell with the bare name displays the client.
client = LuminosoClient.connect(proj_apiv5)
client

<LuminosoClient for https://daylight.luminoso.com/api/v5/projects/prt38rfw/>

In [None]:
def read_documents_by_concepts(client, concepts, max_docs=0, filter=None):
    """Fetch the project documents that match a list of concept texts.

    Pages through ``client.get("/docs", ...)`` until a page comes back empty
    or ``max_docs`` documents have been collected.

    Parameters:
        client: object exposing ``get(path, **params)`` that returns a dict
            with a "result" list.
        concepts: list of texts, sent as ``search={"texts": concepts}``.
        max_docs: maximum number of documents to return; 0 or less means
            no limit.
        filter: optional document filter forwarded to the API; left out of
            the request when falsy. (The name shadows the builtin but is kept
            so existing keyword callers continue to work.)

    Returns:
        A list of document dicts, never longer than ``max_docs`` when
        ``max_docs`` is positive.
    """
    page_size = 5000

    # Request arguments that stay the same for every page.
    params = {"search": {"texts": concepts}}
    if filter:
        params["filter"] = filter

    docs = []
    while True:
        limit = page_size
        if max_docs > 0:
            # Never ask for more than is still needed to reach the cap.
            limit = min(page_size, max_docs - len(docs))
        page = client.get("/docs", limit=limit, offset=len(docs), **params)["result"]
        if not page:
            break
        docs.extend(page)
        if 0 < max_docs <= len(docs):
            break

    # Guard against a server that returns more than the requested limit.
    return docs[:max_docs] if max_docs > 0 else docs


def read_documents(client, max_docs=0, filter=None):
    """Fetch documents from the project, optionally restricted by a filter.

    Pages through ``client.get("/docs", ...)`` until a page comes back empty
    or ``max_docs`` documents have been collected.

    Parameters:
        client: object exposing ``get(path, **params)`` that returns a dict
            with a "result" list.
        max_docs: maximum number of documents to return; 0 or less means
            no limit.
        filter: optional document filter forwarded to the API; left out of
            the request when falsy. (The name shadows the builtin but is kept
            so existing keyword callers continue to work.)

    Returns:
        A list of document dicts, never longer than ``max_docs`` when
        ``max_docs`` is positive.
    """
    page_size = 5000

    # Request arguments that stay the same for every page.
    params = {}
    if filter:
        params["filter"] = filter

    docs = []
    while True:
        limit = page_size
        if max_docs > 0:
            # Never ask for more than is still needed to reach the cap.
            limit = min(page_size, max_docs - len(docs))
        page = client.get("/docs", limit=limit, offset=len(docs), **params)["result"]
        if not page:
            break
        docs.extend(page)
        if 0 < max_docs <= len(docs):
            break

    # Guard against a server that returns more than the requested limit.
    return docs[:max_docs] if max_docs > 0 else docs

def write_documents(client,docs):
    """Upload documents to the project in consecutive batches of at most 1000.

    For each batch a progress line is printed and the slice is sent with
    ``client.post('upload', docs=...)``. Nothing is returned.
    """
    total = len(docs)
    for offset in range(0, total, 1000):
        end = min(total, offset + 1000)
        print("offs={},  end={},  len={}".format(offset, end, total))
        client.post('upload', docs=docs[offset:end])


In [None]:
# Concept text to analyse; the following cells use it to look up the term id,
# the related concepts and the matching documents.
concept = "disappointed"

In [None]:
# get the details on this specific concept so we can get the term value
# Selector of type "specified" naming the single concept text to look up.
cs = {"type": "specified", "concepts": [{"texts": [concept]}]}

# Only the first entry of the "result" list is kept.
concept_details = client.get("/concepts", concept_selector=cs)["result"][0]
# Bare expression so the notebook displays the record.
concept_details

{'relevance': 377.0,
 'texts': ['disappointed'],
 'exact_term_ids': ['disappoint|en'],
 'excluded_term_ids': [],
 'vectors': ['XRyoFP4_NX41--TDAolAc-By9GwhBCw_eMCDP_tfAv2BW9ASwBtq8uX-RzBFF-afBsQ-Kc-0r-oN_ZWAaMA2L_Gs-NDAdI8-bA_lAB3_6LAJY-dg_az_Lb-8a_VGAZD_U___1A2R_e8-tFBSjAvpCyi-LE_KS_OyBSAAN4_jsAKC_Eq_erAL9_ET89v-1tAwFEAdBGR-ARD_k-NK9ffBlXA-s_xg_BnB6mDvvBzX-xe_KtCds_o4BS8AYABIJBl3_dYBsp9MjAI8--sAJNBzQ_wYAFdClk9j4BGCAGG9iJ-t7AnN_KXAeXCIk_94B5B-WX_k3-Uz_4p_GKAjO-v7Asl_pB9hz_9C-nO-YPAJzDWxCjTAyxARd-Su_JrAilBeeBm_ARw_m_BU4-or-9hBTR_jfAl9-kNAEzCz8BMV_ICDE7A8hBRd_jO_WY-b09ypCw0BV5CDFBUbDGO_0i_DJ-y0AS-A7CAOI-tb_Lw-g2CWY9_8-Dw9-A-JZ_Ht_qi-iJAQy9FV_bqBayCg3ARt_VvBf5AB9Amk_tTC6_Ac0_tV_3M-q_9lt-Wu9jp-zf-qN-bJAHh_27EHa_tw_M9CwC_N6AdIA4vAxM_hHAkTCrk-aN_cMCBDAnLAymCQDAqV-5EAl8_wGA96AJQCLWA6bBAjAT0_sfCVr_NYA7g_y7-mXBGo_pzAxZ_xiBrN-PL_bh_9W_FEDXp_TnANpCFdA6tA0fBEICrc-x4-XF_ou_uD-mg_Uw-v2AbBAfm_k1_Ya-7IAnoBAIAhL-5BBnSBGiAN-_NtAsOBDgAqP-Y5_dq_CH_lw_l1BpA_AK-nnBzPAtM9W0Anu--1Bx7-juAC9_fH_ni_enARqB-jBZPAb

In [None]:
# get the term value on the concept we are searching
# (the first entry of 'exact_term_ids', e.g. 'disappoint|en' for "disappointed")
concept_term = concept_details['exact_term_ids'][0]
concept_term

'disappoint|en'

In [None]:
# get the concepts related to the concept we are searching
# this is so we know which concepts are not interesting
# The selector asks for "related" concepts with "limit": 50; note that `cs`
# is the same name used for the "specified" selector in the earlier cell.
cs = {"type": "related", "search_concept": {"texts": [concept]}, "limit": 50}
related_concepts = client.get("/concepts", concept_selector=cs)["result"]


In [None]:
# Spot-check one related concept record (index 10).
related_concepts[10]

{'relevance': 22.0,
 'match_score': 0.6212236285209656,
 'texts': ['pleasantly surprised'],
 'exact_term_ids': ['pleasantly|en surprise|en'],
 'excluded_term_ids': [],
 'vectors': ['XXmeCU9I9C5596DEBHx_ukCJaFm__06A4_CaU8xG_78_o2AS4AI9_II_0qCCOCAtBhO_r5BE39VO_fZCVhCDZA7T_1--SB-Jn_wc-Af-uO9T1BkP-Vd_bDBIYAhG_ZB-thAzkAj9_mwA-rAY2BMVB1z9iUAMG9dPCIh-X2BVE98b-waBKj-xA-FO9smAlSBhJB3lAcJ-fjBskBoL-eCA44A-B_ayAlEAoZApq-hs_liAAiB5J9FK_Q7Br3AorA6rBvL-cJ-mmAh4AcpBZ3_-wAbfAQVCoz_HIAP7--u-bfBjt_j-APq955AdNAv5BSqAWa-lPAAJBMkAdWBDS_w7BJRAM__oTAIe-bR_5gANDAwCAgs_pq_uo_5PAPF_8lBFr_2g-1UAY3A4vAKdBIQ_sKBYMAJ2_N-_q0CRPBdu_fC_PZAO2ADA_-UAdx_rjAwWA4I_u-AYz_6GAS__NbAdr_zaAeZAyoAPN_nzBHZ_J-_vOATDAnV_rlAMV_rDBH-AdSAKF_qk_cLAirBaBAYC_Hi_kP-a1_WVAvjBO7_l8_thAYlAAZ_hQ_Xd_87_Vj-6c_Xe_Xw_eHAhk_uNAD2A_kAmw_J7AD8AC2A0AAL_Bk8AfyAPu_2gANiAKkAWbAWi_a4_OVAyyAYg_0X_nRA6m_-kBHiAAi_6NASTAFN_0_Am5_gGA98AnP_A2_tzA1e_26AtMAqxAEOAtWAUw_oM_4yAW9_mkAVBAiTACJ_5lAjWAZPAEE_NDARZAEt_voAmMAqhAaxAp1A7YBSSBDDAt2Avo_4VAqZAPcArI_Y1AmDACT_uC_

In [None]:
# convert the list to a list of term_ids
# Only the first exact term id of each related concept is kept.
related_term_ids = [c['exact_term_ids'][0] for c in related_concepts]
related_term_ids

['disappoint|en',
 'dissapointed|en',
 'disappointment|en',
 'dismay|en',
 'upset|en',
 'letdown|en',
 'surprise|en',
 'frustrate|en',
 'mislead|en',
 'impress|en',
 'pleasantly|en surprise|en',
 'deceive|en',
 'sad|en',
 'excite|en',
 'discourage|en',
 'bummer|en',
 'unhappy|en',
 'embarrass|en',
 'astonish|en',
 'disillusion|en',
 'unimpressed|en',
 'disapointed|en',
 'offend|en',
 'fascinate|en',
 'tempt|en',
 'admit|en',
 'shock|en',
 'needless|en',
 'fortunately|en',
 'not|en impress|en',
 'not|en fault|en',
 'bother|en',
 'suprise|en',
 'unfortunately|en',
 'satisfy|en',
 'proud|en',
 'sadly|en',
 'doubt|en',
 'baffle|en',
 'not|en stand|en',
 'ruin|en',
 'pitiful|en',
 'sincerely|en',
 'await|en',
 'depress|en',
 'sorely|en',
 'dissatisfy|en',
 'irk|en',
 'hopeful|en',
 'not|en rate|en']

In [None]:
# get the documents for the concept
# No max_docs or filter is passed, so every matching document is paged in.
related_docs = read_documents_by_concepts(client, [concept])
# Display how many documents matched.
len(related_docs)

1037

In [None]:

# Term ids (first exact term id only) of the project's top 100 concepts;
# used later as the filter for in-conjunction candidates.
cs = {"type": "top", "limit": 100}
top_concepts_100 = [
    tc['exact_term_ids'][0]
    for tc in client.get("/concepts", concept_selector=cs)["result"]
]
top_concepts_100

['kindle|en',
 'amazon|en',
 'tablet|en',
 'kindle|en fire|en',
 'app|en',
 'kindle|en fire|en hd|en',
 'purchase|en',
 'ipad|en',
 'device|en',
 'download|en',
 'hd|en',
 'screen|en',
 'buy|en',
 'fire|en hd|en',
 'ad|en',
 'speaker|en',
 'disappoint|en',
 'charger|en',
 'amazon|en prime|en',
 'laptop|en',
 'recommend|en',
 'reader|en',
 'kindle|en fire|en hd|en 7|en',
 'wifi|en',
 'product|en',
 'android|en',
 'upgrade|en',
 'charge|en',
 'email|en',
 'skype|en',
 'movie|en',
 'battery|en life|en',
 'load|en',
 'watch|en movie|en',
 'display|en',
 'button|en',
 'gift|en',
 'kindle|en hd|en',
 'book|en',
 'cloud|en',
 'netflix|en',
 'item|en',
 'connect|en',
 'love|en kindle|en',
 'feature|en',
 'ipad|en mini|en',
 'annoy|en',
 'complaint|en',
 'plug|en',
 'keyboard|en',
 'battery|en',
 'update|en',
 'cheap|en',
 'camera|en',
 'read|en',
 'ship|en',
 'highly|en recommend|en',
 'love|en kindle|en fire|en hd|en',
 'nexus|en 7|en',
 'new|en kindle|en',
 'fast|en',
 'stream|en',
 'sound|e

In [None]:
# Inspect the first matching document.
related_docs[0]

{'text': 'I was so excited that my husband bought this for me for Christmas, I couldn\'t wait to get it! I researched and asked everyone if I should get this or the Nook and hands down it was the Kindle Fire Hd. I am excited for all that it has to offer including the music and movies for when I\'m up north, there are endless possiblities!! I think unfortunately, I got a "glitchy" one. I have called Amazon 2x regarding problems so far and if I have to call again, I will return it! I am so upset. I absolutely love the fact that I have my books wherever I go, that\'s my favorite thing. I am a major reader, but then to be able to go on Facebook or the web is just so awesome! The books for readers out there are just amazing and Amazon has an awesome library to choose from! How can you get better than that? I am so upset though because I do not think that the "glitches" I have are due to user error. I was at the library downloading my books, it kept stopping midway and then when it was done,

In [None]:
# given a list of locations, find the closest
def find_closest_proximity(term_loc, related_locations):
    """Return the signed offset from ``term_loc`` to the nearest location.

    The offset is ``location - term_loc``, so it is negative when the nearest
    location lies before ``term_loc``. When two locations are equally near,
    the one that appears first in ``related_locations`` wins.

    Returns None when ``related_locations`` is empty.
    """
    if len(related_locations) == 0:
        return None
    # min() keeps the first of several equally small items, which matches a
    # strict "smaller than the best so far" scan.
    return min((loc - term_loc for loc in related_locations), key=abs)

def in_conjunction_with_raw(docs, concept_term_id, related_terms_ids, top_concept_filter=None):
    """Find the terms that co-occur with a concept without being related to it.

    For every document, each term is classified as an exact match
    (``concept_term_id``), a conceptual match (any id in
    ``related_terms_ids``) or an in-conjunction candidate (everything else).
    For each candidate the signed offset from each of its occurrences to the
    nearest exact match and to the nearest conceptual match is averaged per
    document, and those per-document averages are then averaged over all
    documents containing the candidate.

    Parameters:
        docs: documents to measure; each is a dict with a 'terms' list whose
            entries carry 'term_id' and 'start'.
        concept_term_id: term id of the concept being analysed,
            e.g. 'disappoint|en'.
        related_terms_ids: term ids associated with the concept, e.g.
            ['disappoint|en', 'dissapointed|en', 'disappointment|en'].
        top_concept_filter: optional list of term ids, e.g.
            ['kindle|en', 'amazon|en', 'tablet|en']. When non-empty, terms
            not in this list are skipped entirely.

    Returns:
        A dict keyed by candidate term id, in first-seen order. Each value is
        a dict with the keys 'concept', 'in_conjunction_with_count' (total
        occurrences), 'in_conjunction_with_exact_avg_pos' and
        'in_conjunction_with_conceptual_avg_pos'.

    Notes:
        * Offsets are ``match start - candidate start``, so a positive value
          means the nearest match starts after the candidate.
        * A document with no exact (or conceptual) match contributes 0 to
          the corresponding average.
        * NOTE(review): the filter is applied before classification, so exact
          and conceptual matches that are absent from ``top_concept_filter``
          are ignored too. Confirm this is intended.
    """
    # Use the argument rather than a same-looking notebook global, and use
    # sets so membership tests in the per-term loop are O(1).
    related_lookup = set(related_terms_ids or ())
    allowed_terms = set(top_concept_filter) if top_concept_filter else None

    # term_id -> per-document averages plus the running occurrence count
    accumulated = {}

    for d in docs:
        exact_match_locations = []
        conceptual_match_locations = []
        candidate_locations = {}  # term_id -> start offsets within this doc

        for t in d['terms']:
            term_id = t['term_id']
            if allowed_terms is not None and term_id not in allowed_terms:
                continue
            if term_id == concept_term_id:
                exact_match_locations.append(t['start'])
            elif term_id in related_lookup:
                conceptual_match_locations.append(t['start'])
            else:
                candidate_locations.setdefault(term_id, []).append(t['start'])

        # Average, per candidate, the offset to the nearest exact and nearest
        # conceptual match over all of its occurrences in this document.
        for term_id, locations in candidate_locations.items():
            total_exact = 0
            total_conceptual = 0
            for loc in locations:
                offset = find_closest_proximity(loc, exact_match_locations)
                if offset:  # None (no matches) and 0 both add nothing
                    total_exact += offset
                offset = find_closest_proximity(loc, conceptual_match_locations)
                if offset:
                    total_conceptual += offset

            entry = accumulated.setdefault(
                term_id, {'exact': [], 'conceptual': [], 'count': 0}
            )
            entry['exact'].append(total_exact / len(locations))
            entry['conceptual'].append(total_conceptual / len(locations))
            entry['count'] += len(locations)

    # Collapse the per-document averages into one mean per candidate.
    return {
        term_id: {
            'concept': term_id,
            'in_conjunction_with_count': entry['count'],
            'in_conjunction_with_exact_avg_pos': statistics.mean(entry['exact']),
            'in_conjunction_with_conceptual_avg_pos': statistics.mean(entry['conceptual']),
        }
        for term_id, entry in accumulated.items()
    }


    


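## To analyse a different term, change the value of `concept` below and re-run from the next cell

Note: `concept_term`, `related_term_ids` and `related_docs` are computed from `concept` in earlier cells, so re-run those cells as well for the new term to take effect.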

In [None]:
# Concept text; the CSV-writing cell below uses it to build the output file name.
# NOTE: concept_term, related_term_ids and related_docs were computed in
# earlier cells and are not refreshed by changing this value alone.
concept = "disappointed"

In [None]:
# Run the analysis on the first 100 matching documents only, with the top-100
# concept term ids as the filter.
icw = in_conjunction_with_raw(related_docs[0:100], concept_term, related_term_ids, top_concepts_100)
icw

{'buy|en': {'concept': 'buy|en',
  'in_conjunction_with_count': 95,
  'in_conjunction_with_exact_avg_pos': 283.26100628930817,
  'in_conjunction_with_conceptual_avg_pos': -3.2169811320754715},
 'christmas|en': {'concept': 'christmas|en',
  'in_conjunction_with_count': 23,
  'in_conjunction_with_exact_avg_pos': 544.3214285714286,
  'in_conjunction_with_conceptual_avg_pos': -173.21428571428572},
 'nook|en': {'concept': 'nook|en',
  'in_conjunction_with_count': 23,
  'in_conjunction_with_exact_avg_pos': -160.02380952380952,
  'in_conjunction_with_conceptual_avg_pos': 0.0},
 'kindle|en fire|en hd|en': {'concept': 'kindle|en fire|en hd|en',
  'in_conjunction_with_count': 57,
  'in_conjunction_with_exact_avg_pos': 590.3171717171717,
  'in_conjunction_with_conceptual_avg_pos': 85.9060606060606},
 'movie|en': {'concept': 'movie|en',
  'in_conjunction_with_count': 33,
  'in_conjunction_with_exact_avg_pos': 114.41176470588235,
  'in_conjunction_with_conceptual_avg_pos': -113.83333333333333},
 'b

In [None]:
# Result records ordered by occurrence count, highest first; the sort is
# stable, so records with equal counts keep their original relative order.
sorted_icw = sorted(
    icw.values(),
    key=lambda record: record['in_conjunction_with_count'],
    reverse=True,
)

In [None]:
# Display the records, highest in_conjunction_with_count first.
sorted_icw

[{'concept': 'kindle|en',
  'in_conjunction_with_count': 216,
  'in_conjunction_with_exact_avg_pos': 33.48098438300052,
  'in_conjunction_with_conceptual_avg_pos': -36.27222222222222},
 {'concept': 'app|en',
  'in_conjunction_with_count': 201,
  'in_conjunction_with_exact_avg_pos': -0.5864416833166862,
  'in_conjunction_with_conceptual_avg_pos': -113.66886446886447},
 {'concept': 'amazon|en',
  'in_conjunction_with_count': 192,
  'in_conjunction_with_exact_avg_pos': -111.5646505376344,
  'in_conjunction_with_conceptual_avg_pos': -154.2068548387097},
 {'concept': 'device|en',
  'in_conjunction_with_count': 154,
  'in_conjunction_with_exact_avg_pos': 63.500579975579974,
  'in_conjunction_with_conceptual_avg_pos': -56.78472222222223},
 {'concept': 'tablet|en',
  'in_conjunction_with_count': 112,
  'in_conjunction_with_exact_avg_pos': 57.94833333333334,
  'in_conjunction_with_conceptual_avg_pos': -55.23199404761905},
 {'concept': 'book|en',
  'in_conjunction_with_count': 99,
  'in_conjunct

In [None]:
# write csv sample
# output the sorted in-conjunction records for the current concept

# when you need a calculated list of fields
# fields = list(set(val for dic in result_data for val in dic.keys())) 
output_csv_file = "icw2_{}.csv".format(concept)
# Column order follows the key order of the first record.
fields = list(sorted_icw[0].keys())
# newline='' lets the csv module control line endings itself; without it,
# platforms that translate "\n" on write get an extra blank line per row.
with open(output_csv_file, 'w', newline='') as f:
    writer = csv.DictWriter(f, fields)
    writer.writeheader()
    writer.writerows(sorted_icw)

# get in_conjunction data from the shared concept lists

In [None]:
def in_conjunction_with(concept, doc_filter=None, top_concept_filter_count=-1):
    """Run the whole in-conjunction analysis for one concept text.

    Looks up the concept's term id and its related concepts through the
    notebook-level ``client``, reads the documents matching the concept, and
    passes everything to ``in_conjunction_with_raw``.

    Parameters:
        concept: concept text to analyse, e.g. "disappointed".
        doc_filter: optional document filter forwarded to
            ``read_documents_by_concepts``; None or empty means no filter.
        top_concept_filter_count: when greater than 0, only candidates among
            that many top concepts of the project are kept; otherwise no
            top-concept filter is applied.

    Returns:
        A list of result dicts (see ``in_conjunction_with_raw``), sorted by
        'in_conjunction_with_count' in descending order.
    """
    # get the details on this specific concept so we can get the term value
    cs = {"type": "specified", "concepts": [{"texts": [concept]}]}
    concept_details = client.get("/concepts", concept_selector=cs)["result"][0]
    # get the term value on the concept we are searching
    concept_term = concept_details['exact_term_ids'][0]

    # get the concepts related to the concept we are searching
    # this is so we know which concepts are not interesting
    cs = {"type": "related", "search_concept": {"texts": [concept]}, "limit": 50}
    related_concepts = client.get("/concepts", concept_selector=cs)["result"]

    # convert the list to a list of term_ids
    related_term_ids = [c['exact_term_ids'][0] for c in related_concepts]

    # get the documents for the concept
    related_docs = read_documents_by_concepts(client, [concept], filter=doc_filter)

    if top_concept_filter_count > 0:
        cs = {"type": "top", "limit": top_concept_filter_count}
        top_concepts = client.get("/concepts", concept_selector=cs)["result"]
        top_concepts = [tc['exact_term_ids'][0] for tc in top_concepts]
    else:
        top_concepts = None

    # Pass the filter built above, so top_concept_filter_count takes effect
    # and the function does not depend on a notebook global.
    icw = in_conjunction_with_raw(related_docs, concept_term, related_term_ids, top_concepts)

    # Stable sort: records with equal counts keep their first-seen order.
    return sorted(
        icw.values(),
        key=lambda record: record['in_conjunction_with_count'],
        reverse=True,
    )

In [None]:
# Fetch the project's saved concept lists; the code below reads each list's
# 'name' and 'concepts', and each concept's 'name'.
concept_lists = client.get('concept_lists/')
concept_lists

[{'name': 'cluster7x4',
  'concept_list_id': '70974115-be92-4249-9870-47591a158079',
  'concepts': [{'shared_concept_id': 'edc90f25-9008-4244-82d8-fd3b72e1f461',
    'name': 'download',
    'texts': ['download'],
    'color': '#808080'},
   {'shared_concept_id': 'de9d7951-375c-4a4e-ac4e-1559468b7db2',
    'name': 'Android',
    'texts': ['Android'],
    'color': '#808080'},
   {'shared_concept_id': 'b49ceebc-8f7d-42e7-b49a-52f5e7400b92',
    'name': 'email',
    'texts': ['email'],
    'color': '#808080'},
   {'shared_concept_id': '3ebf081d-644c-49de-86b1-9a2da108e301',
    'name': 'Skype',
    'texts': ['Skype'],
    'color': '#808080'},
   {'shared_concept_id': '37461649-79fc-49b2-af8a-e96983fb59b4',
    'name': 'iPad',
    'texts': ['iPad'],
    'color': '#808080'},
   {'shared_concept_id': '4efa1e3d-53fe-497a-abf9-20f683065f06',
    'name': 'device',
    'texts': ['device'],
    'color': '#808080'},
   {'shared_concept_id': '2ebd8eee-8918-4ee2-987a-4fe2285e2f1a',
    'name': 'charg

In [None]:
# run a single concept as a test: the first concept of the first saved list,
# with a top-concept filter size of 100
in_conjunction_with(concept_lists[0]['concepts'][0]['name'], top_concept_filter_count=100)

[{'concept': 'kindle|en',
  'in_conjunction_with_count': 1555,
  'in_conjunction_with_exact_avg_pos': 55.622244135992126,
  'in_conjunction_with_conceptual_avg_pos': -0.053832146437108055},
 {'concept': 'app|en',
  'in_conjunction_with_count': 1315,
  'in_conjunction_with_exact_avg_pos': -3.5226120514596926,
  'in_conjunction_with_conceptual_avg_pos': 1.0321242676312332},
 {'concept': 'amazon|en',
  'in_conjunction_with_count': 1279,
  'in_conjunction_with_exact_avg_pos': -33.567281589755275,
  'in_conjunction_with_conceptual_avg_pos': -40.517411185305924},
 {'concept': 'device|en',
  'in_conjunction_with_count': 1188,
  'in_conjunction_with_exact_avg_pos': 51.2112221303337,
  'in_conjunction_with_conceptual_avg_pos': 32.84839483289896},
 {'concept': 'book|en',
  'in_conjunction_with_count': 955,
  'in_conjunction_with_exact_avg_pos': 11.182036578974044,
  'in_conjunction_with_conceptual_avg_pos': 22.43177235385973},
 {'concept': 'tablet|en',
  'in_conjunction_with_count': 798,
  'in_c

In [None]:
# Attach the analysis result to every saved concept, in place, under the
# 'in_conjunction_with' key. Each in_conjunction_with() call issues several
# API requests, so this cell is the slow part of the notebook.
for cl in concept_lists:
    for c in cl['concepts']:
        c['in_conjunction_with'] = in_conjunction_with(c['name'], top_concept_filter_count=100)
        
        

In [None]:
# Show the first concept list, whose concepts now carry 'in_conjunction_with'.
concept_lists[0]

{'name': 'cluster7x4',
 'concept_list_id': '70974115-be92-4249-9870-47591a158079',
 'concepts': [{'shared_concept_id': 'edc90f25-9008-4244-82d8-fd3b72e1f461',
   'name': 'download',
   'texts': ['download'],
   'color': '#808080',
   'in_conjunction_with': [{'concept': 'kindle|en',
     'in_conjunction_with_count': 1555,
     'in_conjunction_with_exact_avg_pos': 55.622244135992126,
     'in_conjunction_with_conceptual_avg_pos': -0.053832146437108055},
    {'concept': 'app|en',
     'in_conjunction_with_count': 1315,
     'in_conjunction_with_exact_avg_pos': -3.5226120514596926,
     'in_conjunction_with_conceptual_avg_pos': 1.0321242676312332},
    {'concept': 'amazon|en',
     'in_conjunction_with_count': 1279,
     'in_conjunction_with_exact_avg_pos': -33.567281589755275,
     'in_conjunction_with_conceptual_avg_pos': -40.517411185305924},
    {'concept': 'device|en',
     'in_conjunction_with_count': 1188,
     'in_conjunction_with_exact_avg_pos': 51.2112221303337,
     'in_conjunct

In [None]:
# flatten out the concepts: one export row per
# (saved concept, in-conjunction concept) pair
# A comprehension keeps the loop names local to this cell, so the
# notebook-level `icw` result dict is not overwritten by a single record.
export_data = [
    {
        'saved_concept_list': concept_list['name'],
        'saved_concept': saved_concept['name'],
        'icw_concept': icw_record['concept'],
        'icw_count': icw_record['in_conjunction_with_count'],
        'icw_exact_avg_pos': icw_record['in_conjunction_with_exact_avg_pos'],
        'icw_conceptual_avg_pos': icw_record['in_conjunction_with_conceptual_avg_pos'],
    }
    for concept_list in concept_lists
    for saved_concept in concept_list['concepts']
    for icw_record in saved_concept['in_conjunction_with']
]

In [None]:
# Inspect the first flattened row.
export_data[0]

{'saved_concept_list': 'cluster7x4',
 'saved_concept': 'download',
 'icw_concept': 'kindle|en',
 'icw_count': 1555,
 'icw_exact_avg_pos': 55.622244135992126,
 'icw_conceptual_avg_pos': -0.053832146437108055}

In [None]:
# write csv sample
# output the flattened rows for all saved concept lists

# when you need a calculated list of fields
# fields = list(set(val for dic in result_data for val in dic.keys())) 
output_csv_file = "output.csv"
# Column order follows the key order of the first row.
fields = list(export_data[0].keys())
# newline='' lets the csv module control line endings itself; without it,
# platforms that translate "\n" on write get an extra blank line per row.
with open(output_csv_file, 'w', newline='') as f:
    writer = csv.DictWriter(f, fields)
    writer.writeheader()
    writer.writerows(export_data)