In [1]:
from textblob import TextBlob
from collections import Counter
import numpy as np

In [2]:
# Load the evaluation queries, one per line, stripping surrounding whitespace
# (including the trailing newline). The context manager guarantees the file
# handle is closed once the list has been built.
with open("data/queries.txt", 'r') as queries_file:
    queries = [line.strip() for line in queries_file]

In [3]:
def get_num_words(query: str):
    """Return how many word tokens TextBlob finds in `query`.

    The count is the length of `TextBlob(query).words`.
    """
    return len(TextBlob(query).words)
    

In [4]:
# Histogram of query lengths: maps word count -> number of queries with that count.
lengths = Counter(map(get_num_words, queries))


In [5]:
# Shortest observed query length (the keys of `lengths` are the word counts).
min(lengths)

3

In [6]:
# Longest observed query length (the keys of `lengths` are the word counts).
max(lengths)

13

In [7]:
# Expand the histogram back into one entry per query: each word count is
# repeated as many times as it was observed.
query_lengths = list(lengths.elements())

mean_length = np.mean(query_lengths)
# np.std defaults to ddof=0, i.e. the population standard deviation.
std_length = np.std(query_lengths)

print("Mean query length (words): {:.2f}".format(mean_length))
print("Standard deviation (words): {:.2f}".format(std_length))

Mean query length (words): 5.90
Standard deviation (words): 2.10


In [8]:
qe = {'span of timber structurally insulated panels (SIPs) roof': {'query': 'span of timber structurally insulated panels (SIPs) roof',   'expanded_query': 'span of timber structurally insulated panels (SIPs) roof, SIPs, structurally insulated panels, span, span L, drinks, structural bolting assemblies, mid - span, functional insulation',   'qe_insight': {'spans': ['SIPs', 'structurally insulated panels', 'span'],    'nearest_neighbours': {'weight': 1,     'candidates': [['drinks', 4.441206415069198],      ['structural bolting assemblies', 4.210373406604807],      ['functional insulation', 3.4933175575022553],      ['drips', 3.4182639907247405],      ['span L', 3.3993460298846356]]},    'kg_candidates': {'weight': 1,     'candidates': [['mid - span', 3.652309534232291],      ['span L', 3.3993460298846356]]},    'top_qe_terms': ['span L',     'drinks',     'structural bolting assemblies',     'mid - span',     'functional insulation']}},  'timber SIPs, fire protection requirements domestic': {'query': 'timber SIPs, fire protection requirements domestic',   'expanded_query': 'timber SIPs, fire protection requirements domestic, timber SIPs, fire protection requirements, timber leaf, health and safety requirements, Fire Precautions, fire precautions, fire safety systems',   'qe_insight': {'spans': ['timber SIPs', 'fire protection requirements'],    'nearest_neighbours': {'weight': 1,     'candidates': [['timber leaf', 3.600068253196425],      ['Fire Precautions', 3.277645023178056],      ['fire precautions', 3.277645023178056],      ['timber', 2.4163977257950884]]},    'kg_candidates': {'weight': 1,     'candidates': [['health and safety requirements', 3.3304884437656237],      ['fire safety systems', 3.129548300071241]]},    'top_qe_terms': ['timber leaf',     'health and safety requirements',     'Fire Precautions',     'fire precautions',     'fire safety systems']}},  'snow loading requirements of flat roofs': {'query': 'snow loading requirements of flat roofs',   
'expanded_query': 'snow loading requirements of flat roofs, flat roofs, snow loading requirements, drop apron, snow load shape coefficients, pitched roofs, snow load classification, flat roofing system',   'qe_insight': {'spans': ['flat roofs', 'snow loading requirements'],    'nearest_neighbours': {'weight': 1,     'candidates': [['snow load shape coefficients', 4.586299213616495],      ['pitched roofs', 4.078961994970159],      ['snow load classification', 3.8551773080090177]]},    'kg_candidates': {'weight': 1,     'candidates': [['drop apron', 4.772525399025119],      ['flat roofing system', 3.538997999624364]]},    'top_qe_terms': ['drop apron',     'snow load shape coefficients',     'pitched roofs',     'snow load classification',     'flat roofing system']}},  'party wall, sound insulation requirements': {'query': 'party wall, sound insulation requirements',   'expanded_query': 'party wall, sound insulation requirements, party wall, sound insulation requirements, moeity, tenants, party wall U - value, insulation criteria, ventilation standards',   'qe_insight': {'spans': ['party wall', 'sound insulation requirements'],    'nearest_neighbours': {'weight': 1,     'candidates': [['party wall U - value', 4.032189982938549],      ['insulation criteria', 3.217020103173441],      ['ventilation standards', 2.9761904037963687]]},    'kg_candidates': {'weight': 1,     'candidates': [['moeity', 4.647401063312325],      ['tenants', 4.329049002862127]]},    'top_qe_terms': ['moeity',     'tenants',     'party wall U - value',     'insulation criteria',     'ventilation standards']}},  'air tightness, requirements of building junctions': {'query': 'air tightness, requirements of building junctions',   'expanded_query': 'air tightness, requirements of building junctions, air tightness, building junction, requirements, air - tightness, junction pile, air - tight tank, wall junction, requirements 3 1 15',   'qe_insight': {'spans': ['air tightness',     'building junction',  
   'requirements'],    'nearest_neighbours': {'weight': 1,     'candidates': [['junction pile', 4.138349479752003],      ['air - tightness', 3.7676198597359636],      ['wall junction', 3.553123215417246],      ['requirements 3 1 15', 2.818964759760348],      ['requirements 3 1 4 1', 2.510778377054112]]},    'kg_candidates': {'weight': 1,     'candidates': [['air - tight tank', 4.059852959961899],      ['air - tightness', 3.7676198597359636]]},    'top_qe_terms': ['air - tightness',     'junction pile',     'air - tight tank',     'wall junction',     'requirements 3 1 15']}},  'lifespan of bitumen felt for flat roofs': {'query': 'lifespan of bitumen felt for flat roofs',   'expanded_query': 'lifespan of bitumen felt for flat roofs, lifespan, flat roofs, bitumen felt, bitumen, drop apron, sheathing felt, longevity, pitched roofs',   'qe_insight': {'spans': ['lifespan', 'flat roofs', 'bitumen felt'],    'nearest_neighbours': {'weight': 1,     'candidates': [['sheathing felt', 4.38824790134999],      ['longevity', 4.276307667790496],      ['pitched roofs', 4.078961994970159],      ['bitumen', 4.035592554940117],      ['life cycle', 3.6874404819336837]]},    'kg_candidates': {'weight': 1,     'candidates': [['drop apron', 4.772525399025119],      ['bitumen', 4.035592554940117],      ['life span', 3.593869936765814],      ['flat roofing system', 3.538997999624364],      ['tuck-in', 3.4857604055043283],      ['specimen size', 3.074565598381229]]},    'top_qe_terms': ['bitumen',     'drop apron',     'sheathing felt',     'longevity',     'pitched roofs']}},  'external fire spread on roofs': {'query': 'external fire spread on roofs',   'expanded_query': 'external fire spread on roofs, external fire spread, roofs, external fire spread upwards, fire spreads, roof decks, roof sheets, fire - resisting',   'qe_insight': {'spans': ['external fire spread', 'roofs'],    'nearest_neighbours': {'weight': 1,     'candidates': [['fire spreads', 4.028196220192958],      ['roof decks', 
3.7658550643815123],      ['roof sheets', 3.4150846158307253],      ['fire - resisting', 3.032437885312509]]},    'kg_candidates': {'weight': 1,     'candidates': [['external fire spread upwards', 4.359021818793473],      ['spread', 2.7768230884841194]]},    'top_qe_terms': ['external fire spread upwards',     'fire spreads',     'roof decks',     'roof sheets',     'fire - resisting']}},  'wheelchair, turning circle inside dwellings': {'query': 'wheelchair, turning circle inside dwellings',   'expanded_query': 'wheelchair, turning circle inside dwellings, wheelchair , turning circle inside, door swing, moving panel',   'qe_insight': {'spans': ['wheelchair , turning circle inside'],    'nearest_neighbours': {'weight': 1,     'candidates': [['door swing', 4.0547184287238185],      ['moving panel', 3.7964236218517495]]},    'top_qe_terms': ['door swing', 'moving panel']}},  'size of flanges for timber I-joists': {'query': 'size of flanges for timber I-joists',   'expanded_query': 'size of flanges for timber I-joists, size of flanges, timber I - joists, joists size, timber joist, timber joists, screw size',   'qe_insight': {'spans': ['size of flanges', 'timber I - joists'],    'nearest_neighbours': {'weight': 1,     'candidates': [['joists size', 4.317024442334],      ['timber joist', 4.27835739206318],      ['timber joists', 4.267410034178409],      ['screw size', 3.4979027966434515]]},    'top_qe_terms': ['joists size',     'timber joist',     'timber joists',     'screw size']}},  'fire alarm standards flats and houses': {'query': 'fire alarm standards flats and houses',   'expanded_query': 'fire alarm standards flats and houses, houses, fire alarm standards flats, school house, homes, smoke  fire alarm system, dwellings, Fire Standards Committee',   'qe_insight': {'spans': ['houses', 'fire alarm standards flats'],    'nearest_neighbours': {'weight': 1,     'candidates': [['homes', 3.759934960390303],      ['smoke / fire alarm system', 3.5214657167143883],      
['Fire Standards Committee', 3.076094142169697]]},    'kg_candidates': {'weight': 1,     'candidates': [['school house', 5.157897718785566],      ['dwellings', 3.196907635225628]]},    'top_qe_terms': ['school house',     'homes',     'smoke / fire alarm system',     'dwellings',     'Fire Standards Committee']}},  'surface spread and fire requirements for ceiling linings in common areas of flats': {'query': 'surface spread and fire requirements for ceiling linings in common areas of flats',   'expanded_query': 'surface spread and fire requirements for ceiling linings in common areas of flats, flats, fire requirements, ceiling linings, common areas, surface spread, fire instruction notice, edge distance limitations, surface roughness, common access areas, heel',   'qe_insight': {'spans': ['flats',     'fire requirements',     'ceiling linings',     'common areas',     'surface spread'],    'nearest_neighbours': {'weight': 1,     'candidates': [['fire instruction notice', 4.328288239847847],      ['surface roughness', 4.029017938788118],      ['floor slabs', 3.591671614139158],      ['vertical spread', 3.4265917216249724],      ['wall ceiling', 3.2948015406245226],      ['Fire Standards Committee', 3.076094142169697],      ['areas', 2.515818031796924]]},    'kg_candidates': {'weight': 1,     'candidates': [['edge distance limitations', 4.150292184355701],      ['common access areas', 3.872134768747297],      ['heel', 3.8245302892851876],      ['block of flats', 3.750165631847539],      ['Class 1 surface spread', 3.4753723210119585],      ['Fire performance requirements', 3.0643599323239825],      ['fire resistance requirements', 3.027233593924994],      ['lines', 3.004183899824735],      ['line', 2.6155512424106373],      ['Commons', 0.8333333333333334]]},    'top_qe_terms': ['fire instruction notice',     'edge distance limitations',     'surface roughness',     'common access areas',     'heel']}},  'thermoplastic materials, definition and fire standard': 
{'query': 'thermoplastic materials, definition and fire standard',   'expanded_query': 'thermoplastic materials, definition and fire standard, fire standard, thermoplastic materials, definition, strict definition, fire instruction notice, thermocouples, insulating materials, usage',   'qe_insight': {'spans': ['fire standard',     'thermoplastic materials',     'definition'],    'nearest_neighbours': {'weight': 1,     'candidates': [['fire instruction notice', 4.328288239847847],      ['thermocouples', 3.9422260287396003],      ['insulating materials', 3.862800448906737],      ['general term', 3.135069043429483],      ['Fire Standards Committee', 3.076094142169697],      ['S classification', 2.8784502841140784]]},    'kg_candidates': {'weight': 1,     'candidates': [['strict definition', 4.641430298994166],      ['usage', 3.4030718139787015]]},    'top_qe_terms': ['strict definition',     'fire instruction notice',     'thermocouples',     'insulating materials',     'usage']}},  'minimum fire resistance of floors in flats': {'query': 'minimum fire resistance of floors in flats',   'expanded_query': 'minimum fire resistance of floors in flats, floors, fire resistance, flats, minimum, minimum luminance, heel, block of flats, minimum conditioning temperature, floor slabs',   'qe_insight': {'spans': ['floors', 'fire resistance', 'flats', 'minimum'],    'nearest_neighbours': {'weight': 1,     'candidates': [['floor slabs', 3.591671614139158],      ['internal floors', 3.427459039746636],      ['fire resisting', 3.2090938830127653],      ['fire - resisting', 3.032437885312509],      ['Minimum', 2.303736673047687]]},    'kg_candidates': {'weight': 1,     'candidates': [['minimum luminance', 4.5914657659319245],      ['heel', 3.8245302892851876],      ['block of flats', 3.750165631847539],      ['minimum conditioning temperature', 3.675268317579048],      ['levels of fire resistance', 3.0257751070670484],      ['level of fire resistance', 2.833709245968402]]},    
'top_qe_terms': ['minimum luminance',     'heel',     'block of flats',     'minimum conditioning temperature',     'floor slabs']}},  'fire resistance requirements for cavity barriers': {'query': 'fire resistance requirements for cavity barriers',   'expanded_query': 'fire resistance requirements for cavity barriers, cavity barriers, fire resistance requirements, allergy, heat recovery performance, edge cover requirements, thermal barrier, fire - resisting',   'qe_insight': {'spans': ['cavity barriers', 'fire resistance requirements'],    'nearest_neighbours': {'weight': 1,     'candidates': [['heat recovery performance', 3.9592814805103353],      ['fire - resisting', 3.032437885312509]]},    'kg_candidates': {'weight': 1,     'candidates': [['allergy', 4.432456498210003],      ['edge cover requirements', 3.5894258955226874],      ['thermal barrier', 3.4929446876856796],      ['barrier', 2.9736033571170615]]},    'top_qe_terms': ['allergy',     'heat recovery performance',     'edge cover requirements',     'thermal barrier',     'fire - resisting']}},  'safety standards for solar panels': {'query': 'safety standards for solar panels',   'expanded_query': 'safety standards for solar panels, safety standards, solar panels, solar control glazing, compliance obligations, machinery safety standards, health safety, safety rules',   'qe_insight': {'spans': ['safety standards', 'solar panels'],    'nearest_neighbours': {'weight': 1,     'candidates': [['solar control glazing', 4.269958549818392],      ['machinery safety standards', 3.627712120334812],      ['safety rules', 3.228633384764337],      ['PV system', 3.059322188397588]]},    'kg_candidates': {'weight': 1,     'candidates': [['compliance obligations', 3.801113361245452],      ['health safety', 3.404320812908068]]},    'top_qe_terms': ['solar control glazing',     'compliance obligations',     'machinery safety standards',     'health safety',     'safety rules']}},  'air pressure test for a house': {'query': 
'air pressure test for a house',   'expanded_query': 'air pressure test for a house, air pressure test, air flow meter, ambient air test, air sample',   'qe_insight': {'spans': ['air pressure test'],    'nearest_neighbours': {'weight': 1,     'candidates': [['air flow meter', 3.884348867898551],      ['air sample', 3.2593435812818896]]},    'kg_candidates': {'weight': 1,     'candidates': [['air flow meter', 3.884348867898551],      ['ambient air test', 3.3866943984791447]]},    'top_qe_terms': ['air flow meter', 'ambient air test', 'air sample']}},  'allowed deflection of steel beams': {'query': 'allowed deflection of steel beams',   'expanded_query': 'allowed deflection of steel beams, allowed deflection, steel beams, faade steel beam, permitted deviation, permitted deviations, Beam',   'qe_insight': {'spans': ['allowed deflection', 'steel beams'],    'nearest_neighbours': {'weight': 1,     'candidates': [['permitted deviation', 3.59715879420862],      ['permitted deviations', 3.4805036916291847]]},    'kg_candidates': {'weight': 1,     'candidates': [['faade steel beam', 4.507641452359527],      ['Beam', 2.841399643853425]]},    'top_qe_terms': ['faade steel beam',     'permitted deviation',     'permitted deviations',     'Beam']}},  'air infiltration standards': {'query': 'air infiltration standards',   'expanded_query': 'air infiltration standards, air infiltration standards, air flow resistivity, air flow meter',   'qe_insight': {'spans': ['air infiltration standards'],    'nearest_neighbours': {'weight': 1,     'candidates': [['air flow resistivity', 4.293288972103675],      ['air flow meter', 3.884348867898551]]},    'top_qe_terms': ['air flow resistivity', 'air flow meter']}},  'u-value calculation for doors and windows': {'query': 'u-value calculation for doors and windows',   'expanded_query': 'u-value calculation for doors and windows, doors, windows, u - value calculation, entrance doors, windows and window components, U - value calculation, windows 
wall, window wall',   'qe_insight': {'spans': ['doors', 'windows', 'u - value calculation'],    'nearest_neighbours': {'weight': 1,     'candidates': [['entrance doors', 3.92209346564694],      ['U - value calculation', 3.501303508478092],      ['windows wall', 3.3598642849453233],      ['window wall', 3.3429503432869265],      ['U - value', 3.0505397939049272]]},    'kg_candidates': {'weight': 1,     'candidates': [['windows and window components', 3.6672540553558934],      ['Doors and windows', 3.248532692321942]]},    'top_qe_terms': ['entrance doors',     'windows and window components',     'U - value calculation',     'windows wall',     'window wall']}},  'maximum emissions of VOC in houses': {'query': 'maximum emissions of VOC in houses',   'expanded_query': 'maximum emissions of VOC in houses, VOC, maximum emissions, air polution index, VOCs, VOCx, emission rate, emission factor',   'qe_insight': {'spans': ['VOC', 'maximum emissions'],    'nearest_neighbours': {'weight': 1,     'candidates': [['VOCx', 4.084351502409356],      ['emission rate', 3.916484655663367],      ['emission factor', 3.7713563948932083]]},    'kg_candidates': {'weight': 1,     'candidates': [['air polution index', 5.492509221797763],      ['VOCs', 4.331228253519058]]},    'top_qe_terms': ['air polution index',     'VOCs',     'VOCx',     'emission rate',     'emission factor']}},  'rigid insulation, fire performance requirements': {'query': 'rigid insulation, fire performance requirements',   'expanded_query': 'rigid insulation, fire performance requirements, fire performance requirements, rigid insulation, expanded polystyrene polystyrene insulation, wood wool slab, heat recovery performance, insulation slabs, pipe insulation',   'qe_insight': {'spans': ['fire performance requirements',     'rigid insulation'],    'nearest_neighbours': {'weight': 1,     'candidates': [['heat recovery performance', 3.9592814805103353],      ['insulation slabs', 3.60518509757282],      ['pipe 
insulation', 3.287922882166381],      ['Fire Standards Committee', 3.076094142169697]]},    'kg_candidates': {'weight': 1,     'candidates': [['expanded polystyrene polystyrene insulation',       5.651096254973578],      ['wood wool slab', 4.531140896142328]]},    'top_qe_terms': ['expanded polystyrene polystyrene insulation',     'wood wool slab',     'heat recovery performance',     'insulation slabs',     'pipe insulation']}},  'thermal insulation, fire rating': {'query': 'thermal insulation, fire rating',   'expanded_query': 'thermal insulation, fire rating, thermal insulation, fire rating, fire instruction notice, efficiency rating, thermal contact sheets, thermal efficiency, rates',   'qe_insight': {'spans': ['thermal insulation', 'fire rating'],    'nearest_neighbours': {'weight': 1,     'candidates': [['fire instruction notice', 4.328288239847847],      ['efficiency rating', 4.087987904000819],      ['thermal contact sheets', 3.891209161178908],      ['thermal efficiency', 3.651032120873192]]},    'kg_candidates': {'weight': 1,     'candidates': [['rates', 3.1549990055021877],      ['u-value', 3.0505397939049272],      ['r-value', 2.892288506957347],      ['rated', 2.8658827863579326]]},    'top_qe_terms': ['fire instruction notice',     'efficiency rating',     'thermal contact sheets',     'thermal efficiency',     'rates']}},  'installation of timber studs': {'query': 'installation of timber studs',   'expanded_query': 'installation of timber studs, timber studs, installation, timber joists, ventilation air conditioning installation, trace heater installation, timber sheathing, internal installation',   'qe_insight': {'spans': ['timber studs', 'installation'],    'nearest_neighbours': {'weight': 1,     'candidates': [['timber joists', 4.267410034178409],      ['timber sheathing', 3.7630908209895524],      ['internal installation', 3.178750099579243]]},    'kg_candidates': {'weight': 1,     'candidates': [['ventilation air conditioning installation',      
 4.1602161301795375],      ['trace heater installation', 4.09481737971958]]},    'top_qe_terms': ['timber joists',     'ventilation air conditioning installation',     'trace heater installation',     'timber sheathing',     'internal installation']}},  'requirements for installing metal studs': {'query': 'requirements for installing metal studs',   'expanded_query': 'requirements for installing metal studs, metal stud, requirements, steel stud wall, metal plate, stud, requirements 3 1 15, metal',   'qe_insight': {'spans': ['metal stud', 'requirements'],    'nearest_neighbours': {'weight': 1,     'candidates': [['steel stud wall', 3.878383264749792],      ['metal plate', 3.371180589844527],      ['requirements 3 1 15', 2.818964759760348],      ['requirements 3 1 4 1', 2.510778377054112]]},    'kg_candidates': {'weight': 1,     'candidates': [['stud', 3.0784309184370193],      ['metal', 2.562610047235087]]},    'top_qe_terms': ['steel stud wall',     'metal plate',     'stud',     'requirements 3 1 15',     'metal']}},  'wind uplift, requirements for a plain tile roof covering': {'query': 'wind uplift, requirements for a plain tile roof covering',   'expanded_query': 'wind uplift, requirements for a plain tile roof covering, wind uplift, plain tile roof, requirements, plain tile, windward, wind uplift force, flat roofs, fall - off',   'qe_insight': {'spans': ['wind uplift', 'plain tile roof', 'requirements'],    'nearest_neighbours': {'weight': 1,     'candidates': [['plain tile', 4.41926686043474],      ['wind uplift force', 4.146040636087981],      ['flat roofs', 3.5834852206847936],      ['requirements 3 1 15', 2.818964759760348],      ['requirements 3 1 4 1', 2.510778377054112]]},    'kg_candidates': {'weight': 1,     'candidates': [['windward', 4.277060305567221],      ['fall - off', 3.5431931682145326]]},    'top_qe_terms': ['plain tile',     'windward',     'wind uplift force',     'flat roofs',     'fall - off']}},  'roof covering, fire performance': 
{'query': 'roof covering, fire performance',   'expanded_query': 'roof covering, fire performance, roof covering performance, roof covering system, roofing system',   'qe_insight': {'spans': ['roof covering performance'],    'nearest_neighbours': {'weight': 1,     'candidates': [['roof covering system', 3.526981141882125],      ['roofing system', 3.029149263862551]]},    'top_qe_terms': ['roof covering system', 'roofing system']}},  'bitumen felt, roof covering': {'query': 'bitumen felt, roof covering',   'expanded_query': 'bitumen felt, roof covering, roof covering, bitumen felt, bitumen, mastic asphalt, sheathing felt, tuck-in, roof sheeting',   'qe_insight': {'spans': ['roof covering', 'bitumen felt'],    'nearest_neighbours': {'weight': 1,     'candidates': [['sheathing felt', 4.38824790134999],      ['bitumen', 4.035592554940117],      ['roof sheeting', 3.35882340941514]]},    'kg_candidates': {'weight': 1,     'candidates': [['mastic asphalt', 4.855512577702439],      ['bitumen', 4.035592554940117],      ['tuck-in', 3.4857604055043283],      ['cover', 2.630781883803644]]},    'top_qe_terms': ['bitumen',     'mastic asphalt',     'sheathing felt',     'tuck-in',     'roof sheeting']}},  'waterproofing underlay for pitched roof': {'query': 'waterproofing underlay for pitched roof',   'expanded_query': 'waterproofing underlay for pitched roof, pitched roof, waterproofing underlay, valley gutter, dormer window, waterproof membrane, pitched roofs, waterproofing membrane',   'qe_insight': {'spans': ['pitched roof', 'waterproofing underlay'],    'nearest_neighbours': {'weight': 1,     'candidates': [['waterproof membrane', 4.087102864868054],      ['pitched roofs', 4.078961994970159],      ['waterproofing membrane', 4.065288012233247]]},    'kg_candidates': {'weight': 1,     'candidates': [['valley gutter', 5.123777834534969],      ['dormer window', 4.636601343241819]]},    'top_qe_terms': ['valley gutter',     'dormer window',     'waterproof membrane',     
'pitched roofs',     'waterproofing membrane']}},  'timber joist, span for upper-floors': {'query': 'timber joist, span for upper-floors',   'expanded_query': 'timber joist, span for upper-floors, timber joist, upper - floors, span, timber joists, span L, girder joist, mid - span, upper floor',   'qe_insight': {'spans': ['timber joist', 'upper - floors', 'span'],    'nearest_neighbours': {'weight': 1,     'candidates': [['timber joists', 4.267410034178409],      ['span L', 3.3993460298846356],      ['upper floor', 3.371817049793062],      ['lower floor', 3.2545661960853858]]},    'kg_candidates': {'weight': 1,     'candidates': [['girder joist', 5.43756887284913],      ['timber joists', 4.267410034178409],      ['mid - span', 3.652309534232291],      ['span L', 3.3993460298846356]]},    'top_qe_terms': ['timber joists',     'span L',     'girder joist',     'mid - span',     'upper floor']}},  'pitched roof, drainage requirements': {'query': 'pitched roof, drainage requirements',   'expanded_query': 'pitched roof, drainage requirements, pitched roof, drainage requirements, valley gutter, drainage basin, dormer window, pitched roofs, drainage system',   'qe_insight': {'spans': ['pitched roof', 'drainage requirements'],    'nearest_neighbours': {'weight': 1,     'candidates': [['drainage basin', 4.676551113026643],      ['pitched roofs', 4.078961994970159],      ['drainage system', 3.2809059735761545]]},    'kg_candidates': {'weight': 1,     'candidates': [['valley gutter', 5.123777834534969],      ['dormer window', 4.636601343241819]]},    'top_qe_terms': ['valley gutter',     'drainage basin',     'dormer window',     'pitched roofs',     'drainage system']}},  'fire performance of bituminous materials for roofs': {'query': 'fire performance of bituminous materials for roofs',   'expanded_query': 'fire performance of bituminous materials for roofs, fire performance, bituminous materials, roofs, bituminous putty, bituminous binder, granular material, porous 
material, roof decks',   'qe_insight': {'spans': ['fire performance',     'bituminous materials',     'roofs'],    'nearest_neighbours': {'weight': 1,     'candidates': [['granular material', 4.753571985681283],      ['porous material', 4.276571514652382],      ['roof decks', 3.7658550643815123],      ['fire sensitivity', 3.4166232667779033],      ['roof sheets', 3.4150846158307253],      ['fire behaviour', 3.104438886209442]]},    'kg_candidates': {'weight': 1,     'candidates': [['bituminous putty', 5.863937364030709],      ['bituminous binder', 5.471724729032184],      ['performances', 3.496801967050459],      ['Fire performance requirements', 3.0643599323239825]]},    'top_qe_terms': ['bituminous putty',     'bituminous binder',     'granular material',     'porous material',     'roof decks']}},  'PIR insulation, fire rating': {'query': 'PIR insulation, fire rating',   'expanded_query': 'PIR insulation, fire rating, PIR insulation, fire rating, fire instruction notice, efficiency rating, pipe - size insulation, pipe insulation, rates',   'qe_insight': {'spans': ['PIR insulation', 'fire rating'],    'nearest_neighbours': {'weight': 1,     'candidates': [['fire instruction notice', 4.328288239847847],      ['efficiency rating', 4.087987904000819],      ['pipe - size insulation', 3.495795106890036],      ['pipe insulation', 3.287922882166381]]},    'kg_candidates': {'weight': 1,     'candidates': [['rates', 3.1549990055021877],      ['rated', 2.8658827863579326]]},    'top_qe_terms': ['fire instruction notice',     'efficiency rating',     'pipe - size insulation',     'pipe insulation',     'rates']}},  'fire performance for rigid cellular plastic as a thermal insulation material': {'query': 'fire performance for rigid cellular plastic as a thermal insulation material',   'expanded_query': 'fire performance for rigid cellular plastic as a thermal insulation material, rigid cellular plastic, thermal insulation, fire performance, cellular plastics, flexible 
membrane, thermal contact sheets, thermal efficiency, performances',   'qe_insight': {'spans': ['rigid cellular plastic',     'thermal insulation',     'fire performance'],    'nearest_neighbours': {'weight': 1,     'candidates': [['cellular plastics', 4.331630500688686],      ['flexible membrane', 3.9363660682871595],      ['thermal contact sheets', 3.891209161178908],      ['thermal efficiency', 3.651032120873192],      ['fire sensitivity', 3.4166232667779033],      ['fire behaviour', 3.104438886209442]]},    'kg_candidates': {'weight': 1,     'candidates': [['performances', 3.496801967050459],      ['Fire performance requirements', 3.0643599323239825],      ['u-value', 3.0505397939049272],      ['r-value', 2.892288506957347]]},    'top_qe_terms': ['cellular plastics',     'flexible membrane',     'thermal contact sheets',     'thermal efficiency',     'performances']}},  'spray-applied insulation for roofs': {'query': 'spray-applied insulation for roofs',   'expanded_query': 'spray-applied insulation for roofs, spray - applied insulation, roofs, galvanized coating, roof decks, roof sheets, pipe insulation',   'qe_insight': {'spans': ['spray - applied insulation', 'roofs'],    'nearest_neighbours': {'weight': 1,     'candidates': [['galvanized coating', 5.029249608352135],      ['roof decks', 3.7658550643815123],      ['roof sheets', 3.4150846158307253],      ['pipe insulation', 3.287922882166381]]},    'top_qe_terms': ['galvanized coating',     'roof decks',     'roof sheets',     'pipe insulation']}},  'concrete floor slabs, fire performance': {'query': 'concrete floor slabs, fire performance',   'expanded_query': 'concrete floor slabs, fire performance, fire performance, concrete floor slabs, concrete slabs, performances, fire sensitivity, concrete floor, fire behaviour',   'qe_insight': {'spans': ['fire performance', 'concrete floor slabs'],    'nearest_neighbours': {'weight': 1,     'candidates': [['concrete slabs', 3.673022614728005],      ['fire 
sensitivity', 3.4166232667779033],      ['concrete floor', 3.1341395256144065],      ['fire behaviour', 3.104438886209442]]},    'kg_candidates': {'weight': 1,     'candidates': [['performances', 3.496801967050459],      ['Fire performance requirements', 3.0643599323239825]]},    'top_qe_terms': ['concrete slabs',     'performances',     'fire sensitivity',     'concrete floor',     'fire behaviour']}},  'rigid insulation for cavity walls': {'query': 'rigid insulation for cavity walls',   'expanded_query': 'rigid insulation for cavity walls, rigid insulation for cavity walls, insulation slabs, cavity insulation',   'qe_insight': {'spans': ['rigid insulation for cavity walls'],    'nearest_neighbours': {'weight': 1,     'candidates': [['insulation slabs', 3.60518509757282],      ['cavity insulation', 3.4811881128009254]]},    'top_qe_terms': ['insulation slabs', 'cavity insulation']}},  'sealant requirements of fire doors': {'query': 'sealant requirements of fire doors',   'expanded_query': 'sealant requirements of fire doors, sealant requirements, fire doors, doors, sealants, fire door assembly, fire - resisting door, sealing system',   'qe_insight': {'spans': ['sealant requirements', 'fire doors'],    'nearest_neighbours': {'weight': 1,     'candidates': [['sealants', 4.141716834252918],      ['fire - resisting door', 3.467879271894565],      ['sealing system', 3.362568784805412],      ['doors', 2.626029832993057]]},    'kg_candidates': {'weight': 1,     'candidates': [['fire door assembly', 3.5904149543388413],      ['doors', 2.626029832993057]]},    'top_qe_terms': ['doors',     'sealants',     'fire door assembly',     'fire - resisting door',     'sealing system']}},  'ventilation requirements for cold roofs': {'query': 'ventilation requirements for cold roofs',   'expanded_query': 'ventilation requirements for cold roofs, ventilation requirements, cold roofs, ventilator, ventilators, ventilation provision, cold wall, ventilation standards',   'qe_insight': 
{'spans': ['ventilation requirements', 'cold roofs'],    'nearest_neighbours': {'weight': 1,     'candidates': [['ventilation provision', 3.654144471683172],      ['cold wall', 3.3859219134444913],      ['ventilation standards', 2.9761904037963687]]},    'kg_candidates': {'weight': 1,     'candidates': [['ventilator', 4.619641614122921],      ['ventilators', 4.5511547154357945]]},    'top_qe_terms': ['ventilator',     'ventilators',     'ventilation provision',     'cold wall',     'ventilation standards']}},  'mineral fibre mats, installation requirements for roofs': {'query': 'mineral fibre mats, installation requirements for roofs',   'expanded_query': 'mineral fibre mats, installation requirements for roofs, mineral fibre mats, installation requirements, roofs, mineral wool packing, roof decks, installers, installation instructions, Installation instructions',   'qe_insight': {'spans': ['mineral fibre mats',     'installation requirements',     'roofs'],    'nearest_neighbours': {'weight': 1,     'candidates': [['mineral wool packing', 5.169624036394211],      ['roof decks', 3.7658550643815123],      ['installation instructions', 3.436721130097337],      ['Installation instructions', 3.436721130097337],      ['roof sheets', 3.4150846158307253]]},    'kg_candidates': {'weight': 1,     'candidates': [['installers', 3.724581283510842],      ['installed', 2.5173511730829774]]},    'top_qe_terms': ['mineral wool packing',     'roof decks',     'installers',     'installation instructions',     'Installation instructions']}},  'battens and counter battens, size and space for supporting the roof covering': {'query': 'battens and counter battens, size and space for supporting the roof covering',   'expanded_query': 'battens and counter battens, size and space for supporting the roof covering, roof, counter battens, space, battens, size, wicks, counter batten, counter battening, cobble, counter pane',   'qe_insight': {'spans': ['roof',     'counter battens',     
'space',     'battens',     'size'],    'nearest_neighbours': {'weight': 1,     'candidates': [['wicks', 5.327461863388171],      ['counter battening', 4.56728249117111],      ['counter pane', 4.26302221707084],      ['machinery space', 3.8006921832465745],      ['size depth', 3.4353674134699315],      ['roof sheet', 3.2094758245041217],      ['roof wall', 2.988941590588941]]},    'kg_candidates': {'weight': 1,     'candidates': [['counter batten', 4.689496239593489],      ['cobble', 4.367173251662872],      ['topcoat', 4.139207332249283],      ['magnitude', 3.2922704507644625]]},    'top_qe_terms': ['wicks',     'counter batten',     'counter battening',     'cobble',     'counter pane']}},  'damp proof course (DPC) installation requirements': {'query': 'damp proof course (DPC) installation requirements',   'expanded_query': 'damp proof course (DPC) installation requirements, DPC, installation requirements, damp proof course, damp - proof courses, DP, PSLC, damp - proof course, installers',   'qe_insight': {'spans': ['DPC',     'installation requirements',     'damp proof course'],    'nearest_neighbours': {'weight': 1,     'candidates': [['damp - proof courses', 4.450675009708523],      ['damp - proof course', 4.167349689712702],      ['installation instructions', 3.436721130097337],      ['Installation instructions', 3.436721130097337],      ['DP', 2.968094792169908]]},    'kg_candidates': {'weight': 1,     'candidates': [['PSLC', 4.709918185823879],      ['damp - proof courses', 4.450675009708523],      ['installers', 3.724581283510842],      ['DP', 2.968094792169908],      ['installed', 2.5173511730829774]]},    'top_qe_terms': ['damp - proof courses',     'DP',     'PSLC',     'damp - proof course',     'installers']}},  'waterproofing of abutments': {'query': 'waterproofing of abutments',   'expanded_query': 'waterproofing of abutments, waterproofing, abutments, abutting edges, skirtings, Abutments, weatherproofing, water - tightness',   'qe_insight': 
{'spans': ['waterproofing', 'abutments'],    'nearest_neighbours': {'weight': 1,     'candidates': [['skirtings', 4.535782493433608],      ['water - tightness', 3.7736882646518826],      ['cooling water', 3.600158847108967],      ['mortars', 3.094077119271111]]},    'kg_candidates': {'weight': 1,     'candidates': [['abutting edges', 4.9369425535286195],      ['Abutments', 4.2821397506571985],      ['weatherproofing', 3.8696593663577254],      ['waterproofed', 3.7170121184992873]]},    'top_qe_terms': ['abutting edges',     'skirtings',     'Abutments',     'weatherproofing',     'water - tightness']}}}

In [9]:
# Collect the final expanded query string stored under each original query in `qe`.
expanded_queries = [qe[original]['expanded_query'] for original in qe]

In [10]:
# Histogram of expanded-query sizes: maps a word count (as returned by
# get_num_words) to the number of expanded queries having that count.
# Feeding a generator straight into Counter replaces the manual tally loop and
# avoids rebinding the notebook-level names `q` and `word_len` as a side effect.
expanded_lengths = Counter(
    get_num_words(expanded) for expanded in expanded_queries
)


In [11]:
# Shortest expanded query, in words: iterating a Counter yields its keys,
# which here are the observed word counts.
min(expanded_lengths)

12

In [12]:
# Longest expanded query, in words: iterating a Counter yields its keys,
# which here are the observed word counts.
max(expanded_lengths)

34

In [13]:
# Unroll the histogram back into one word count per expanded query
# (each length repeated as many times as it was observed).
expanded_query_lengths = list(expanded_lengths.elements())

# Summary statistics over the per-query word counts (np.std with its default settings).
mean_words = np.mean(expanded_query_lengths)
std_words = np.std(expanded_query_lengths)
print("Mean query length (words): {:.2f}".format(mean_words))
print("Standard deviation (words): {:.2f}".format(std_words))

Mean query length (words): 20.19
Standard deviation (words): 4.37
