In [112]:
import json
from collections import defaultdict
import numpy as np
from transformers import MPNetTokenizer
from sklearn.metrics import ndcg_score
import pytrec_eval

# MS-P dev

In [4]:
def read_qrel(path):
    qrels = defaultdict(list)
    with open(path) as f:
        for line in f:
            qid, _, did, _ = line.strip().split()
            qrels[qid].append(did)
            
    return qrels

In [5]:
def read_query(path):
    queries = defaultdict(list)
    with open(path) as f:
        for line in f:
            qid, query = line.strip().split("\t")
            queries[qid] = query
            
    return queries

In [6]:
def read_doc_tsv(path):
    docs = defaultdict(list)
    with open(path) as f:
        for line in f:
            did, text = line.strip().split("\t")
            docs[did] = text
    
    return docs

In [7]:
def read_trec(path):
    all_scores = defaultdict(dict)
    with open(path):
        with open(path) as f:
            for line in f:                
                qid, _, did, ran, score, _ = line.strip().split()
                all_scores[qid][did] = float(score)
    return all_scores

In [8]:
def compare(target_result, ref_result, qrels):
    better_item = defaultdict(list)
    worse_item = defaultdict(list)
    for qid, dids in target_result.items():
        rdids = ref_result[qid]
        qrel_d = set(qrels[qid])
        ref_ranks = dict()
        ref_dids = [info[0] for info in sorted(rdids.items(), key=lambda x: -x[1])]
        target_ranks = dict()
        ranked_dids = [info[0] for info in sorted(dids.items(), key=lambda x: -x[1])]
        for qd in qrel_d:
            try:
                ref_ranks[qd] = ref_dids.index(qd)
                target_ranks[qd] = ranked_dids.index(qd)
            except ValueError:
                continue
        if not target_ranks:
            continue
        for qd in qrel_d:
            if qd in ref_ranks and qd in target_ranks:
                ref_rank = ref_ranks[qd]
                target_rank = target_ranks[qd]
                if target_rank < ref_rank:
                    better_item[qid].append((qd, ref_rank, target_rank))
                elif target_rank > ref_rank:
                    worse_item[qid].append((qd, ref_rank, target_rank))        
                
    return better_item, worse_item

In [9]:
def output_diff(items, queries, docs):
    for qid, it in items.items():
        for i in it:
            did, r_rank, t_rank = i
            print("{} {} {} {} {}:: {}".format(qid, did, r_rank, t_rank, queries[qid], docs[did]))

In [10]:
def output_result(qid, queries, docs, result, items, target=True):
    print(queries[qid])
    item = items[qid]
    cut_rank = 10000
    for i in item:
        print(item)
        _, rr, tr = i
        tmp_rank = tr if target else rr
        if tmp_rank < cut_rank:
            cut_rank = tmp_rank
            
    print("cut_rank: {}".format(cut_rank))
    for i, (did, score) in enumerate(sorted(result[qid].items(), key=lambda x: -x[1])):
        if i > cut_rank:
            break
        print(i, did, score, docs[did])

In [11]:
def common(items1, items2):
    common_result = defaultdict(list)
    for qid, item1 in items1.items():
        if qid in items2:
            item2 = items2[qid]
            for i1 in item1:
                for i2 in item2:
                    did1, rr1, tr1 = i1
                    did2, rr2, tr2 = i2
                    common_result[qid].append((did1, rr1, tr1, tr2))
            
    return common_result

In [12]:
msp_dev_qrel_path = "/home/gaia_data/iida.h/msmarco/passage/collection_and_queries/qrels.dev.small.tsv"
msp_dev_queries_path = "/home/gaia_data/iida.h/msmarco/passage/collection_and_queries/queries.dev.small.tsv"
msp_docs_path = "/home/gaia_data/iida.h/msmarco/passage/collection_and_queries/collection.tsv"
msp_bm25_path = "/home/gaia_data/iida.h/msmarco/passage/experiment/bm25/run.msmarco-passage.dev.small.bm25-tuned.trec"

In [13]:
msp_dev_small_queries = read_query(msp_dev_queries_path)
msp_docs = read_doc_tsv(msp_docs_path)
msp_dev_qrels = read_qrel(msp_dev_qrel_path)
msp_dev_bm25 = read_trec(msp_bm25_path)

In [14]:
with open("../test/msmarco-passage/sbert/result_dev/max_soft_cos/local_ave/1/rerank_score.json") as f:
    max_soft_cos = json.load(f)
    
with open("../test/msmarco-passage/sbert/result_dev/coef_max/local_ave/0/rerank_score.json") as f:
    coef_max = json.load(f)
    
with open("../test/msmarco-passage/sbert/result_dev/dense/ave/0/rerank_score.json") as f:
    dense = json.load(f)


In [15]:
dense_bm25_better_item, dense_bm25_worse_item = compare(dense, msp_dev_bm25, msp_dev_qrels)
max_soft_cos_dense_better_item, max_soft_cos_dense_worse_item =  compare(max_soft_cos, dense, msp_dev_qrels)
max_soft_cos_bm25_better_item, max_soft_cos_bm25_worse_item =  compare(max_soft_cos, msp_dev_bm25, msp_dev_qrels)

In [16]:
coef_max_dense_better_item, coef_max_dense_worse_item =  compare(coef_max, dense, msp_dev_qrels)
coef_max_bm25_better_item, coef_max_bm25_worse_item =  compare(coef_max, msp_dev_bm25, msp_dev_qrels)

## improve

In [17]:
common_lss_b = common(max_soft_cos_bm25_better_item, coef_max_bm25_better_item)
common_lss_b

defaultdict(list,
            {'2': [('4339068', 6, 5, 4)],
             '524332': [('740662', 858, 220, 679)],
             '786786': [('7807976', 9, 1, 6)],
             '524699': [('7697755', 19, 13, 16)],
             '524722': [('7471189', 1, 0, 0)],
             '873886': [('7913234', 11, 6, 7)],
             '786937': [('7867876', 68, 5, 27)],
             '1049085': [('7186309', 151, 13, 109)],
             '524848': [('7670356', 18, 6, 16)],
             '262974': [('7301412', 108, 16, 26)],
             '1049456': [('6121038', 12, 11, 8)],
             '525534': [('7560662', 2, 0, 1)],
             '1049894': [('22297', 180, 139, 87)],
             '1050231': [('7185139', 34, 13, 32)],
             '1050253': [('3255432', 53, 6, 11)],
             '1050275': [('7185091', 5, 3, 4)],
             '526013': [('7638663', 15, 1, 4)],
             '264150': [('7383302', 4, 2, 3)],
             '788484': [('7963717', 755, 275, 428)],
             '264284': [('5471184', 51, 25, 32)],

In [18]:
for qid, item in common_lss_b.items():
    if qid in dense_bm25_worse_item:
        print(qid, item, dense_bm25_worse_item[qid])

2 [('4339068', 6, 5, 4)] [('4339068', 6, 12)]
786786 [('7807976', 9, 1, 6)] [('7807976', 9, 204)]
524722 [('7471189', 1, 0, 0)] [('7471189', 1, 161)]
1049085 [('7186309', 151, 13, 109)] [('7186309', 151, 227)]
1050231 [('7185139', 34, 13, 32)] [('7185139', 34, 183)]
1050275 [('7185091', 5, 3, 4)] [('7185091', 5, 422)]
788702 [('7776257', 479, 425, 359)] [('7776257', 479, 497)]
1050857 [('5269929', 8, 3, 7)] [('5269929', 8, 175)]
788851 [('7774972', 8, 3, 7)] [('7774972', 8, 197)]
1051095 [('7911726', 128, 36, 126)] [('7911726', 128, 235)]
789037 [('7810187', 1, 0, 0)] [('7810187', 1, 17)]
1051223 [('7183716', 9, 6, 4)] [('7183716', 9, 45)]
526984 [('7630382', 15, 0, 10)] [('7630382', 15, 29)]
1051285 [('7183600', 40, 38, 39)] [('7183600', 40, 43)]
789332 [('5553013', 207, 71, 164)] [('5553013', 207, 268)]
2962 [('7314664', 72, 20, 38), ('7314664', 72, 20, 207), ('658625', 744, 29, 38), ('658625', 744, 29, 207)] [('7314664', 72, 100)]
1051723 [('7182394', 8, 0, 6)] [('7182394', 8, 112)]

748054 [('7506788', 7, 4, 4)] [('7506788', 7, 367), ('7506791', 1, 3)]
748321 [('7637360', 45, 13, 25)] [('7637360', 45, 55)]
1010527 [('7247196', 192, 34, 74)] [('7247196', 192, 322)]
1010537 [('7787764', 3, 2, 2), ('7787764', 3, 2, 1)] [('7787764', 3, 7), ('7787765', 4, 8)]
605363 [('7534976', 4, 1, 2)] [('7534976', 4, 5)]
1011140 [('7246357', 3, 0, 1)] [('7246357', 3, 22)]
212251 [('7563950', 12, 4, 10)] [('7563950', 12, 58)]
212303 [('7840105', 28, 3, 12)] [('7840105', 28, 45)]
749752 [('7610789', 12, 5, 11)] [('7610789', 12, 112)]
1012547 [('7243485', 21, 10, 20), ('7243485', 21, 10, 5), ('7243486', 8, 1, 20), ('7243486', 8, 1, 5)] [('7243485', 21, 462), ('7243486', 8, 77)]
998965 [('7266627', 2, 0, 0)] [('7266627', 2, 7)]
750946 [('7549100', 46, 43, 45)] [('7549100', 46, 142)]
1013114 [('7242575', 55, 6, 10)] [('7242575', 55, 94)]
1013304 [('7242268', 5, 3, 2)] [('7242268', 5, 18)]
1013367 [('2285677', 74, 65, 39)] [('2285677', 74, 112)]
227591 [('7319314', 990, 79, 753)] [('7319

In [19]:
# max-sim-idf
output_result("526984", msp_dev_small_queries, msp_docs, max_soft_cos, max_soft_cos_bm25_better_item, True)

types of coffee drinks mocha latte
[('7630382', 15, 0)]
cut_rank: 0
0 7630382 37.83771104528328 Espresso is generally denser than coffee brewed by other methods, having a higher concentration of suspended and dissolved solids; it generally has a creamy foam on top termed crema. Espresso is the base for a number of other coffee drinks, such as latte, cappuccino, macchiato, mocha, and Americano.


In [20]:
# bm25
output_result("526984", msp_dev_small_queries, msp_docs, msp_dev_bm25, max_soft_cos_bm25_worse_item, True)

types of coffee drinks mocha latte
cut_rank: 10000
0 7200617 22.968399 Best Answer: A Cafe Mocha is a shot of espresso-roasted and -brewed coffee into a hot chocolate drink -- or, chocolate flavoring in a dark-roasted coffee drink. A Latte is a cappuccino but made with more milk so that it is lighter. A Mocha Latte is the same, but with a shot of chocolate flavoring added. You could add whipped cream to either drink, but will see it on the Latte more often than the Cafe Mocha. What Is A Mocha Latte. Read the CBTL recipes for your self. Basically Cafe Mocha = more coffee less milk, vs Mocha Latte.
1 7630383 21.301399 Also called Caffe Mocha or Mocaccino, the mocha is a chocolate lover's delight. This coffee drink combines espresso with hot milk and chocolate and is a variant of the latte. The espresso to milk ratio of a mocha is about the same as a latte, but the addition of white, milk or dark chocolate to the drink is essential.
2 7336556 19.789801 Coffee Consumption: Over 50% of Amer

535 4173223 9.8688 1 It's also sometimes called a shot in the dark. 2  Irish Coffee: Coffee mixed with a dash of Irish whiskey and served with cream on top. 3  Latte Macchiato: Steamed milk served in a tall glass rather than a cup that is âstainedâ by a shot of espresso coffee.
536 1358174 9.8686 Iced Raspberry Latte. Created like the traditional Starbucks favorite, the Iced Raspberry Latte puts a twist on the traditional latte option. Starbucks is open about being able to add different syrup flavorings to drinks, but typically fruit-flavored additions arenât as common.
537 6622300 9.8534 Calories, Fat, Protein, Fiber, & Carbs In Tall Vanilla Latte; Calories, Fat, Protein, Fiber, & Carbs In Tall Skinny Vanilla Latte; Calories, Fat, Protein, Fiber, & Carbs In Starbucks Grande Vanilla Latte; Calories, Fat, Protein, Fiber, & Carbs In Sugar Free Vanilla Latte; Ingredient Specific Calorie Information From Our Recipes: 1  Calories In Sugar-free Vanilla Latte Calories: 85, Fat: 2g, Carb

662 1305758 9.3629 Photo Credit Robert Brown/iStock/Getty Images. A typical coffee shop chai tea latte made with spiced black tea and steamed milk may come in several sizes. A 16-ounce chai tea latte contains an average of 95 milligrams of caffeine, while a 12-ounce has slightly less, with approximately 70 milligrams of caffeine.
663 3724184 9.362899 Here's the approximate price breakdown: Grande iced vanilla latte: $4.15 (about $4.84 with tax) Shot of espresso over ice: $1.95 (about $2.12 with tax) Granted, a grande iced vanilla latte has two shots of espresso in it, not one. So if you're looking for the same amount of coffee, order two shots of espresso over ice.
664 6001070 9.3502 It depends on the type of coffee you drink. A plain cup of brewed coffee has only 2 calories â and no fat. But if you dress up your coffee with extras, whether at home or your favorite coffee shop, you also add extra calories. See how many calories 1 tablespoon of these extras can add to your coffee:
665

690 3194506 9.255999 Coffee is an international drink. There are few, if any, places you can travel where coffee is not an appreciated beverage. However, the tree itself is a bit prejudice about the type of environment in which it will produce. It only grows and produces well in the tropics.
691 128690 9.2553 #9 (TIE): AU BON PAIN FROZEN CARAMEL LATTE. You may feel virtuous as you bypass the croissants and cakes and head directly to the coffee counter. But if you opt for a 16-ounce Frozen Caramel Latte at Au Bon Pain, it will set you back 600 calories (37.5 per ounce), 20 grams of fat and 91 grams of sugar.
692 7990939 9.2543 Speakeasies Fact 2: Speakeasy clubs claimed to sell soft drinks and coffee, but served alcohol behind the scenes - some famously served in coffee cups. Speakeasies Fact 3: There were different types of speakeasies in most major cities that varied in quality, size, and the types of people that visited them.
693 635854 9.2426 So we bought one. On Jan. 5, Starbucks w

In [21]:
# dense
output_result("526984", msp_dev_small_queries,  msp_docs, dense, dense_bm25_better_item, True)

types of coffee drinks mocha latte
cut_rank: 10000
0 3186317 0.8652582168579102 A Starbucks Grande Caffe Mocha (sometimes called a Mochaccino) is made from espresso, steamed milk, and their mocha syrup. It is sometimes topped with whipped cream. A Starbucks Mocha will be higher in caffeine than their other espresso beverages because of the cocoa and coffee syrup used in the beverage.
1 3186315 0.8638187646865845 Send Feedback. A Starbucks Grande Caffe Mocha (sometimes called a Mochaccino) is made from espresso, steamed milk, and their mocha syrup. It is sometimes topped with whipped cream. A Starbucks Mocha will be higher in caffeine than their other espresso beverages because of the cocoa and coffee syrup used in the beverage.
2 2559958 0.8604786992073059 Popular Espresso Drinks. 1  Cappuccino - A short steamed or frothed milk drink with a single shot of espresso. 2  CaffÃ© Latte - A tall steamed milk drink with a double shot of espresso. Lattes are often flavored with syrups. 3  Caff

142 3668794 0.7810565233230591 Latte is a coffee variant that is prepared using espresso and milk. In other words, a Latte is nothing but an espresso and steamed milk served with a small layer of milk froth on top. Latte is 1/4th espresso and three times more milk with a topping of milk foam. As a result, a latte is milder and milkier.
143 3827250 0.780714750289917 A chai tea latte is a caffeine containing coffee alternative. Photo Credit joanna wnuk/iStock/Getty Images. A chai tea latte is typically made with warm milk, black tea, sweetener and a combination of ginger, peppercorns, cinnamon and cardamom.
144 8493373 0.7807049751281738 In general cappuccino should have more than filtered coffee but the same as espresso. In America filtered coffee is more common and you get less caffeine per serving than one espresso. The way you make cappuccino is basically you pour in 1 shot of espresso, top with 1/3 of cream, and 1/3 of milk. It is similar to a latte but has 1/3 of cream. Hopefully t

305 3987093 0.735116720199585 Cocoa beans contain caffeine, so the higher the percentage of cocoa in a chocolate bar, the higher its caffeine content. Likewise for coffee: Ounce for ounce, the more diluted the coffee drink, the less caffeine. A 12-ounce latte contains much less caffeine than a 12-ounce mug of plain coffee.
306 8334193 0.7341511249542236 Itâs the Milk and Sugar! Coffee drinks are high in calories due to what we add to the coffee. Caffeine is a bitter chemical so we tend to sweeten it up â with milk, sugar, or syrups. The larger the drink volume â the more milk you will be getting.With espresso-based drinks (latte, cappuccino) â the amount of water in the drink is minimal (typically 45ml / 1.5 fl. oz per espresso shot).You do the math:dd in cream, a pump of flavored syrup, and some sugar â and you are in for a very calorie-dense drink â more like sweetened milk than coffee. I think Iâm beginning to sound like a coffee snob.
307 4173226 0.7339869737625122 Th

329 2519995 0.7274314165115356 Iced Caramel Mocha, with whole milk, whipped cream & caramel drizzle, large. Iced Caramel Mocha, with whole milk, whipped cream & caramel drizzle, medium. Iced Caramel Mocha, with whole milk, whipped cream & caramel drizzle, small. Iced Caramel Mocha, with whole milk, without whipped cream or caramel drizzle, medium. Iced Coffee, with Sugar-Free French Vanilla Syrup. Iced Coffee, with light cream & liquid sugar, large.
330 5861269 0.7271396517753601 A New Orleans original for over 30 years. Also Puccinoâs 1 seller. Made w/ our rich Italian espresso & secret coffee mix blended in w/ our low-fat frozen ice milk, topped w/ lite whipped cream & dusted w/ chocolate powder. A traditional frozen latte made w/ our 24 hour cold drip coffee.
331 4600257 0.7267484664916992 Caramel Macchiato with Whole Milk; Chai Tea Latte with 2% Milk; Chai Tea Latte with Nonfat Milk; Chai Tea Latte with Soy Milk; Chai Tea Latte with Whole Milk; Chile Mocha, with 2% milk, without 

652 3165767 0.6287528276443481 Starbucks Coffee Drinks. Starbucks Drinks is a dedicated service to provide a concise and easy list of all Starbucks menu items. All the way from brewed coffees, to ice-cold Frappuccinos, this list provides all major drinks.
653 5602691 0.628740668296814 I started ordering matcha tea latte drinks at coffee shops. I was just curious. I found some things out. 1  Iâm going to be honest â I canât stomach matcha green tea lattes when theyâre hot. Justâ¦ too much of a green plant steaminess in my face.  I do, however, LOVE love love them cold.
654 5009857 0.6286317110061646 What the heck is COFFEE-MATE, anyway? COFFEE-MATE was introduced in 1961 as the first ânon-dairy creamerâ on the market, and today it remains the most popular such product in the world. Manufactured by NestlÃ© out of Glendale, California, COFFEE-MATE comes in over 25 flavors including gingerbread, Parisian almond crÃ¨me and peppermint mocha.
655 1015167 0.62861168384552 Get Pric

777 6866055 0.5897793769836426 I had the best spanish latte at Urth Cafe in LA. It was espresso with steamed condensed milk. So heavenly! It was the highlight of my California vacation. I've been trying to replicate the drink ever since.
778 635858 0.5892495512962341 Cold Drinks and Higher Prices. Scorching hot temperatures and bright sunrays are both signs that summer is definitely here! And Starbucks wants to help you stay chill all summer long. The iconic coffee chain released three new summer sips and a sweet treat along with a price hike (some espressos and tea lattes went up as much as 30 cents).
779 3455009 0.5892228484153748 Startup Costs. The type of location and whether a coffee shop is independent or financed are both major determinants of startup costs. A coffee cart can cost $20,000 to $25,000 to start up while a kiosk may cost $25,000 to $75,000.s an example, if milk costs $3 per gallon, or 2.34 cents per ounce, and a drink contains 9 ounces of milk, the milk cost will be

880 3537909 0.538669764995575 There are two main types of coffee plant. The Coffea Arabica, the most common. Most of the world's coffee is made from Arabica beans, and the Coffea Robusta, which is easier to grow in places where Arabica will not grow.offee is a plant and the name of the drink that is made from this plant. The coffee plant is a bush or tree that can grow up to ten meters (about 32 feet) high, but is usually cut shorter. Coffee plants originally grew in Africa, and now also grow in South America, Central America and Southeast Asia.
881 6622298 0.5377074480056763 There are 160 calories in 1 large Latte Coffee. Calorie breakdown: 37% fat, 37% carbs, 26% protein.
882 6325647 0.5367283821105957 Drinking coffee daily may help prevent serious conditions. There is an abundance of research showing that instead of causing health problems, as you may have thought, drinking coffee may actually protect your body against a wide variety of diseases and health conditions.1. Type 2 diabe

In [22]:
# max-sim-idf
output_result("83506", msp_dev_small_queries, msp_docs, max_soft_cos, max_soft_cos_bm25_better_item, True)

cast of rock dog
[('7398040', 15, 1)]
cut_rank: 1
0 7398049 8.56077318166597 Rock Dog (2016) cast and crew credits, including actors, actresses, directors, writers and more.
1 7398040 8.342106788945665 Rock Dog - Coming 2016 Huayi Brothers & Mandoo Pictures present, a comedy directed by Academy AwardÂ® Nominated Writer-Director of SURFâS UP and Co-Director of TOY STORY 2, Ash Brannon. When a radio falls from the sky, a young Tibetan Mastiff is inspired to leave his village in the mountains and move into the big city to become a rock musician. Cast: Luke Wilson, Eddie Izzard, J.K. Simmons, Lewis Black, Kenan Thompson, Mae Whitman, Jorge Garcia, with Matt Dillon and Sam Elliott.


In [23]:
# bm25
output_result("83506", msp_dev_small_queries, msp_docs, msp_dev_bm25, max_soft_cos_bm25_worse_item, True)

cast of rock dog
cut_rank: 10000
0 7398049 10.759 Rock Dog (2016) cast and crew credits, including actors, actresses, directors, writers and more.
1 7398043 10.3539 Rock Dog Details. Full Cast and Crew; Release Dates; Official Sites; Company Credits; Filming Locations; Technical Specs
2 4145175 9.3328 Max's dog was saved from being euthanized by the filmmakers. One day before he was to be put to sleep, members of the crew visited his shelter looking for a pet to cast for the film. He was picked out of a number of other dogs due to him picking up a rock off the ground and playing with it like a toy.
3 5993041 9.2691 Hotel for dogs is a comedy and family movie. It is rated PG. It is directed by Thor Freudenthal. Jeff Lowell and Robert Schooley wrote the screen play. Hotel for Dogs has a well known cast that everyone in the family may have seen before. Emma Roberts, Jake Austin, Kyla Pratt, and Lisa Kudrow are among the cast. Emma Roberts plays Andi and Jake Austin plays her little brothe

402 6908732 5.804198 Weight gain or weight loss can always be a sign of some change in the body. The hair coat may be less thick or have a dull cast as your Bichon approaches old age. Unless the diet has changed, this may not be an issue until the last year or two but it is certainly a sign of a less healthy dog.
403 7447441 5.804197 also at this time the our gang cast acquired an american pit bull terrier with a ring around his eye originally named pansy the dog soon became known as pete the pup the most famous our gang pet in 1927 hal roach ended his distribution arrangement with the pathÃ© company
404 8512259 5.804196 In 1996, he was cast in the role that would make him a star, as Jerry Lundegaard in Fargo. He has gone on to appear in a number of successful films since then, including Boogie Nights, Wag the Dog, Magnolia, and The Lincoln Lawyer. Felicity Huffman & William H Macy Net Worth. Pharoahe Monch Net Worth.
405 8676031 5.804195 Prohibited. 1  Any sighting device that casts a

528 5590192 5.426699 (OP). We're looking to approve a thin, bonded topping slab. The substrate is a cast-in-place, structural concrete slab 8 to 9 1/4 thick spanning 28-feet, built in 1950 and designed for 125psf.The structural floor system is a two-way slab.For a renovation project, the architect has specified floor leveling underlayment for all floors.As an alternate, the contractor proposes a hard rock concrete topping, 1.25 minimimum to 2.5 maximum +/- thickness, 3/8 rock.OP). We're looking to approve a thin, bonded topping slab. The substrate is a cast-in-place, structural concrete slab 8 to 9 1/4 thick spanning 28-feet, built in 1950 and designed for 125psf.
529 6182605 5.426698 Christopher Crosby  Chris  Farley (February 15, 1964 â December 18, 1997) was an American comedian and actor. Farley was known for his loud, energetic comedic style, and was a member of Chicago 's Second City Theatre and cast member of the NBC sketch comedy show Saturday Night Live between 1990 and 1995

573 1475871 5.267 No but you can get sick from germs and other things that the iguana may carry. The same can also be said for cats, dogs, humans, inanimate objects like rocks, etc. (yes, you câ¦an catch something from your cat). Germs, bacteria, and the like are everywhere. Most of these are not harmful, so don't worry too much.
574 2845066 5.266999 Dr. Dog is an American rock band from West Grove, Pennsylvania, United States. Its lineup consists of Toby Leaman (bass guitar), Scott McMicken (lead guitar), Frank McElroy (rhythm guitar), Zach Miller (keyboard), and Eric Slick (drums). Lead vocal duties are shared between Leaman and McMicken, with all members contributing harmonies.
575 2902835 5.266998 All he does is chew sticks and rocks maninly concrete! he ate my stucco bands on my patio! So now for toys I give him big blocks of wood and leather balls to munch on. This dog is like Marley from Marley and Me! He is AWFUL! But we love him so much. he is so sweet.
576 379049 5.266997 La

760 1228054 4.9182 A range of casting methods are available, but three of the most common forms include sand casting (expendable mold, permanent pattern), die casting (permanent mold) and investment casting (expendable mold, expendable pattern). 1  Sand casting is a method characterized by the use of sand as a mold material.
761 1569222 4.918199 Intro: Cast a Metal Ring! This Instructable is about casting and finishing a ring using the art of lost wax metal casting. In lost wax casting, a wax positive of the final product ... Step 1: Cast the Wax. First off, you will need some wax to carve into the ring.
762 1876270 4.918198 The Indian caste system has existed for about 3,000 years. 1  There were four original castes, and one caste so low that it was not even considered to be part of the caste system: 2  The Brahman caste usually consisted of priests or scholars and enjoyed a great deal of prestige and wealth.
763 7608636 4.918197 ( casts plural & 3rd person present) ( casting present 

In [24]:
# dense
output_result("83506", msp_dev_small_queries,  msp_docs, dense, dense_bm25_worse_item, True)

cast of rock dog
[('7398040', 15, 29)]
cut_rank: 29
0 3094180 0.5616450309753418 to put on the dog (US) vestirse de punta en blanco. to be top dog ser el gallo del lugar; triunfar. the dog's bollocks (Britain) la hostia (vulgar) Tony-boy, you are the dog's bollocks To a wide-eyed teenager, Mott were the dog's bollocks. They had the loudest amps, the longest hair, the hardest rock and the baddest attitude 'At Ascot Park.
1 6151165 0.5595523118972778 The Bedlington Terrier is a breed of small dog named after the mining town of Bedlington, Northumberland in North East England. Originally bred to hunt vermin in mines, the Bedlington Terrier has since often been used in dog racing and in conformation shows numerous dog sports, and as a companion dog.n 1948, a Bedlington Terrier known as Rock Ridge Night Rocket won best in show at the Westminster Kennel Club Dog Show. The breed has a high instance of copper toxicosis, but with the exception of eye problems, it is mostly free from health comp

In [25]:
# max-sim-idf
output_result("601684", msp_dev_small_queries, msp_docs, max_soft_cos, max_soft_cos_bm25_better_item, True)

what country does fennel come from
[('7527096', 15, 3)]
cut_rank: 3
0 6751121 17.808432080715495 1 Herb fennel does not produce the same bulbous stem. 2  It is grown for its delicate leaves, which are used as a herb. 3  Herb fennel also produces seeds which have a licorice-like flavor (as does the rest of the plant) and are used for seasoning.he type of fennel you choose to grow will depend on what part of the fennel plant you wish to use -- the bulb, the fronds or the seeds. 1  Florence Fennel is grown for its bulbous stem, which can be eaten raw, grilled or baked. 2  Herb fennel does not produce the same bulbous stem.
1 954272 17.62122426166579 Categories: Fennel. Common fennel is the variety from which the oval greenish-brown fennel seeds come. The seeds are available whole and ground and are used in both sweet and savory foods, as well as to flavor many liqueurs. They should be stored in a cool, dark place for no more than six months.ategories: Fennel. Common fennel is the variety 

In [26]:
# bm25
output_result("601684", msp_dev_small_queries, msp_docs, msp_dev_bm25, max_soft_cos_bm25_worse_item, True)

what country does fennel come from
cut_rank: 10000
0 7786940 11.304 Where does the surname Beam come from? already exists. already exists as an alternate of this question. exists and is an alternate of. You also play NYPD Officer Eddie Janko on Blue Bloods. How did you prepare for your role as a cop? Where does the surname Be come? What country did the surname Crabtree come from? What country does the surname ...
1 3507438 11.1732 What country did the last name Huynh come from? already exists. already exists as an alternate of this question. exists and is an alternate of. Tell us about your skateboard shoe! And where can we get a pair? What country did the last name Snavely come from? What country does the last name Canenguez caÃ±enguez come? From what country did the last ... Name wha
2 6751121 11.022 1 Herb fennel does not produce the same bulbous stem. 2  It is grown for its delicate leaves, which are used as a herb. 3  Herb fennel also produces seeds which have a licorice-like flav

132 6032019 7.9412 It is a highly aromatic and flavorful herb with culinary and medicinal uses and, along with the similar-tasting anise, is one of the primary ingredients of absinthe. Florence fennel or finocchio is a selection with a swollen, bulb-like stem base that is used as a vegetable.lorence fennel is one of the three main herbs used in the preparation of absinthe, an alcoholic mixture which originated as a medicinal elixir in Switzerland and became, by the late 19th century, a popular alcoholic drink in France and other countries.
133 8474678 7.9371 Finocchio fennel is closely related to sweet and bitter fennel commercially produced for either the seed, which is used as a spice, or the essential oil extracted from the seed.
134 2328301 7.9296 What Does Trans* Mean, and Where Did It Come From? What Does Trans* Mean, and Where Did It Come From? Poster created by online LGBTQ educator Sam Killermann. Itâs widely accepted that computer-mediated communicationâemailing, texting,

306 4607579 7.3904 Once I even found something that said it comes from the name 'Eve' and 'Anna'. If it does mean 'Archer' and or 'Yew' what do... show more My name is Yvonne, and I want to know what it means.
307 79033 7.389 Give me: who invented the robot, why they invented the robot, what does the robot do, how does the robot work, the date that person invented the robot, and the place that person comes from. 1  Share Question. 2  Flag as...
308 5582502 7.3887 Youâre probably familiar with the black licorice flavor of fennel, and while not everyone likes licorice, you can still get all the fennel benefits by using fennel essential oil. Fennel essential oil is best known as a powerful ingredient for good digestive health.
309 7882186 7.388699 Health Benefits Of Fennel Tea. Here is a list of fennel tea benefits. One of the most common applications of fennel is to treat heartburn. Drinking a single cup of tea or chewing fennel seeds when you start feeling the symptoms of indigestion 

943 6363880 6.692491 When it comes to wedding expenses, sometimes itâs hard to understand just why a particular element costs what it does, or why thereâs such a range in rates. Something as seemingly simple as cake can cost anywhere from $1.50 to $12.00 a slice!
944 6852814 6.69249 All of the B vitamins are incredibly important when it comes to energy metabolism. Find out what Vitamin B12 does for your health with help from a natural health enthusiast, avid runner, active blogger and published author in this free video clip. advertisement.
945 7100518 6.692489 No one enjoys feeling jealous. Yet, jealousy is an inevitable emotion that pretty much every one of us will experience. The problem with jealousy isnât that it comes up from time to time, but what it does to us when we donât get a hold on it.
946 7631891 6.692488 kgb answers Â» Local & Lookup Â» Definitions & Word Origins Â» What does the saying fair to midland mean, and where did it come from? Fair to Midland is an Amer

In [27]:
# dense
output_result("601684", msp_dev_small_queries,  msp_docs, dense, dense_bm25_worse_item, True)

what country does fennel come from
[('7527096', 15, 32)]
cut_rank: 32
0 7527089 0.7294572591781616 Fennel is originally from, Syria, India, Mexico and some more others. How to Grow Fennel Hints Fennel, popular in Mediterranean dishes, is a crunchy and sweet herb, available from autumn to early spring. 2  It is an excellent source of vitamin C and contains several otherâ¦. 3  Roasting Fennel in the Oven Fennel is an ancient vegetable dating to the times of the Ancient Greeks and Romans.
1 1039752 0.7198396921157837 Fennel, which has a delicate sweet aroma, is used as a spice (the seed), an herb (the leaves) and a vegetable (the bulb). It is also known as Florence Fennel or Finocchio, Roman Fennel, and Common Fennel.
2 4805371 0.6880911588668823 By Bonnie L. Grant. Florence fennel (Foeniculum vulgare) is the bulb type of fennel that is eaten as a vegetable. All parts of the plant are fragrant and can be used in culinary applications. Florence fennel cultivation began with the Greeks and

In [28]:
# max-sim-idf
output_result("531676", msp_dev_small_queries, msp_docs, max_soft_cos, max_soft_cos_bm25_better_item, True)

umeclidinium cost
[('7597704', 20, 5)]
cut_rank: 5
0 7597710 21.860270050807816 Abbreviations: UMEC/VI, umeclidinium and vilanterol; TIO, tiotropium; QALY, quality-adjusted life years; ICER, incremental cost-effectiveness ratio.
1 4844872 20.304280864882937 Order Trelegy Ellipta online or call toll free 1-866-401-3784. Lowest Price on Trelegy Ellipta, Guaranteed! We will beat any price on Trelegy Ellipta as a commitment to our Lowest Price Guarantee. If you find Trelegy Ellipta (Fluticasone Furoate/Umeclidinium/Vilanterol Trifenatate) for a lower price, contact us and we will match the price.
2 7597706 19.56265399385518 Full-text (PDF) | Purpose Umeclidinium/vilanterol (UMEC/VI) is a novel fixed dose combination of a long-acting muscarinic receptor antagonist (LAMA) and a long-acting beta 2 receptor antagonist (LABA) agent. This analysis evaluated the incremental cost-effectiveness ratio (ICER) of UMEC/VI compare...
3 7597701 19.08451097110008 Purpose Umeclidinium/vilanterol (UMEC/VI) 

In [29]:
# bm25
output_result("531676", msp_dev_small_queries, msp_docs, msp_dev_bm25, max_soft_cos_bm25_worse_item, True)

umeclidinium cost
cut_rank: 10000
0 1243151 10.6496 The average cost of 1 inhaler (28 doses) ranges between $300 and $380. 21, 23. Umeclidinium/vilanterol (Anoro Ellipta) Anoro Ellipta is a combination of 2 newer COPD medications, vilanterol and umeclidinium. It was the first LABA/LAMA product to receive FDA approval.
1 7597707 10.4695 Cost-effectiveness of combination therapy umeclidinium/vilanterol versus tiotropium in symptomatic COPD Spanish patients. Purpose: Umeclidinium/vilanterol (UMEC/VI) is a novel fixed dose combination of a long-acting muscarinic receptor antagonist (LAMA) and a long-acting beta 2 receptor antagonist (LABA) agent.
2 7597702 10.2958 Umeclidinium inhalation powder (Incruse Ellipta) is a long-acting anticholinergic medi-cation labeled for maintenance treatment of airflow obstruction in patients with chronic obstructive pulmonary disease (COPD), including emphysema and chronic bronchitis.1 Drug Dosage Dose form Cost* Umeclidinium (Incruse Ellipta) One inhalatio

436 6998982 3.139897 1. understand the calculation of: (1) unit cost, (2) cost of goods sold, (3) ending inventory, and (4) net income using absorption and variable costing. 2. be able to reconcile the difference between absorption costing and variable costing net income.3. prepare and use segmented income statements using a variable costing approach. Table of Contents. Overview Previous tutorials discussed the difference between product cost and period cost.otal unit cost. $7. Cost of Goods Sold: Absorption Costing versus Variable Costing To calculate the cost of goods sold, we must first calculate the sales in units. The sales in units is multiplied by the unit cost to calculate cost of goods sold.
437 1344218 3.139 product costs: -include all costs that are required to make a product-Product costs are: Direct Material, Direct Labor, Manufacturing Overhead-Are included as part of inventory and shown on the balance sheet until the product is sold. Product costs are often called invent

485 1726360 3.134199 DEFINITION of 'Cost Of Revenue'. The total cost of manufacturing and delivering a product or service. Cost of revenue information is found in a company's income statement, and is designed to represent the direct costs associated with the goods and services the company provides.Indirect costs, such as salaries, are not included.Next Up.REAKING DOWN 'Cost Of Revenue'. Cost of revenue is different from cost of goods sold (COGS) because it includes costs outside of production, such as distribution and marketing.
486 5401102 3.134198 Along with variable costs, fixed costs make up one of the two components of total cost: total cost is equal to fixed costs plus variable costs. In economics, fixed costs, indirect costs or overheads are business expenses that are not dependent on the level of goods or services produced by the business. They tend to be time-related, such as salaries or rents being paid per month, and are often referred to as overhead costs.
487 5465707 3.134

690 2134447 3.117196 fixed costs A cost of business which does not vary with output or sales; overheads. average total cost Average cost or unit cost is equal to total cost divided by the number of goods produced (the output quantity, Q).
691 2379201 3.117195 in_cost_distance_raster. The name of a cost distance raster to be used to determine the least-cost path from the destination locations to a source. The cost distance raster is usually created with the Cost Distance, Cost Allocation or Cost Back Link tools.
692 2839798 3.117194 6 Cost Studies of Buildings â  There is a general trend towards greater cost-effectiveness, and thus a need toexamine construction costs not solely in the context of initial costs but in terms of whole-life costs, or total-cost appraisal.
693 3409278 3.117193 Cost control, also known as cost management or cost containment, is a broad set of cost accounting methods and management techniques with the common goal of improving business cost-efficiency by reduci

In [30]:
# dense
output_result("531676", msp_dev_small_queries,  msp_docs, dense, dense_bm25_worse_item, True)

umeclidinium cost
[('7597704', 20, 23)]
cut_rank: 23
0 4644860 0.5811550617218018 incremental cost. The additional cost of an additional quantity. It is similar to marginal cost, except that marginal cost refers to the cost of the next unit. Incremental cost might be the additional cost from the next 200 units.
1 5119499 0.5692259073257446 n the actual cost of producing one article. variable cost. n a cost that varies directly with output. English Collins Dictionary-English Definition & Thesaurus &nbsp. See also: cost accounting, cost centre, cost rent, cost accountant. cost an arm and a leg. cost an arm and a leg. exp.
2 3049147 0.5606458187103271 Costs per unit are: Cost 1: $100,000/8,000 = $12.50 and $125,000/10,000 = $12.50. Cost 2: $40,000/8,000 = $5.00 and $50,000/10,000 = $5.00. Cost 3 is a mixed cost since the total cost is different, however, since the cost per unit is not the same at both levels, this is a mixed cost.
3 6912127 0.5539877414703369 Marginal costing-definition M

## degrade

In [31]:
common_lss_w = common(max_soft_cos_bm25_worse_item, coef_max_bm25_worse_item)
common_lss_w

defaultdict(list,
            {'524447': [('7573434', 424, 528, 478),
              ('7573434', 424, 528, 836),
              ('7573434', 424, 528, 956),
              ('7573433', 834, 888, 478),
              ('7573433', 834, 888, 836),
              ('7573433', 834, 888, 956),
              ('7573435', 851, 892, 478),
              ('7573435', 851, 892, 836),
              ('7573435', 851, 892, 956)],
             '524733': [('3399995', 110, 529, 157)],
             '787255': [('4216689', 2, 6, 6)],
             '787784': [('7724077', 11, 784, 16)],
             '525779': [('7706458', 324, 777, 360)],
             '811852': [('7941856', 1, 6, 6)],
             '1050695': [('7184656', 6, 8, 11)],
             '1051108': [('7183936', 252, 335, 257)],
             '1051279': [('7183615', 6, 32, 12)],
             '2798': [('7800991', 2, 3, 3)],
             '789292': [('7842289', 0, 1, 1)],
             '961705': [('7915039', 18, 72, 30)],
             '1052563': [('2882149', 2, 50, 4)]

In [32]:
for qid, item in common_lss_w.items():
    if qid in dense_bm25_better_item:
        print(qid, item, dense_bm25_better_item[qid])

524447 [('7573434', 424, 528, 478), ('7573434', 424, 528, 836), ('7573434', 424, 528, 956), ('7573433', 834, 888, 478), ('7573433', 834, 888, 836), ('7573433', 834, 888, 956), ('7573435', 851, 892, 478), ('7573435', 851, 892, 836), ('7573435', 851, 892, 956)] [('7573433', 834, 534)]
4947 [('7581255', 5, 9, 6)] [('7581263', 7, 6)]
1053931 [('7179185', 213, 642, 241)] [('7179178', 28, 26)]
277556 [('3655235', 6, 13, 7)] [('3655235', 6, 3)]
1067659 [('7160909', 73, 998, 110)] [('7160909', 73, 6)]
544060 [('7516973', 15, 72, 665)] [('7516971', 591, 487)]
816289 [('7780128', 7, 47, 10)] [('7780128', 7, 5), ('7780132', 643, 507)]
31432 [('7341787', 5, 147, 493)] [('7341780', 474, 2)]
818819 [('7783710', 4, 6, 5)] [('747282', 720, 562)]
556976 [('2020524', 5, 22, 7)] [('7502345', 120, 3)]
1084898 [('7124035', 292, 341, 482), ('7124035', 292, 341, 908), ('7124027', 29, 45, 482), ('7124027', 29, 45, 908)] [('7124029', 874, 848)]
1085762 [('7116282', 0, 43, 1)] [('7116278', 201, 1)]
300674 [('70

In [33]:
# max-sim-idf
output_result("1035719", msp_dev_small_queries, msp_docs, max_soft_cos, max_soft_cos_bm25_worse_item, True)

airplane definition of drag
[('7208597', 10, 17)]
cut_rank: 17
0 164137 13.004177491171404 Context: When two surfaces are in contact, friction is always present. Definition: To pull along with difficulty or effort. Context: An airplane comes to a stop because of the drag or pull on it as it moves through air and across a surface. Definition: Strength or energy exerted. Context: A force can change the direction of motion, increase the rate of motion, slow down motion, or stop it all together. Definition: The rubbing of one object or surface against another; the force that resists motion between bodies in contact. Context: Bicycle brakes use friction to stop the wheels from turning.
1 7507821 12.655625457593352 State the relationship between airspeed, camber, angle of attack, and lift. Give the four forces of flight and tell which of these forces oppose each other. Describe maximum gross weight, empty weight, center of gravity, center of lift, and useful. load with relation to an airplan

In [34]:
# coef-max
output_result("1035719", msp_dev_small_queries, msp_docs, coef_max, coef_max_bm25_worse_item, True)

airplane definition of drag
[('7208597', 10, 11)]
cut_rank: 11
0 8108954 18.986013904070855 This slide gives technical definitions of a wing's geometry, which is one of the chief factors affecting airplane lift and drag. The terminology used here is used throughout the airplane industry today and was mostly known to the Wright brothers in 1900.
1 49562 18.835955568313597 If you have a light airplane with more drag and a heavy airplane with less drag, the heavy airplane can fly much more easily. This is because the heavier airplane has less drag as well as more momentum to push through the air.
2 6907132 18.48053840982914 An aircraft propulsion system must achieve two things. First, the thrust from the propulsion system must balance the drag of the airplane when the airplane is cruising. And second, the thrust from the propulsion system must exceed the drag of the airplane for the airplane to accelerate. In fact, the greater the difference between the thrust and the drag, called the exc

In [35]:
# bm25
output_result("1035719", msp_dev_small_queries, msp_docs, msp_dev_bm25, max_soft_cos_bm25_worse_item, True)

airplane definition of drag
[('7208597', 10, 17)]
cut_rank: 17
0 8108954 11.8972 This slide gives technical definitions of a wing's geometry, which is one of the chief factors affecting airplane lift and drag. The terminology used here is used throughout the airplane industry today and was mostly known to the Wright brothers in 1900.
1 49562 11.7296 If you have a light airplane with more drag and a heavy airplane with less drag, the heavy airplane can fly much more easily. This is because the heavier airplane has less drag as well as more momentum to push through the air.
2 7741143 11.4944 + Contact Glenn. This slide gives technical definitions of a wing's geometry, which is one of the chief factors affecting airplane lift and drag. The terminology is used throughout the airplane industry and is also found in the FoilSim interactive airfoil simulation program developed here at NASA Glenn.
3 6907132 11.4567 An aircraft propulsion system must achieve two things. First, the thrust from th

In [36]:
# dense
output_result("1035719", msp_dev_small_queries,  msp_docs, dense, dense_bm25_better_item, True)

airplane definition of drag
[('7208597', 10, 4)]
cut_rank: 4
0 2225979 0.703112006187439 - Cannot fly in a controlled manner with out drag - Allows pilot to control flight By increasing drag in certain areas using control surfaces that push against the air, the pilot can control the direction they are flying at. As the angle of the wing increases (Angle of Attack), so does the induced drag EO M231.03 - Describe the Types of Drag That Act Upon an Aircraft WO2 Tieu / March 19, 2015 / 1 x 30 min. Main Teaching Points 1. Introduce Drag as a Useful Force and Explain the Two Types of Drag 2.
1 8042198 0.6831392049789429 Forms of drag. In flight, any airplane produces drag. This total drag is a combination, or the sum of induced drag (as a result of lift produced by the wing) and parasite drag. Note that: when an aircraft increases speed, induced drag becomes less (due to a lower angle of attack) and parasite drag increases. Induced drag
2 2930108 0.6693023443222046 Each airplane has a differ

In [37]:
# max-sim-idf
output_result("859274", msp_dev_small_queries, msp_docs, max_soft_cos, max_soft_cos_bm25_worse_item, True)

what is unreserved designated fund balance in government accounting
[('7767938', 5, 7)]
cut_rank: 7
0 7767934 40.384664649030285 Designated fund balance accounts are reported as part of the unreserved fund balance. There are basically three groups of funds in governmental accounting; governmental funds, proprietary funds, and fiduciary funds.
1 4474887 40.09112860088141 Designated fund balance accounts are reported as part of the unreserved fund balance. There are basically three groups of funds in governmental accounting; governmental funds, proprietary funds, and fiduciary funds.. The equity accounts in governmental accounting are referred to as fund balance. The fund balance accounts are more of a balancing item as contrasted to rights of owners in a business enterprise.
2 7767930 39.604105200390975 Undesignated, Unreserved Fund Balances. Undesignated, unreserved fund balance is the difference between total fund balance and the portion that is reserved and designated. This is the ba

In [38]:
# bm25
output_result("859274", msp_dev_small_queries, msp_docs, msp_dev_bm25, max_soft_cos_bm25_worse_item, True)

what is unreserved designated fund balance in government accounting
[('7767938', 5, 7)]
cut_rank: 7
0 7767935 23.504101 The portion of fund balance that is not reserved is fittingly called unreserved fund balance. It represents resources that can be used for any purpose of the fund they are reported in. Unreserved fund balance in a debt service fund can be used to repay any outstanding debt. Unreserved fund balance in the general fund can be used for any purpose at all. Governments may report designations of their unreserved fund balance. Although unreserved fund balance is not legally limited to any specific purpose, a government may designate some unreserved fund balance to express its intention to use available resources in a particular manner.
1 1180405 22.5229 The portion of fund balance that is not reserved is fittingly called unreserved fund balance. It represents resources that can be used for any purpose of the fund they are reported in. Unreserved fund balance in a debt servi

In [39]:
# dense
output_result("859274", msp_dev_small_queries,  msp_docs, dense, dense_bm25_better_item, True)

what is unreserved designated fund balance in government accounting
[('7767938', 5, 1)]
cut_rank: 1
0 7767930 0.7987086772918701 Undesignated, Unreserved Fund Balances. Undesignated, unreserved fund balance is the difference between total fund balance and the portion that is reserved and designated. This is the balance available for legal appropriation and expenditure if a government budgets on a GAAP basis for its governmental funds.
1 7767938 0.7152896523475647 Unreserved fund balance is the difference between total and reserved fund balance. It has two components: designated and undesignated. The unreserved fund balance of the general fund represents the balance available for legal appropriation and general operating expenditures.


## degrade2

In [40]:
for qid, item in common_lss_w.items():
    if qid in dense_bm25_worse_item:
        if item[0][1] < 10:
            print(qid, item, dense_bm25_worse_item[qid], msp_dev_small_queries[qid])

787255 [('4216689', 2, 6, 6)] [('4216689', 2, 49)] what is project charter in project management
811852 [('7941856', 1, 6, 6)] [('7941856', 1, 134)] what is the common name for jade
1050695 [('7184656', 6, 8, 11)] [('7184656', 6, 71)] what is motor dyspraxia
1051279 [('7183615', 6, 32, 12)] [('7183615', 6, 192)] what is medjool dates
2798 [('7800991', 2, 3, 3)] [('7800991', 2, 15)] Does Suddenlink Carry ESPN3
789292 [('7842289', 0, 1, 1)] [('7842289', 0, 1)] what is radio miraya
1052563 [('2882149', 2, 50, 4)] [('2882149', 2, 26)] who stars in the tv show taken
1052985 [('7180637', 0, 1, 1)] [('7180637', 0, 6)] what is ion plating on jewelry
4947 [('7581255', 5, 9, 6)] [('7581255', 5, 8)] Ludacris Net Worth
1057631 [('7172696', 5, 6, 6)] [('7172696', 5, 46)] what is cplm
9083 [('7067274', 0, 3, 1)] [('7067274', 0, 202)] ____________________ is considered the father of modern medicine.
796383 [('7843257', 2, 3, 3)] [('7843257', 2, 338)] what is simplicheck
1059045 [('7170234', 5, 274, 8

402075 [('7682183', 2, 36, 4)] [('7682183', 2, 409)] is alopecia permanent
665231 [('7433180', 6, 10, 7)] [('7433180', 6, 15)] what happened to occupy wall street movement
403361 [('7535298', 0, 1, 9), ('7535295', 3, 25, 9)] [('7535298', 0, 2), ('7535295', 3, 274)] is asperger's under a new dsm
404051 [('7689210', 2, 9, 4)] [('7689210', 2, 86)] is beer or wine more fattening
142579 [('7532380', 5, 13, 6)] [('7532380', 5, 344), ('7532374', 4, 29)] devine paint color stores
405090 [('7517450', 1, 5, 3)] [('7517450', 1, 59)] is bv an std
667373 [('6817792', 5, 32, 14)] [('6817792', 5, 469)] what happens when stop drinking alcohol
405660 [('7405676', 6, 20, 7)] [('7405676', 6, 258)] is catecholamine a steroid
146212 [('7682903', 0, 3, 1), ('7682902', 150, 162, 1), ('7682904', 9, 21, 1)] [('7682903', 0, 8), ('7682904', 9, 11)] difference between a prawn and a shrimp
409557 [('7549686', 3, 41, 6)] [('7549686', 3, 259)] is erythritol all natural?
148777 [('7557966', 1, 2, 11), ('7557969', 9, 

In [41]:
# max-sim-idf
output_result("273481", msp_dev_small_queries, msp_docs, max_soft_cos, max_soft_cos_bm25_worse_item, True)

how long will food last in the refrigerator without power
[('2097896', 3, 40)]
cut_rank: 40
0 2709751 32.35729141509716 How Long Will Food Last? Perishable food in a refrigerator lasts about four hours without power, as long as the door is kept closed. Opening the door lets some of the cold escape and increases the temperature in the fridge, so keep it closed as long as possible. A refrigerator thermometer helps you keep an eye on the temperature.
1 3257445 28.631752259468698 The time a refrigerator will stay cold when power is lost depends on the brand and how. Keep your food fresh during the storm. good the seals keep the cold inside. Time can vary from 2-6 hours without opening the doors. Without any power you can think of your refrigerator as a large picnic cooler. Placing blocks of ice inside will help to keep the temperature cool. Food in the freezer will last longer, especially if the freezer is more than half full. You need to be aware of how long the power has been out. Foods 

In [42]:
# bm25
output_result("273481", msp_dev_small_queries, msp_docs, msp_dev_bm25, max_soft_cos_bm25_worse_item, True)

how long will food last in the refrigerator without power
[('2097896', 3, 40)]
cut_rank: 40
0 2709751 17.998199 How Long Will Food Last? Perishable food in a refrigerator lasts about four hours without power, as long as the door is kept closed. Opening the door lets some of the cold escape and increases the temperature in the fridge, so keep it closed as long as possible. A refrigerator thermometer helps you keep an eye on the temperature.
1 4694658 15.7739 The refrigerator: how long will foods remain safe without power? Â· All foods: at least 4 hours, if the fridge is left unopened. Â· Discard after 4 hours without power: Meat, poultry, fish, milk, eggs, cooked leftovers, soft cheeses, deli cold cuts/salads and other perishable foods.he refrigerator: how long will foods remain safe without power? Â· All foods: at least 4 hours, if the fridge is left unopened. Â· Discard after 4 hours without power: Meat, poultry, fish, milk, eggs, cooked leftovers, soft cheeses, deli cold cuts/salads 

37 3598397 13.1913 How long does deli provolone cheese stay fresh in the refrigerator? This food will only last a few days. 10 days in the refrigerator if the food is stored properly. Place in an air tight container for the best results.
38 4700648 13.1645 Cooler. The first thing you should do when a refrigerator stops running is find every cooler you have in the home. Load the coolers with ice, ice packs, or even those old frozen bags of peas to keep your perishable foods cool.f the refrigerator has a freezer, food will last longer in the freezer section -- 24 hours or longer without power (assuming the freezer door hasn't been opened frequently), depending on how full the freezer is.
39 6324371 13.1477 Any perishable food that has been above 40 degrees Fahrenheit for more than two hours should be discarded. If the refrigerator has a freezer, food will last longer in the freezer section -- 24 hours or longer without power (assuming the freezer door hasn't been opened frequently), depe

In [43]:
# dense
output_result("273481", msp_dev_small_queries,  msp_docs, dense, dense_bm25_worse_item, True)

how long will food last in the refrigerator without power
[('2097896', 3, 80)]
cut_rank: 80
0 5550056 0.7416685819625854 Obviously, the food is in the fridge to keep it from spoiling and without power to the appliance, it will eventually go bad. But how long does that take? The U.S. Department of Agriculture says the average refrigerator will keep food cold for about four hours without power, as long as you keep the doors closed. The more you open the door, the shorter that period becomes.
1 337922 0.7352566719055176 How Long Does Food Keep in a Refrigerator With No Power? Losing power is never fun, but you donât want to make it worse by eating food from the refrigerator thatâs gone bad. If your power is out for longer than four hours, you may be tossing most of the food from your refrigerator into the trash. As the saying goes: When in doubt, throw it out.
2 5713343 0.7304427623748779 They said that food will last about 4 to 6 hours depending on how cold you kept the fridge/freeze

In [44]:
# max-sim-idf
output_result("1089177", msp_dev_small_queries, msp_docs, max_soft_cos, max_soft_cos_bm25_worse_item, True)

united home life insurance phone number
[('7087481', 3, 27)]
cut_rank: 27
0 6998823 18.835177058060722 American Modern Insurance Customer Service Phone Number. Phone Number of American Modern Insurance is +1-866-884-6167 . American Modern Insurance is a group of insurance companies and offers Home Insurance, Vehicle Insurance and Boat Insurance solutions in the United States.
1 1186352 18.796429737709182 United Of Omaha toll free customer service number : (800)775-6000. First United American Life Insurance Company toll free customer service number : 1-888-342-7243. Equitable Life Insurance Company of Iowa toll free customer service number : (515) 698-7000. United Home Life toll free customer service number : -4228.
2 6998818 18.66771088111334 American Modern Insurance Customer Service Phone Number Phone Number of American Modern Insurance is +1-866-884-6167. American Modern Insurance is a group of insurance companies and offers Home Insurance, Vehicle Insurance and Boat Insurance solut

In [45]:
# coef-max
output_result("1089177", msp_dev_small_queries, msp_docs, coef_max, coef_max_bm25_worse_item, True)

united home life insurance phone number
[('7087481', 3, 13)]
cut_rank: 13
0 5704749 24.72621071239114 Hartford Life Insurance Customer Service Phone Number. Phone Number of Hartford Life Insurance is 1-800-231-5453 . The Hartford Financial Services Group, Incorporation is one of the privately held insurance services providing organization in the United States.
1 6998818 24.101455158233644 American Modern Insurance Customer Service Phone Number Phone Number of American Modern Insurance is +1-866-884-6167. American Modern Insurance is a group of insurance companies and offers Home Insurance, Vehicle Insurance and Boat Insurance solutions in the United States. Its home insurance solutions cover wide array of homes like mobile homes, seasonal homes, rental homes, etc.
2 6998823 23.734764740395544 American Modern Insurance Customer Service Phone Number. Phone Number of American Modern Insurance is +1-866-884-6167 . American Modern Insurance is a group of insurance companies and offers Home 

In [46]:
# bm25
output_result("1089177", msp_dev_small_queries, msp_docs, msp_dev_bm25, max_soft_cos_bm25_worse_item, True)

united home life insurance phone number
[('7087481', 3, 27)]
cut_rank: 27
0 5704749 14.2209 Hartford Life Insurance Customer Service Phone Number. Phone Number of Hartford Life Insurance is 1-800-231-5453 . The Hartford Financial Services Group, Incorporation is one of the privately held insurance services providing organization in the United States.
1 6998818 14.0943 American Modern Insurance Customer Service Phone Number Phone Number of American Modern Insurance is +1-866-884-6167. American Modern Insurance is a group of insurance companies and offers Home Insurance, Vehicle Insurance and Boat Insurance solutions in the United States. Its home insurance solutions cover wide array of homes like mobile homes, seasonal homes, rental homes, etc.
2 6998823 13.8194 American Modern Insurance Customer Service Phone Number. Phone Number of American Modern Insurance is +1-866-884-6167 . American Modern Insurance is a group of insurance companies and offers Home Insurance, Vehicle Insurance and

In [47]:
# dense
output_result("1089177", msp_dev_small_queries,  msp_docs, dense, dense_bm25_worse_item, True)

united home life insurance phone number
[('7087481', 3, 352)]
cut_rank: 352
0 1186352 0.7530218958854675 United Of Omaha toll free customer service number : (800)775-6000. First United American Life Insurance Company toll free customer service number : 1-888-342-7243. Equitable Life Insurance Company of Iowa toll free customer service number : (515) 698-7000. United Home Life toll free customer service number : -4228.
1 2751397 0.7501448392868042 American Income Life Insurance Company's Best Toll-Free/800 Customer Phone Number This is American Income Life Insurance Company's best phone number, the real-time current wait on hold and tools for skipping right through those phone lines to get right to a American Income Life Insurance Company agent.
2 7290222 0.7481680512428284 American Income Life Insurance's Best Toll-Free/800 Customer Phone Number This is American Income Life Insurance's best phone number, the real-time current wait on hold and tools for skipping right through those phon

109 7896853 0.6485235691070557 The customer support phone number of Mercury Insurance is +1-800-956-3728, +1-800-503-3724 (Click phone number to call). The postal and official address, email address and phone number (helpline) of Mercury Insurance Service Center and Mercury Insurance customer service phone number is given below. The helpline of Mercury Insurance customer service phone number may or may not be toll free. Click Here To View Mercury Insurance Customer Service Phone Numbers
110 6998817 0.6481925249099731 This phone number is American Modern Insurance Company's Best Phone Number because 264 customers like you used this contact information over the last 18 months and gave us feedback. Common problems addressed by the customer care unit that answers calls to 800-543-2644 include Get insurance, Make/Chase a claim, Coverage question, Overcharge/Strange charge, Renew coverage and other customer service issues.
111 5646836 0.64745032787323 How to Find an Insurance Policy Number. 

206 7395445 0.6135375499725342 Workplace benefits packages may include life insurance and/or disability insurance from American United Life Insurance CompanyÂ®, a OneAmericaÂ® company. Employers and employees, please use this contact information for assistance with your policy or to request a payment.
207 2798398 0.6133110523223877 Phone Admiral Insurance to discuss your car, home or travel insurance policy on 0843 504 7227. Alternatively you can also phone Admiral on this localised contact number: 02920 601294. This number is more liable to change but tends to be cheaper â especially when calling from a mobile phone.
208 837723 0.6132277846336365 Some More Customer Service Phone Numbers 24/7 Related to Victoria Principal. 1  Principal Bank Customer Service Phone Number: 1-800-986-3343. 2  Principal Financial Group Customer Service Phone Number: +1-800-986-3343. 3  Principal Life Insurance Customer Service Phone Number: +1 320-235-5995.
209 8742266 0.612768828868866 Address of Metlif

330 5593081 0.574683666229248 Ready to call Globe Life Insurance? We called Globe Life Insurance's phone number, tried the various choices in their interactive phone system, and recorded it for you. Click/tap on endpoints to see how to get to them, transcriptions of recorded messages, customer information required, and more. Fastest way to a human.
331 8020262 0.5744850635528564 The customer support phone number of Taste of Home United States is 1800-3442-560 / 1-800-880-3012/ (Click phone number to call). The postal and official address, email address and phone number (helpline) of Taste of Home United States Service Center and Taste of Home United States customer care number is given below.
332 2199236 0.5741500854492188 To be connected to LV Insurance directly you can call the phone number to the left at a standard price of 5p per minute. We provide a Premium rate call routing service for LV Insurance customer service, support and sales phone numbers.enefits of using contact number 

# Robust04

In [143]:
def read_doc_json(path):
    docs = dict()
    with open(path) as f:
        for line in f:
            jtext = json.loads(line)
            docs[jtext["id"]] = jtext["contents"]
    return docs
    

In [144]:
def rob_common(items1, items2):
    common_result = defaultdict(list)
    for qid, item1 in items1.items():
        if qid in items2:
            item2 = items2[qid]
            for i1 in item1:
                for i2 in item2:
                    did1, rr1, tr1 = i1
                    did2, rr2, tr2 = i2
                    if did1 == did2:
                        common_result[qid].append((did1, rr1, tr1, tr2))
            
    return common_result
    

In [181]:
rob_qrels = read_qrel("/home/gaia_data/iida.h/TREC/robust04/qrels.robust2004.txt")
rob_queries = read_query("/home/gaia_data/iida.h/TREC/robust04/04.testset.tsv")
rob_docs = read_doc_json("/home/gaia_data/iida.h/TREC/robust04/doc_jsonl/all_in_one/all_a.jsonl")
rob_bm25 = read_trec("/home/gaia_data/iida.h/TREC/robust04/result/run.robust04.trec")

In [182]:
rob_qrels_pytrec = dict()
for k, vs in rob_qrels.items():
    rob_qrels_pytrec[k] = dict()
    for v in vs:
        rob_qrels_pytrec[k][v] = 1

In [183]:
with open("../test/robust04_1000/sbert/result/max_soft_cos/local_ave/1/rerank_score.json") as f:
    max_sim_idf = json.load(f)
    
with open("../test/robust04_1000/sbert/result/max_soft_cos/local_ave/0/rerank_score.json") as f:
    max_sim = json.load(f)

In [184]:
sim_idf_better, sim_idf_worse = compare(max_sim_idf, max_sim, rob_qrels)

In [185]:
total = 0
for k, vs in sim_idf_better.items():
    for v in vs:
        if v[1] < 20:
            total += 1
            print(k, v)

print(total)
# idf_better

301 ('FBIS3-19199', 11, 10)
301 ('FBIS3-37947', 9, 5)
301 ('FBIS4-45469', 19, 15)
302 ('FBIS4-67720', 16, 13)
302 ('LA013089-0022', 6, 4)
302 ('FBIS3-22702', 3, 2)
302 ('FBIS3-41724', 19, 16)
302 ('FBIS3-41672', 18, 15)
302 ('FBIS3-61373', 7, 6)
303 ('LA071490-0091', 17, 16)
303 ('LA071090-0047', 11, 10)
303 ('LA122990-0029', 10, 9)
303 ('FT931-6554', 13, 12)
303 ('LA081090-0078', 15, 11)
304 ('FT943-1306', 9, 1)
304 ('FR940217-2-00013', 11, 9)
304 ('FR940511-0-00064', 19, 3)
304 ('FR940627-1-00012', 12, 7)
305 ('LA082690-0158', 12, 11)
306 ('FBIS4-59450', 13, 8)
306 ('FT924-8946', 10, 9)
306 ('LA120689-0144', 14, 10)
306 ('FT934-12216', 6, 4)
306 ('LA011790-0021', 18, 13)
306 ('FT933-9659', 16, 12)
306 ('FT933-3875', 9, 6)
307 ('FBIS3-35090', 11, 8)
307 ('FT921-12327', 9, 3)
307 ('LA110890-0070', 15, 12)
307 ('FR940617-2-00073', 14, 6)
307 ('FBIS4-26903', 7, 4)
307 ('FBIS4-4360', 16, 5)
307 ('FT921-12215', 5, 1)
308 ('FT933-5463', 12, 7)
309 ('FT923-5998', 3, 1)
309 ('LA110190-0228', 

In [186]:
total = 0
for k, v in sim_idf_worse.items():
    for v in vs:
        if v[1] < 20:
            total += 1
            print(k, v)
print(total)

301 ('LA080890-0001', 19, 16)
301 ('LA111690-0053', 12, 9)
301 ('LA121789-0120', 18, 11)
301 ('LA112590-0224', 5, 3)
301 ('LA070690-0080', 7, 4)
301 ('FR941019-1-00046', 11, 5)
301 ('FR940803-0-00112', 8, 7)
302 ('LA080890-0001', 19, 16)
302 ('LA111690-0053', 12, 9)
302 ('LA121789-0120', 18, 11)
302 ('LA112590-0224', 5, 3)
302 ('LA070690-0080', 7, 4)
302 ('FR941019-1-00046', 11, 5)
302 ('FR940803-0-00112', 8, 7)
303 ('LA080890-0001', 19, 16)
303 ('LA111690-0053', 12, 9)
303 ('LA121789-0120', 18, 11)
303 ('LA112590-0224', 5, 3)
303 ('LA070690-0080', 7, 4)
303 ('FR941019-1-00046', 11, 5)
303 ('FR940803-0-00112', 8, 7)
304 ('LA080890-0001', 19, 16)
304 ('LA111690-0053', 12, 9)
304 ('LA121789-0120', 18, 11)
304 ('LA112590-0224', 5, 3)
304 ('LA070690-0080', 7, 4)
304 ('FR941019-1-00046', 11, 5)
304 ('FR940803-0-00112', 8, 7)
305 ('LA080890-0001', 19, 16)
305 ('LA111690-0053', 12, 9)
305 ('LA121789-0120', 18, 11)
305 ('LA112590-0224', 5, 3)
305 ('LA070690-0080', 7, 4)
305 ('FR941019-1-00046'

678 ('LA112590-0224', 5, 3)
678 ('LA070690-0080', 7, 4)
678 ('FR941019-1-00046', 11, 5)
678 ('FR940803-0-00112', 8, 7)
679 ('LA080890-0001', 19, 16)
679 ('LA111690-0053', 12, 9)
679 ('LA121789-0120', 18, 11)
679 ('LA112590-0224', 5, 3)
679 ('LA070690-0080', 7, 4)
679 ('FR941019-1-00046', 11, 5)
679 ('FR940803-0-00112', 8, 7)
680 ('LA080890-0001', 19, 16)
680 ('LA111690-0053', 12, 9)
680 ('LA121789-0120', 18, 11)
680 ('LA112590-0224', 5, 3)
680 ('LA070690-0080', 7, 4)
680 ('FR941019-1-00046', 11, 5)
680 ('FR940803-0-00112', 8, 7)
681 ('LA080890-0001', 19, 16)
681 ('LA111690-0053', 12, 9)
681 ('LA121789-0120', 18, 11)
681 ('LA112590-0224', 5, 3)
681 ('LA070690-0080', 7, 4)
681 ('FR941019-1-00046', 11, 5)
681 ('FR940803-0-00112', 8, 7)
682 ('LA080890-0001', 19, 16)
682 ('LA111690-0053', 12, 9)
682 ('LA121789-0120', 18, 11)
682 ('LA112590-0224', 5, 3)
682 ('LA070690-0080', 7, 4)
682 ('FR941019-1-00046', 11, 5)
682 ('FR940803-0-00112', 8, 7)
683 ('LA080890-0001', 19, 16)
683 ('LA111690-0053

In [187]:
bm25_better, bm25_worse = compare(rob_bm25, max_sim, rob_qrels)

In [188]:
total = 0
for k, vs in sim_idf_better.items():
    for v in vs:
        if v[1] < 20:
            total += 1
            print(k, v)

print(total)

301 ('FBIS3-19199', 11, 10)
301 ('FBIS3-37947', 9, 5)
301 ('FBIS4-45469', 19, 15)
302 ('FBIS4-67720', 16, 13)
302 ('LA013089-0022', 6, 4)
302 ('FBIS3-22702', 3, 2)
302 ('FBIS3-41724', 19, 16)
302 ('FBIS3-41672', 18, 15)
302 ('FBIS3-61373', 7, 6)
303 ('LA071490-0091', 17, 16)
303 ('LA071090-0047', 11, 10)
303 ('LA122990-0029', 10, 9)
303 ('FT931-6554', 13, 12)
303 ('LA081090-0078', 15, 11)
304 ('FT943-1306', 9, 1)
304 ('FR940217-2-00013', 11, 9)
304 ('FR940511-0-00064', 19, 3)
304 ('FR940627-1-00012', 12, 7)
305 ('LA082690-0158', 12, 11)
306 ('FBIS4-59450', 13, 8)
306 ('FT924-8946', 10, 9)
306 ('LA120689-0144', 14, 10)
306 ('FT934-12216', 6, 4)
306 ('LA011790-0021', 18, 13)
306 ('FT933-9659', 16, 12)
306 ('FT933-3875', 9, 6)
307 ('FBIS3-35090', 11, 8)
307 ('FT921-12327', 9, 3)
307 ('LA110890-0070', 15, 12)
307 ('FR940617-2-00073', 14, 6)
307 ('FBIS4-26903', 7, 4)
307 ('FBIS4-4360', 16, 5)
307 ('FT921-12215', 5, 1)
308 ('FT933-5463', 12, 7)
309 ('FT923-5998', 3, 1)
309 ('LA110190-0228', 

677 ('FT923-14708', 15, 14)
677 ('LA062490-0095', 10, 9)
677 ('LA060490-0141', 16, 15)
677 ('LA032589-0034', 17, 16)
677 ('LA102089-0186', 3, 2)
677 ('FT943-5813', 5, 4)
678 ('LA031690-0157', 12, 8)
678 ('LA031889-0102', 8, 7)
678 ('LA020289-0049', 13, 10)
678 ('LA051390-0201', 18, 13)
679 ('LA081790-0028', 13, 11)
679 ('LA100989-0084', 18, 15)
679 ('FBIS4-10578', 14, 12)
679 ('FR940721-0-00011', 19, 16)
679 ('LA052890-0022', 12, 10)
679 ('FR940831-1-00024', 15, 14)
679 ('LA032590-0149', 11, 9)
680 ('LA051390-0082', 15, 12)
680 ('LA081789-0137', 1, 0)
680 ('LA070790-0074', 17, 11)
680 ('LA062389-0132', 16, 8)
680 ('LA031190-0209', 18, 13)
680 ('LA032590-0164', 7, 6)
680 ('LA090890-0081', 14, 9)
681 ('FR940706-2-00126', 5, 4)
681 ('FT921-6100', 10, 9)
681 ('LA120589-0016', 3, 2)
681 ('FR941222-0-00029', 11, 6)
681 ('LA111090-0153', 18, 14)
682 ('LA031689-0191', 15, 14)
682 ('LA050690-0040', 13, 12)
682 ('LA031990-0075', 6, 5)
682 ('LA063089-0054', 3, 2)
682 ('LA122989-0140', 9, 8)
682 (

In [189]:
total = 0
for k, vs in sim_idf_worse.items():
    for v in vs:
        if v[1] < 20:
            total += 1
            print(k, v)

print(total)

301 ('FBIS4-46780', 10, 12)
301 ('FBIS4-43965', 14, 16)
301 ('FBIS4-38410', 16, 18)
301 ('FBIS3-60093', 5, 7)
301 ('FBIS3-49567', 7, 9)
301 ('FBIS3-19646', 13, 14)
301 ('FBIS3-21961', 12, 13)
302 ('FR940207-2-00089', 2, 3)
302 ('FR940527-1-00163', 4, 7)
302 ('FR940202-2-00133', 15, 19)
302 ('LA031489-0032', 13, 23)
303 ('FT941-15661', 19, 21)
303 ('LA041690-0035', 9, 14)
303 ('LA051590-0074', 12, 13)
303 ('LA090489-0037', 16, 17)
303 ('LA110590-0076', 14, 15)
304 ('LA041489-0143', 17, 28)
304 ('LA032289-0086', 5, 8)
304 ('FR941205-0-00114', 10, 12)
304 ('FR940906-2-00134', 4, 18)
304 ('FR940419-2-00009', 15, 32)
304 ('FR940930-2-00144', 3, 19)
304 ('FR941019-2-00100', 8, 16)
304 ('FR940617-0-00103', 0, 10)
304 ('FR940617-0-00104', 2, 22)
304 ('FR941011-2-00025', 6, 25)
304 ('FR940404-1-00045', 1, 6)
304 ('FR941003-0-00084', 13, 27)
304 ('FR940406-2-00032', 7, 11)
304 ('FR940617-0-00107', 14, 47)
304 ('FR940429-2-00057', 18, 20)
305 ('LA031490-0055', 6, 9)
305 ('LA112489-0003', 5, 6)
30

432 ('LA122489-0116', 4, 5)
432 ('LA070190-0126', 0, 3)
432 ('LA123089-0130', 18, 23)
432 ('LA010889-0149', 16, 18)
432 ('LA122989-0120', 5, 6)
432 ('LA041689-0062', 7, 9)
432 ('LA022789-0077', 11, 20)
432 ('FT944-1065', 13, 27)
432 ('LA052089-0097', 3, 4)
432 ('LA091090-0043', 14, 54)
433 ('FBIS3-19550', 19, 32)
433 ('FBIS4-11710', 2, 3)
433 ('FBIS4-8802', 3, 4)
433 ('FT941-4064', 16, 24)
433 ('FBIS4-42414', 9, 23)
433 ('FBIS4-8432', 7, 18)
433 ('FT931-2409', 4, 7)
433 ('FBIS4-19773', 13, 27)
433 ('FT923-6599', 10, 20)
433 ('LA040190-0005', 12, 29)
434 ('FBIS4-22567', 11, 13)
434 ('FBIS3-61393', 14, 16)
434 ('FBIS3-24040', 13, 15)
434 ('FT911-778', 16, 21)
434 ('FBIS3-61392', 18, 20)
434 ('FT942-14619', 12, 19)
434 ('FBIS3-56519', 7, 11)
434 ('FBIS4-22418', 4, 6)
435 ('FT943-9327', 18, 29)
435 ('LA032990-0138', 14, 26)
435 ('FT943-3422', 11, 19)
435 ('FT941-12411', 1, 2)
435 ('FT943-5505', 5, 6)
435 ('FT933-11274', 19, 40)
435 ('LA022189-0053', 15, 28)
435 ('LA120589-0035', 16, 25)
43

624 ('LA122589-0045', 4, 5)
624 ('LA121689-0092', 11, 13)
624 ('LA052790-0104', 17, 18)
624 ('LA120689-0114', 9, 10)
625 ('FT931-5046', 10, 18)
625 ('FT931-5642', 11, 22)
626 ('LA030289-0120', 13, 50)
626 ('FBIS4-36847', 7, 13)
626 ('FT924-12932', 9, 10)
626 ('LA071390-0073', 6, 48)
626 ('FT944-17837', 4, 26)
626 ('LA061889-0004', 2, 12)
626 ('FT933-14268', 17, 43)
626 ('LA123090-0081', 1, 20)
626 ('FT943-1818', 14, 61)
627 ('FBIS3-43075', 18, 19)
627 ('FBIS4-16674', 14, 17)
627 ('FBIS4-68433', 19, 21)
627 ('FT921-10747', 2, 3)
627 ('FBIS3-42623', 9, 11)
628 ('LA101990-0030', 13, 17)
628 ('FBIS3-10558', 9, 11)
628 ('LA010790-0034', 1, 4)
628 ('LA122390-0150', 7, 9)
628 ('LA122589-0052', 16, 19)
628 ('LA021590-0188', 18, 27)
628 ('LA122389-0018', 14, 38)
628 ('LA010690-0073', 3, 10)
629 ('LA041789-0046', 11, 28)
629 ('LA070990-0001', 5, 18)
629 ('LA110589-0190', 7, 32)
629 ('LA081989-0080', 4, 15)
629 ('LA051090-0074', 2, 3)
629 ('LA031690-0116', 3, 9)
630 ('FT921-6208', 8, 16)
630 ('FT

In [190]:
common_idf_b = rob_common(sim_idf_better, bm25_better)
common_idf_b

defaultdict(list,
            {'301': [('FBIS4-40720', 644, 539, 542),
              ('FBIS4-8957', 370, 316, 147),
              ('FR941230-2-00127', 922, 860, 377),
              ('FBIS3-61106', 433, 376, 126),
              ('FBIS4-41215', 282, 229, 269),
              ('FBIS3-24039', 443, 389, 13),
              ('FBIS4-2105', 787, 720, 203),
              ('FR940429-0-00128', 919, 857, 366),
              ('FBIS4-40936', 404, 350, 26),
              ('FBIS3-41288', 32, 27, 17),
              ('FBIS4-16126', 268, 219, 249),
              ('FBIS3-24247', 276, 225, 53),
              ('FBIS3-38787', 457, 405, 27),
              ('FBIS4-3230', 601, 525, 226),
              ('FBIS4-25032', 798, 740, 373),
              ('FBIS4-3370', 844, 802, 321),
              ('FBIS3-37418', 155, 97, 6),
              ('FBIS4-41832', 391, 339, 108),
              ('FBIS4-47045', 790, 725, 36),
              ('FBIS4-16951', 86, 55, 8),
              ('FBIS4-46757', 632, 534, 34),
              ('FBI

In [191]:
common_idf_w = rob_common(sim_idf_worse, bm25_worse)
common_idf_w

defaultdict(list,
            {'301': [('FBIS3-17422', 137, 230, 852),
              ('FBIS3-22088', 114, 277, 576),
              ('FBIS3-31749', 129, 234, 579),
              ('FBIS3-21765', 52, 53, 152),
              ('FBIS4-43552', 57, 70, 712),
              ('FBIS4-21302', 31, 43, 218),
              ('FBIS3-26913', 369, 502, 861),
              ('FBIS4-46780', 10, 12, 60),
              ('FBIS3-14832', 69, 83, 427),
              ('FBIS3-24325', 80, 98, 175),
              ('FBIS4-43965', 14, 16, 61),
              ('FBIS3-22049', 55, 66, 233),
              ('FBIS3-60000', 113, 177, 747),
              ('FBIS4-31295', 35, 39, 279),
              ('FBIS3-10169', 715, 876, 949),
              ('FBIS3-21670', 811, 895, 945),
              ('FBIS3-42315', 244, 459, 709),
              ('FBIS3-41285', 245, 460, 708),
              ('FBIS3-26914', 224, 359, 275),
              ('FBIS4-44181', 91, 107, 252),
              ('FBIS4-45333', 23, 25, 63),
              ('FBIS4-33867', 101

In [192]:
print(rob_queries["315"])
rob_docs["LA092690-0034"]

 Unexplained Highway Accidents


'September 26, 1990, Wednesday, Home Edition\nCAUSE OF RENO BUS CRASH PROBED;ACCIDENT: RAIN APPARENTLY CONTRIBUTED TO THE COLLISION THAT INJURED DOZENS OF \nNORWALK SENIOR CITIZENS AND KILLED AN OFF-DUTY PATROL WOMAN. TWELVE PEOPLE \nREMAIN HOSPITALIZED.\nTwelve members of a Norwalk senior citizens group remained hospitalized in Reno \nTuesday as investigators tried to find out why a car driven by an off-duty \nNevada Highway Patrol officer strayed into the path of a tour bus carrying 44 \nseniors.\nThe officer, Lynne T. Hammill, 52, of Carson City, was fatally injured in the \ncollision on U.S. 395, about 10 miles south of Reno.\nPat Dyer, 69, of Norwalk remained in serious condition with a head injury at \nWashoe Medical Center in Reno, hospital spokeswoman Wendy Knorr said.\nEleven other injured passengers were in satisfactory or good condition with \nbroken ribs, back injuries, cuts and bruises at Washoe and two other hospitals. \nThirty-three others, including the bus driver, were

In [193]:
with open("../test/robust04/sbert/stats/df.json") as f:
    rob_df = json.load(f)
    
D = len(rob_docs)
rob_idf = dict()
for k, v in rob_df.items():
    rob_idf[k] = np.log(D/v)

In [194]:
tokenizer = MPNetTokenizer.from_pretrained("microsoft/mpnet-base")

In [195]:
for qid, query in rob_queries.items():
    t_id_query = tokenizer(query)["input_ids"]
    t_query = tokenizer.tokenize(query)
    t_idf = [round(rob_idf[str(t)],3) for t in t_id_query]
    print(qid, query, t_query, np.max(t_idf), t_idf[1:-1])

301  International Organized Crime ['international', 'organized', 'crime'] 4.134 [1.65, 4.134, 3.817]
302  Poliomyelitis and Post-Polio ['pol', '##iom', '##ye', '##lit', '##is', 'and', 'post', '-', 'pol', '##io'] 6.741 [4.635, 6.741, 5.049, 5.531, 3.124, 0.071, 2.733, 0.152, 4.635, 4.379]
303  Hubble Telescope Achievements ['hub', '##ble', 'telescope', 'achievements'] 7.487 [5.326, 2.904, 7.487, 4.625]
304  Endangered Species (Mammals) ['endangered', 'species', '(', 'mammals', ')'] 6.865 [5.175, 4.707, 0.65, 6.865, 0.649]
305  Most Dangerous Vehicles ['most', 'dangerous', 'vehicles'] 4.027 [1.384, 4.027, 3.887]
306  African Civilian Deaths ['african', 'civilian', 'deaths'] 4.955 [3.962, 4.319, 4.955]
307  New Hydroelectric Projects ['new', 'hydroelectric', 'projects'] 6.381 [0.969, 6.381, 3.136]
308  Implant Dentistry ['implant', 'dentistry'] 8.294 [6.795, 8.294]
309  Rap and Crime ['rap', 'and', 'crime'] 4.829 [4.829, 0.071, 3.817]
310  Radio Waves and Brain Cancer ['radio', 'waves', 

623  toxic chemical weapon ['toxic', 'chemical', 'weapon'] 4.953 [4.953, 3.915, 4.824]
624  SDI Star Wars ['sd', '##i', 'star', 'wars'] 5.068 [4.772, 2.146, 3.731, 5.068]
625  arrests bombing WTC ['arrests', 'bombing', 'w', '##tc'] 6.328 [5.25, 5.375, 2.894, 6.328]
626  human stampede ['human', 'stampede'] 7.36 [3.095, 7.36]
627  Russian food crisis ['russian', 'food', 'crisis'] 3.205 [2.87, 3.021, 3.205]
628  U.S. invasion of Panama ['u', '.', 's', '.', 'invasion', 'of', 'panama'] 5.483 [1.906, 0.008, 0.232, 0.008, 4.917, 0.044, 5.483]
629  abortion clinic attack ['abortion', 'clinic', 'attack'] 5.699 [5.615, 5.699, 3.42]
630  Gulf War Syndrome ['gulf', 'war', 'syndrome'] 6.075 [4.113, 2.513, 6.075]
631  Mandela South Africa President ['mandela', 'south', 'africa', 'president'] 5.59 [5.59, 2.302, 3.842, 1.688]
632  southeast Asia tin mining ['southeast', 'asia', 'tin', 'mining'] 4.966 [4.88, 3.719, 4.966, 4.243]
633  Welsh devolution ['welsh', 'dev', '##ol', '##ution'] 5.697 [5.697, 4

In [196]:
evaluator = pytrec_eval.RelevanceEvaluator(rob_qrels_pytrec, { 'ndcg_cut'})
eval_max_sim_idf = evaluator.evaluate(max_sim_idf)
eval_max_sim = evaluator.evaluate(max_sim)

for k in rob_qrels:
    if eval_max_sim_idf[k]["ndcg_cut_20"] < eval_max_sim[k]["ndcg_cut_20"]:
        print(k, eval_max_sim_idf[k]["ndcg_cut_20"], eval_max_sim[k]["ndcg_cut_20"])

301 0.8484928994236981 0.8516006854709552
304 0.7726141046928571 0.9659370277440175
307 0.8956768451367346 0.9307787414438842
308 0.9295807397554787 1.0
314 0.6320886011744618 0.6399287020483503
321 0.9636437120114612 0.9671350294324229
325 0.6732738928183061 0.7713654349803661
327 0.9249033963366486 0.949404317025383
328 0.9295807397554787 0.9644899900915856
329 0.9263159606882019 0.926941441559471
332 0.812831332687059 0.8782978563379756
340 0.7941614637791558 0.821871661000261
342 0.9659370277440175 0.9676617088516802
344 0.9176206204291528 1.0
346 0.8234149658629552 0.8297028663539519
347 0.9665625086152866 0.9676617088516802
350 0.8893589131273558 0.8916732298167267
354 0.9292557582488419 0.9626932496335553
379 0.7278209542379318 0.7706733530637763
409 0.9188350684097895 0.9644899900915856
422 0.9636437120114612 1.0
432 0.9572416874849684 0.9676617088516802
437 0.9676617088516802 1.0
439 0.9671350294324229 1.0
443 0.9316250195240087 0.9603789329441844
615 0.9204411836994597 0.9224

KeyError: '672'

In [197]:
common_idf_b["421"]

[('FT944-8187', 374, 327, 10),
 ('FR940919-0-00054', 372, 326, 220),
 ('FBIS4-21517', 641, 611, 76),
 ('FR940527-0-00058', 870, 859, 807),
 ('FT941-5285', 531, 502, 26),
 ('FT943-13338', 710, 693, 133),
 ('FR940727-1-00031', 557, 532, 65),
 ('LA091290-0106', 653, 629, 440),
 ('FR940830-2-00148', 734, 708, 730),
 ('FBIS3-35997', 812, 797, 268),
 ('FBIS4-22769', 422, 379, 297),
 ('FR941222-1-00071', 498, 474, 44),
 ('FBIS4-20692', 41, 38, 31),
 ('FBIS4-21486', 784, 772, 504),
 ('FR941222-0-00093', 333, 292, 259),
 ('FR940919-0-00110', 536, 514, 478),
 ('FR940711-2-00057', 572, 543, 168),
 ('FR941222-0-00086', 365, 320, 180),
 ('FT944-501', 636, 615, 393),
 ('FR940727-1-00028', 988, 986, 649),
 ('FBIS3-60739', 887, 879, 727),
 ('FT943-7475', 482, 451, 4),
 ('FR940830-1-00061', 919, 915, 877),
 ('FT942-14095', 769, 748, 292),
 ('FR940128-2-00068', 277, 234, 232),
 ('FR940919-0-00116', 478, 449, 399),
 ('FR940919-0-00086', 267, 226, 226),
 ('FT923-12551', 746, 728, 456),
 ('FR941222-0-00094

In [162]:
common_idf_w["421"]

[('FBIS4-20692', 16, 19, 31),
 ('FR941222-0-00088', 47, 50, 96),
 ('FT942-13005', 10, 12, 45),
 ('FBIS4-45670', 7, 8, 17),
 ('FBIS3-41604', 8, 9, 41),
 ('LA080189-0091', 41, 42, 81),
 ('FBIS4-42062', 6, 7, 16),
 ('FT944-7809', 45, 46, 71),
 ('FR940728-0-00052', 30, 34, 50),
 ('FBIS3-27014', 55, 58, 93),
 ('FBIS4-21485', 23, 32, 48),
 ('LA092390-0194', 29, 33, 42),
 ('LA122489-0217', 9, 10, 51),
 ('FR941206-1-00134', 2, 3, 13),
 ('FBIS4-22439', 12, 13, 47),
 ('FR940805-0-00048', 25, 31, 57),
 ('FR940727-1-00036', 56, 62, 85),
 ('FT941-6348', 14, 16, 15),
 ('FT921-12783', 57, 60, 84)]

In [163]:
def hit_num(score, qrels):
    qid_hit = defaultdict(list)
    for qid, qds in qrels.items():
        rank_did=[i[0] for i in sorted(score[qid].items(), key=lambda x: -x[1])]
        for qd in qds:
            try:
                rank = rank_did.index(qd)
                qid_hit[qid].append((qd, rank))
            except:
                continue
                
    return qid_hit

In [164]:
# true_relevance = np.asarray([[10, 0, 0, 1, 5]])
true_relevance = np.asarray([[1, 1, 1, 1, 0]])
scores = np.asarray([[.1, .2, .3, 4, 70]])
ndcg_score(true_relevance, scores)

0.7606395682357033

In [165]:
max_sim_idf_nums["421"]

[('FBIS3-19124', 24),
 ('FBIS3-22150', 54),
 ('FBIS3-22152', 14),
 ('FBIS3-22249', 39),
 ('FBIS3-22447', 99),
 ('FBIS3-22451', 26),
 ('FBIS3-22887', 25),
 ('FBIS3-27014', 58),
 ('FBIS3-27134', 15),
 ('FBIS3-41604', 9),
 ('FBIS4-20692', 19),
 ('FBIS4-21485', 32),
 ('FBIS4-21517', 98),
 ('FBIS4-22439', 13),
 ('FBIS4-25731', 61),
 ('FBIS4-42062', 7),
 ('FBIS4-45670', 8),
 ('FBIS4-45728', 97),
 ('FBIS4-53866', 5),
 ('FBIS4-66480', 0),
 ('FR940106-1-00017', 93),
 ('FR940106-1-00019', 68),
 ('FR940505-1-00098', 49),
 ('FR940511-1-00075', 67),
 ('FR940511-1-00080', 36),
 ('FR940511-1-00091', 84),
 ('FR940527-0-00057', 40),
 ('FR940727-1-00031', 27),
 ('FR940727-1-00036', 62),
 ('FR940727-1-00043', 81),
 ('FR940727-1-00047', 48),
 ('FR940728-0-00052', 34),
 ('FR940805-0-00048', 31),
 ('FR940810-2-00043', 59),
 ('FR940822-1-00033', 85),
 ('FR940912-1-00021', 69),
 ('FR940919-0-00094', 65),
 ('FR940919-0-00095', 77),
 ('FR940919-0-00105', 75),
 ('FR940919-0-00111', 11),
 ('FR941013-1-00062', 47)

In [166]:
qid = "421"
max_sim_idf_did=[i[0] for i in sorted(max_sim_idf[qid].items(), key=lambda x: -x[1])]
max_sim_did=[i[0] for i in sorted(max_sim[qid].items(), key=lambda x: -x[1])]
bm25_did = [i[0] for i in sorted(rob_bm25[qid].items(), key=lambda x: -x[1])]

rank_max_sim_idf_did = []
rank_max_sim_did = []
rank_bm25_did = []

for qd in rob_qrels[qid]:
    try:
        rank_idf = max_sim_idf_did.index(qd)
        rank = max_sim_did.index(qd)
        rank_max_sim_idf_did.append((qd, rank_idf))
        rank_max_sim_did.append((qd, rank))
        rank_bm25 = bm25_did.index(qd)
        rank_bm25_did.append((qd, rank_bm25))
    except:
        continue

rank_max_sim_idf_did = [i for i in sorted(rank_max_sim_idf_did, key=lambda x: x[1])]
rank_max_sim_did = [i for i in sorted(rank_max_sim_did, key=lambda x: x[1])]
rank_bm25_did = [i for i in sorted(rank_bm25_did, key=lambda x: x[1])]

for i, j, k in zip(rank_max_sim_idf_did, rank_max_sim_did, rank_bm25_did):
    print(i, j, k)

('FBIS4-66480', 0) ('FBIS4-66480', 0) ('FT943-9990', 0)
('FT943-6184', 1) ('FT943-6184', 1) ('FR940511-1-00080', 1)
('FT942-12757', 2) ('FR941206-1-00134', 2) ('FR941222-1-00024', 2)
('FR941206-1-00134', 3) ('FT942-12757', 3) ('FT942-8483', 3)
('FT933-4383', 4) ('FT933-4383', 4) ('FT943-7475', 4)
('FBIS4-53866', 5) ('FBIS4-53866', 5) ('FBIS3-22152', 5)
('FBIS4-42062', 7) ('FBIS4-42062', 6) ('FBIS3-27134', 6)
('FBIS4-45670', 8) ('FBIS4-45670', 7) ('LA061889-0180', 7)
('FBIS3-41604', 9) ('FBIS3-41604', 8) ('FT924-8553', 8)
('LA122489-0217', 10) ('LA122489-0217', 9) ('LA062989-0089', 9)
('FR940919-0-00111', 11) ('FT942-13005', 10) ('FT944-8187', 10)
('FT942-13005', 12) ('FR940919-0-00111', 11) ('FT934-11924', 11)
('FBIS4-22439', 13) ('FBIS4-22439', 12) ('FT943-14601', 12)
('FBIS3-22152', 14) ('FBIS3-22152', 13) ('FR941206-1-00134', 13)
('FBIS3-27134', 15) ('FT941-6348', 14) ('FBIS3-22447', 14)
('FT941-6348', 16) ('FBIS3-27134', 15) ('FT941-6348', 15)
('LA031289-0073', 17) ('FBIS4-20692', 

In [167]:
rob_queries["421"]

' industrial waste disposal'

In [168]:
rob_docs[max_sim_idf_did[6]]

'July 2, 1990, Monday, Home Edition\nPLASTIC IS \'GREENER\' THAN PAPER, TRADE COUNCIL CLAIMS\nPaper or plastic? The debate over which is more environmentally sound is about \nto heat up as the paper and plastic industries compete for favor in the \ncheckout line.\nEnvironmentalists have an answer of their own: neither is good.\nThe plastics industry today will attack the widely held notion that paper \ngrocery bags and other containers are environmentally superior to plastic. An \nindustry group will unveil studies that conclude plastic comes out ahead over \nthe life of both materials.\nThe studies, sponsored by the Council for Solid Waste Solutions, compare the \nso-called cradle-to-grave energy costs and environmental impacts of various \ndisposable paper and plastic items. This means studying them from raw materials \nthrough manufacture and consumer use to recycling and final disposal.\nBesides shopping bags, the studies compare polystyrene foam and cardboard cups \nand plates, as

In [169]:
rob_docs[max_sim_idf_did[5]]

'CSO \n   [Text] The Environment Agency will launch a three-year \n program beginning in this fiscal year to survey a variety of \n methods for rendering harmless substances that destroy the ozone \n layer such as special and substitute freons. The agency intends \n to survey existing methods of disposing of industrial waste as \n well as newly developing methods of disposal, pursue the \n commercialization of the latter methods and link the study to \n the construction of a social infrastructure for disposal of \n substances that destroy the ozone layer. \n   To be targeted in the survey will be the rotary kiln method \n currently used in the disposal of industrial waste, the cement \n kiln method used in conjunction with the manufacture of cement, \n and the plasma decomposition method being developed by the \n Ministry of International Trade and Industry\'s Institute of \n Industrial Technology. The survey will include a review of \n issues that should be handled by the government i

In [132]:
print(max_sim_did.index(max_sim_idf_did[6]))
print(bm25_did.index(max_sim_idf_did[6]))

18
77


In [128]:
tokenizer.tokenize(rob_docs[max_sim_idf_did[6]])

['july',
 '2',
 ',',
 '1990',
 ',',
 'monday',
 ',',
 'home',
 'edition',
 'plastic',
 'is',
 "'",
 'greene',
 '##r',
 "'",
 'than',
 'paper',
 ',',
 'trade',
 'council',
 'claims',
 'paper',
 'or',
 'plastic',
 '?',
 'the',
 'debate',
 'over',
 'which',
 'is',
 'more',
 'environmentally',
 'sound',
 'is',
 'about',
 'to',
 'heat',
 'up',
 'as',
 'the',
 'paper',
 'and',
 'plastic',
 'industries',
 'compete',
 'for',
 'favor',
 'in',
 'the',
 'check',
 '##out',
 'line',
 '.',
 'environmental',
 '##ists',
 'have',
 'an',
 'answer',
 'of',
 'their',
 'own',
 ':',
 'neither',
 'is',
 'good',
 '.',
 'the',
 'plastics',
 'industry',
 'today',
 'will',
 'attack',
 'the',
 'widely',
 'held',
 'notion',
 'that',
 'paper',
 'grocery',
 'bags',
 'and',
 'other',
 'containers',
 'are',
 'environmentally',
 'superior',
 'to',
 'plastic',
 '.',
 'an',
 'industry',
 'group',
 'will',
 'un',
 '##ve',
 '##il',
 'studies',
 'that',
 'conclude',
 'plastic',
 'comes',
 'out',
 'ahead',
 'over',
 'the',
 