In [1]:
import pandas as pd
import numpy as np
import re
import os
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed

# Fetch the NLTK resources used by later cells: the English stop-word list
# and the 'punkt' tokenizer data.
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
# Load the train and test splits from CSV files in the working directory.
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')
# Kept as a set so the per-word membership test in preprocess_text is cheap.
stop_words = set(stopwords.words('english'))

In [3]:

# The datasets are loaded as pandas DataFrames named 'df_train' and 'df_test';
# the helpers below read the text from the third column (index 2) by default.

def preprocess_text(text):
    """
    Normalise a raw document into a space-separated string of content words.

    Steps, in order: lower-case, remove punctuation, remove digits, collapse
    whitespace, and drop English stop words (module-level ``stop_words``).

    Parameters
    ----------
    text : str
        Raw document text. ``None`` and float NaN (a missing cell in a
        DataFrame) are treated as an empty document; any other non-string
        value is converted with ``str()``.

    Returns
    -------
    str
        The cleaned text; an empty string when nothing survives.
    """
    # Guard against missing / non-string cells instead of failing on .lower().
    if not isinstance(text, str):
        if text is None or (isinstance(text, float) and text != text):
            return ''
        text = str(text)
    # Convert to lower case
    text = text.lower()
    # Remove punctuation: every character that is neither a word character nor whitespace
    text = re.sub(r'[^\w\s]', '', text)
    # Remove digits
    text = re.sub(r'\d+', '', text)
    # Collapse runs of whitespace and trim the ends
    text = re.sub(r'\s+', ' ', text).strip()
    # Remove stopwords
    return ' '.join(word for word in text.split() if word not in stop_words)

In [4]:
def create_vocabulary(df, column_index=2, num_rows=200000, outputSize=200, random_state=None):
    """
    Build a vocabulary of the most frequent words in one text column.

    Parameters
    ----------
    df : pandas.DataFrame
        Source documents.
    column_index : int
        Positional index of the text column.
    num_rows : int
        Maximum number of rows to use. When the frame has more rows, a random
        subset of this size is drawn; otherwise every row is used in its
        original order.
    outputSize : int
        Number of most frequent words to keep.
    random_state : int or None
        Seed forwarded to ``DataFrame.sample`` so that a subsample can be
        reproduced. ``None`` keeps the draw unseeded.

    Returns
    -------
    vocab : dict
        Maps each kept word to its index; index 0 is the most frequent word.
    vocab_freq : numpy.ndarray of int32
        Occurrence count of each kept word, aligned with the indices in ``vocab``.
    """
    # Only sample when there is something to subsample. Shuffling the whole
    # frame would merely change the order in which words are first seen, and
    # Counter.most_common breaks ties by that order, so the vocabulary would
    # differ from run to run.
    if num_rows < len(df):
        df_subset = df.sample(n=num_rows, random_state=random_state)
    else:
        df_subset = df

    # Count incrementally rather than materialising every token in one list.
    word_counts = Counter()
    for text in df_subset.iloc[:, column_index]:
        word_counts.update(nltk.word_tokenize(preprocess_text(text)))

    word_freq = word_counts.most_common(outputSize)

    vocab = {word: i for i, (word, _) in enumerate(word_freq)}
    vocab_freq = np.array([count for _, count in word_freq], dtype=np.int32)

    return vocab, vocab_freq

# 1. Result for Text Data Parsing and Vocabulary Selection

In [5]:
# Build one vocabulary per split using the defaults of create_vocabulary.
train_vocab, train_vocab_freq = create_vocabulary(df_train)
test_vocab, test_vocab_freq = create_vocabulary(df_test)
# Show the word -> index mappings.
print("Train Vocab:")
print(train_vocab)
print("Test Vocab:")
print(test_vocab)

Train Vocab:
{'said': 0, 'new': 1, 'us': 2, 'reuters': 3, 'ap': 4, 'two': 5, 'first': 6, 'monday': 7, 'wednesday': 8, 'tuesday': 9, 'thursday': 10, 'company': 11, 'friday': 12, 'inc': 13, 'one': 14, 'world': 15, 'yesterday': 16, 'year': 17, 'last': 18, 'york': 19, 'president': 20, 'million': 21, 'oil': 22, 'corp': 23, 'united': 24, 'would': 25, 'sunday': 26, 'week': 27, 'years': 28, 'people': 29, 'today': 30, 'three': 31, 'government': 32, 'could': 33, 'quot': 34, 'time': 35, 'group': 36, 'game': 37, 'percent': 38, 'saturday': 39, 'software': 40, 'night': 41, 'prices': 42, 'iraq': 43, 'next': 44, 'security': 45, 'announced': 46, 'minister': 47, 'microsoft': 48, 'season': 49, 'team': 50, 'second': 51, 'internet': 52, 'international': 53, 'may': 54, 'washington': 55, 'billion': 56, 'day': 57, 'former': 58, 'state': 59, 'states': 60, 'officials': 61, 'back': 62, 'market': 63, 'says': 64, 'win': 65, 'business': 66, 'news': 67, 'victory': 68, 'city': 69, 'top': 70, 'four': 71, 'european': 7

In [6]:
def create_bit_vector(text, vocab):
    """
    Encode one document as a binary presence vector over ``vocab``.

    The text is cleaned with ``preprocess_text`` and tokenized; position
    ``vocab[word]`` is set to 1 for every vocabulary word that occurs at
    least once. Words outside the vocabulary are ignored.

    Returns a 1-D ``uint8`` array of length ``len(vocab)``.
    """
    tokens = nltk.word_tokenize(preprocess_text(text))

    presence = np.zeros(len(vocab), dtype=np.uint8)
    # Distinct vocabulary positions hit by this document.
    for position in {vocab[token] for token in tokens if token in vocab}:
        presence[position] = 1
    return presence

def create_freq_vector(text, vocab):
    """
    Create a term-frequency vector for a single document.

    The text is cleaned with ``preprocess_text`` and tokenized; position
    ``vocab[word]`` holds the number of times that vocabulary word occurs in
    the document. Words outside the vocabulary are ignored.

    Returns a 1-D float array of length ``len(vocab)``.
    """

    # Preprocess the text
    processed_text = preprocess_text(text)

    # Tokenize the text and count each distinct token once
    words_freq = Counter(nltk.word_tokenize(processed_text))

    # Create a frequency vector for this document
    freq_vector = np.zeros(len(vocab))
    # Iterate over distinct tokens so each slot is written exactly once.
    for word, count in words_freq.items():
        index = vocab.get(word)
        if index is not None:
            freq_vector[index] = count
    return freq_vector

def create_bit_vectors(df, vocab, column_index=2, offset=0, num_rows=10000):
    """
    Encode a contiguous slice of documents as binary presence vectors.

    Rows ``offset`` .. ``offset + num_rows - 1`` of ``df`` are processed
    (fewer when the frame ends earlier); the text is taken from the column at
    position ``column_index``.

    Returns a ``uint8`` array of shape ``(rows_processed, len(vocab))``.
    """
    row_count = min(num_rows, len(df) - offset)
    matrix = np.zeros((row_count, len(vocab)), dtype=np.uint8)
    for local_row in range(row_count):
        # Same two-step positional lookup as before: row first, then column.
        text = df.iloc[offset + local_row].iloc[column_index]
        matrix[local_row] = create_bit_vector(text, vocab)
    return matrix

def create_freq_vectors(df, vocab, column_index=2, offset=0, num_rows=10000):
    """
    Encode a contiguous slice of documents as term-frequency vectors.

    Rows ``offset`` .. ``offset + num_rows - 1`` of ``df`` are processed
    (fewer when the frame ends earlier); the text is taken from the column at
    position ``column_index``.

    Returns a float array of shape ``(rows_processed, len(vocab))``.
    """
    row_count = min(num_rows, len(df) - offset)
    matrix = np.zeros((row_count, len(vocab)))
    for local_row in range(row_count):
        # Same two-step positional lookup as before: row first, then column.
        text = df.iloc[offset + local_row].iloc[column_index]
        matrix[local_row] = create_freq_vector(text, vocab)
    return matrix

In [21]:
def create_vectors_parallel(df, vocab, func):
    """
    Vectorise every document of ``df`` by running ``func`` on row chunks in a thread pool.

    Parameters
    ----------
    df : pandas.DataFrame
        Documents to encode; the text is read from the column at position 2.
    vocab : dict
        Word -> index mapping passed through to ``func``.
    func : callable
        Called as ``func(df, vocab, 2, offset, chunk_size)`` and expected to
        return a 2-D array with one row per processed document, e.g.
        ``create_bit_vectors`` or ``create_freq_vectors``.

    Returns
    -------
    numpy.ndarray
        The chunk results stacked in document order, shape
        ``(len(df), len(vocab))``. The dtype is whatever ``func`` returns, so
        float frequency vectors are not squeezed into ``uint8``. An empty
        frame yields an empty ``uint8`` array.
    """
    num_documents = len(df)
    # os.cpu_count() may return None when the count cannot be determined.
    num_workers = (os.cpu_count() or 1) * 2
    # At least 1 so that range() below never receives a zero step.
    chunk_size = max(1, (num_documents + num_workers - 1) // num_workers)
    print(f'Creating bit vectors for {num_documents} documents using {num_workers} workers with chunk size {chunk_size}')
    offsets = range(0, num_documents, chunk_size)

    # NOTE(review): the per-document work looks CPU-bound Python, so threads may
    # give little speed-up; confirm by timing before relying on it.
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(func, df, vocab, 2, offset, chunk_size) for offset in offsets]
        # Collect in submission order so rows stay aligned with df.
        partials = [future.result() for future in futures]

    if not partials:
        return np.zeros((0, len(vocab)), dtype=np.uint8)
    return np.concatenate(partials, axis=0)


In [22]:
# Encode every training document as a binary presence vector over train_vocab.
bit_vectors_train = create_vectors_parallel(df_train, train_vocab, create_bit_vectors)

Creating bit vectors for 119999 documents using 24 workers with chunk size 5000


In [23]:
# The test split is encoded with the training vocabulary too, so both matrices share the same columns.
bit_vectors_test = create_vectors_parallel(df_test, train_vocab, create_bit_vectors)

Creating bit vectors for 7599 documents using 24 workers with chunk size 317


# 2. Results for Document Relevance with Vector Space Basic Model

All three words of the query 'internet washington official' appear in the test set. This query therefore addresses the third part of the task: "Test your implementation for words from the test-set in the dataset".

In [24]:
# Free-text queries evaluated against every retrieval model below.
queries = ['olympic gold athens', 'reuters stocks friday', 'investment market prices', 'internet washington official']

In [25]:
def Query(q_vec, d_vecs, df, n):
    """
    Rank documents by dot-product similarity to a query and print part of the ranking.

    Parameters
    ----------
    q_vec : array of shape (V,)
        Query vector.
    d_vecs : array of shape (N, V)
        One row per document.
    df : pandas.DataFrame
        Frame whose column at position 2 holds the document text; rows are
        aligned with ``d_vecs``.
    n : int
        ``n > 0`` selects the ``n`` highest-scoring documents; ``n <= 0``
        selects the ``-n`` lowest-scoring ones. In both cases the selection is
        listed from higher to lower score.

    Returns
    -------
    numpy.ndarray
        Row indices of the selected documents, in printed order.
    """
    similarity = np.dot(d_vecs, q_vec)
    ascending = np.argsort(similarity)
    # Take the tail for "top n", the head for "bottom |n|".
    selected = ascending[-n:] if n > 0 else ascending[:-n]
    n_indices = selected[::-1]
    for rank, doc_idx in enumerate(n_indices):
        print(f'Rank{rank} Score{similarity[doc_idx]}')
        print(f'{df.iloc[doc_idx, 2]}')
        print()
    print()
    return n_indices

## 2.1 Top 10 in Train

In [26]:
# Binary model: score = number of vocabulary words shared by query and document.
for i, text in enumerate(queries):
    q_bit_vector = create_bit_vector(text, train_vocab)
    print(f'Query {i}: {text}')
    # n=10 -> the ten highest-scoring training documents.
    Query(q_bit_vector, bit_vectors_train, df_train, 10)

Query 0: olympic gold athens
Rank0 Score2
Athens, Aug 19. (PTI):Leander Paes and Mahesh Bhupathi overcame an injury scare and world number one singles #39; player Roger Federer of Switzerland to advance to the quarterfinals of the men #39;s doubles tennis event at the Olympic Games here on Wednesday. 

Rank1 Score2
ATHENS: China, the dominant force in world diving for the best part of 20 years, won six out of eight Olympic titles in Athens and prompted speculation about a clean sweep when they stage the Games in Beijing in 2008.

Rank2 Score2
 ATHENS (Reuters) - American swimmer Michael Phelps won an  Olympic record-equaling eighth medal at the Athens Games when  the U.S. team took gold in the men's 4x100 meters medley relay  Saturday.

Rank3 Score2
US cyclist Tyler Hamilton will keep his gold medal from the Athens Games even though a test found illegal blood doping, the International Olympic Committee said.

Rank4 Score2
ATHENS : Felix Sanchez gave the Dominican Republic their first e

## 2.2 Top 5 in Test

In [27]:
# Same binary scoring, now against the test documents.
for i, text in enumerate(queries):
    q_bit_vector = create_bit_vector(text, train_vocab)
    print(f'Query {i}: {text}')
    # n=5 -> the five highest-scoring test documents.
    Query(q_bit_vector, bit_vectors_test, df_test, 5)

Query 0: olympic gold athens
Rank0 Score2
Many of Britain #39;s Olympic medal winners had already done a lap of honour in Athens, the civic reception and some even appeared on A Question of Sport.

Rank1 Score2
AMSTERDAM  Cyclist Leontien Zijlaard-Van Moorsel won the first gold medal for the Netherlands at the Athens Olympic Games on Wednesday. 

Rank2 Score2
 ATHENS (Reuters) - Hungarian Olympic discus champion Robert  Fazekas will lose his gold medal and be expelled from the Games  after breaking doping rules, the International Olympic  Committee (IOC) said Tuesday.

Rank3 Score2
ATHENS : Some 70,000 spectators filled the Athens Olympic stadium to watch the densely choreographed and emotionally-charged opening ceremony of the 12th Paralympics, the world #39;s premier competition for disabled athletes.

Rank4 Score2
Ruben Magnano, who coached Argentina to the Olympic basketball gold medal in Athens, resigned Thursday to accept a coaching job in Italy.


Query 1: reuters stocks friday


## 2.3 Bottom 2 in Test

In [29]:
# A negative n asks Query for the lowest-scoring documents instead of the highest.
for i, text in enumerate(queries):
    q_bit_vector = create_bit_vector(text, train_vocab)
    print(f'Query {i}: {text}')
    # n=-2 -> the two lowest-scoring test documents.
    Query(q_bit_vector, bit_vectors_test, df_test, -2)

Query 0: olympic gold athens
Rank0 Score0
AFTER being named as the 2006 US Ryder Cup team captain by the PGA of America at a press conference in Florida last night, Tom Lehman insisted he saw the chance to halt Americas recent dismal showing in the biennial match with Europe as an opportunity 

Rank1 Score0
SPACE.com - TORONTO, Canada -- A second\team of rocketeers competing for the  #36;10 million Ansari X Prize, a contest for\privately funded suborbital space flight, has officially announced the first\launch date for its manned rocket.


Query 1: reuters stocks friday
Rank0 Score0
BANGKOK : Thai Prime Minister Thaksin Shinawatra said the government would prosecute 300 Muslims detained at a riot this week that led to the deaths of 87 protesters, while another 900 would be released.

Rank1 Score0
SPACE.com - TORONTO, Canada -- A second\team of rocketeers competing for the  #36;10 million Ansari X Prize, a contest for\privately funded suborbital space flight, has officially announced th

In [30]:
# Term-frequency vectors for the training split, over the same vocabulary as the bit vectors.
freq_vectors_train = create_vectors_parallel(df_train, train_vocab, create_freq_vectors)

Creating bit vectors for 119999 documents using 24 workers with chunk size 5000


In [31]:
# Term-frequency vectors for the test split, again indexed by the training vocabulary.
freq_vectors_test = create_vectors_parallel(df_test, train_vocab, create_freq_vectors)

Creating bit vectors for 7599 documents using 24 workers with chunk size 317


In [33]:
# TF-IDF BM25
# Document frequency: number of training documents that contain each vocabulary word.
doc_freq = np.sum(bit_vectors_train, axis=0)
# Saturation constant used by the term-frequency transform below.
k = 10
# Inverse document frequency, log((N + 1) / df), computed from the training split only.
IDF = np.log((len(bit_vectors_train) + 1) / doc_freq)

In [34]:
# Saturating term-frequency transform (k + 1) * tf / (tf + k):
# 0 for an absent word, 1 for a single occurrence, approaching k + 1 as tf grows.
TF_train = (k + 1) * freq_vectors_train / (freq_vectors_train + k)
TF_test = (k + 1) * freq_vectors_test / (freq_vectors_test + k)

# 3. Results for Document Relevance with Vector Space TF-IDF Model

## 3.1 Top 10 in train

In [39]:
# TF-IDF model: the IDF weights are folded into the query vector, so the dot
# product inside Query yields sum(query_tf * IDF * document_TF).
for text in queries:
    q_freq_vector = create_freq_vector(text, train_vocab)
    q_IDF = q_freq_vector * IDF
    print(f'Query: {text}')
    # n=10 -> the ten highest-scoring training documents.
    Query(q_IDF, TF_train, df_train, 10)


Query: olympic gold athens
Rank0 Score18.61579993629746
ATHENS -- There was an elegant symmetry to it -- from Athens to Athens, from glory back to glory. The US women's soccer team won its first Olympic gold medal eight years ago in Athens, Ga., beginning its four-year reign on top of the world. Last night, after a mighty struggle, the Americans returned there, holding off relentless Brazil, 2-1, ...

Rank1 Score16.387777415999523
ATHENS (Reuters) - Greek Olympic 200 metres champion Costas Kenteris and his fellow Greek Olympic silver medallist Katerina Thanou say they are withdrawing from the Athens Games. 

Rank2 Score16.387777415999523
ATHENS, Greece - Brazilian sports officials blamed inadequate course security for a defrocked priest's bizarre attack on the Olympic marathon leader, and said Monday they will appeal to world track authorities for a duplicate gold medal.    The criticism of Athens Olympic organizers, who have been praised for their overall security, came as former prie

## 3.2 Top 5 in Test

In [37]:
# Same TF-IDF scoring against the test documents.
for text in queries:
    q_freq_vector = create_freq_vector(text, train_vocab)
    q_IDF = q_freq_vector * IDF
    print(f'Query: {text}')
    # n=5 -> the five highest-scoring test documents.
    Query(q_IDF, TF_test, df_test, 5)

Query: olympic gold athens
Rank0 Score16.387777415999523
ATHENS (Reuters) - Greek sprinters Costas Kenteris and Katerina Thanou have arrived at an Athens hotel for an International Olympic Committee (IOC) hearing into their missed doped tests, a saga that has shamed and angered the Olympic host ...

Rank1 Score12.702070225004409
 ATHENS (Reuters) - A weary Michael Phelps targeted his  fourth Olympic gold medal in Athens, turning his attention on  Wednesday to the 200 meters individual medley and settling for  the second-fastest overall time in the heats.

Rank2 Score12.702070225004409
 ATHENS (Reuters) - The U.S. women's basketball team showed  their men how to win gold Saturday as around 70,000 spectators  flocked to the Olympic stadium for a hectic athletics program  on the penultimate night of the Athens Games.

Rank3 Score12.702070225004409
ATHENS : Some 70,000 spectators filled the Athens Olympic stadium to watch the densely choreographed and emotionally-charged opening ceremony o

## 3.3 Bottom 5 in Test

In [40]:
# TF-IDF scoring, lowest-ranked test documents.
for text in queries:
    q_freq_vector = create_freq_vector(text, train_vocab)
    q_IDF = q_freq_vector * IDF
    print(f'Query: {text}')
    # n=-5 -> the five lowest-scoring test documents.
    Query(q_IDF, TF_test, df_test, -5)

Query: olympic gold athens
Rank0 Score0.0
CHICAGO (CBS.MW) - By upping the ante a bit, Constellation Brands has made an apparently successful bid to gobble up winemaker Robert Mondavi in a \$1.

Rank1 Score0.0
Worldwide semiconductor sales will hit an all-time high in 2004 but stay relatively flat in 2005 before climbing again over the next two years, according to the Semiconductor Industry Association.

Rank2 Score0.0
The British National Health Service (NHS) has signed a massive software licensing deal with Microsoft. The deal will ultimately save the NHS \$625 million in licensing fees, as well as requiring that Microsoft 

Rank3 Score0.0
AFTER being named as the 2006 US Ryder Cup team captain by the PGA of America at a press conference in Florida last night, Tom Lehman insisted he saw the chance to halt Americas recent dismal showing in the biennial match with Europe as an opportunity 

Rank4 Score0.0
SPACE.com - TORONTO, Canada -- A second\team of rocketeers competing for the  #36

In [41]:
# Document "length" here is the number of distinct vocabulary words present
# (row sum of the bit vectors).
# NOTE(review): token counts from the frequency vectors may have been intended - confirm.
train_doc_len = np.sum(bit_vectors_train, axis=1)
test_doc_len = np.sum(bit_vectors_test, axis=1)
# The average is taken over the training split and reused for the test split.
avg_doc_len = np.mean(train_doc_len)
# b controls the strength of the normalisation (b = 0 would disable it).
b = 0.5
# Normaliser equals 1 for an average-length document and grows with length.
train_normalizer = 1 - b + b * train_doc_len / avg_doc_len
test_normalizer = 1 - b + b * test_doc_len / avg_doc_len
# Divide each document row by its own normaliser (reshape to a column for broadcasting).
TF_train_norm = TF_train / train_normalizer.reshape(-1, 1)
TF_test_norm = TF_test / test_normalizer.reshape(-1, 1)

# 5. Document Relevance with Vector Space TF-IDF Model and Document-Length Normalization

## 5.1 Top 10 in Train

In [42]:
# Length-normalised TF-IDF scoring against the training documents.
for text in queries:
    q_freq_vector = create_freq_vector(text, train_vocab)
    q_IDF = q_freq_vector * IDF
    print(f'Query: {text}')
    # n=10 -> the ten highest-scoring documents; the returned indices are kept in t.
    t = Query(q_IDF, TF_train_norm, df_train, 10)

Query: olympic gold athens
Rank0 Score18.007200072889667
ATHENS Imagine, just for the length of a breezy, sun-kissed summer evening in Athens, that you had not heard a word about Michael Phelps before these Olympics began. That you knew nothing about his childhood in Baltimore; the failed Olympic dreams of his ...

Rank1 Score16.169951600393812
ATHENS (Reuters) - Greek Olympic 200 metres champion Costas Kenteris and his fellow Greek Olympic silver medallist Katerina Thanou say they are withdrawing from the Athens Games. 

Rank2 Score16.169951600393812
AS the excitement of the Athens Olympic Games fades, the build-up to Beijing 2008 is in full swing. The countdown to China began last weekend when Beijing #39;s mayor took possession of the Olympic flag at the Athens closing ceremony.

Rank3 Score16.169951600393812
ATHENS  Four-time judo world champion Noriko Anno broke her Olympic jinx by winning gold in the women #39;s 78-kilogram event Thursday at the Athens Games but defending Olympic

## 5.2 Top 5 in Test

In [43]:
# Length-normalised TF-IDF scoring against the test documents.
for text in queries:
    q_freq_vector = create_freq_vector(text, train_vocab)
    q_IDF = q_freq_vector * IDF
    print(f'Query: {text}')
    # n=5 -> the five highest-scoring documents; the returned indices are kept in t.
    t = Query(q_IDF, TF_test_norm, df_test, 5)

Query: olympic gold athens
Rank0 Score17.993186763274394
ATHENS (Reuters) - Greek sprinters Costas Kenteris and Katerina Thanou have arrived at an Athens hotel for an International Olympic Committee (IOC) hearing into their missed doped tests, a saga that has shamed and angered the Olympic host ...

Rank1 Score15.718776760076985
ATHENS : Some 70,000 spectators filled the Athens Olympic stadium to watch the densely choreographed and emotionally-charged opening ceremony of the 12th Paralympics, the world #39;s premier competition for disabled athletes.

Rank2 Score13.861238669876537
ATHENS, Greece - Winning on whitewater runs in the family for Frenchman Benoit Peschier, though an Olympic gold is something new. Peschier paddled his one-man kayak aggressively but penalty free in both his semifinal and final runs on the manmade Olympic ...

Rank3 Score12.533234712023782
 ATHENS (Reuters) - A weary Michael Phelps targeted his  fourth Olympic gold medal in Athens, turning his attention on  We

## 5.3 Bottom 5 in Test

In [44]:
# Length-normalised TF-IDF scoring, lowest-ranked test documents.
for text in queries:
    q_freq_vector = create_freq_vector(text, train_vocab)
    q_IDF = q_freq_vector * IDF
    print(f'Query: {text}')
    # n=-5 -> the five lowest-scoring documents; the returned indices are kept in t.
    t = Query(q_IDF, TF_test_norm, df_test, -5)

Query: olympic gold athens
Rank0 Score0.0
CHICAGO (CBS.MW) - By upping the ante a bit, Constellation Brands has made an apparently successful bid to gobble up winemaker Robert Mondavi in a \$1.

Rank1 Score0.0
Worldwide semiconductor sales will hit an all-time high in 2004 but stay relatively flat in 2005 before climbing again over the next two years, according to the Semiconductor Industry Association.

Rank2 Score0.0
The British National Health Service (NHS) has signed a massive software licensing deal with Microsoft. The deal will ultimately save the NHS \$625 million in licensing fees, as well as requiring that Microsoft 

Rank3 Score0.0
AFTER being named as the 2006 US Ryder Cup team captain by the PGA of America at a press conference in Florida last night, Tom Lehman insisted he saw the chance to halt Americas recent dismal showing in the biennial match with Europe as an opportunity 

Rank4 Score0.0
SPACE.com - TORONTO, Canada -- A second\team of rocketeers competing for the  #36

In [45]:
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
# Pretrained word2vec vectors in binary format, expected in the working directory.
filename = 'GoogleNews-vectors-negative300.bin'
# `model` is used below for membership tests (`word in model`) and lookups (`model[word]`).
model = KeyedVectors.load_word2vec_format(filename, binary=True)

In [46]:
def preprocess_text_to_vectors(texts):
    """
    Turn each text into a matrix of word embeddings.

    Every text is cleaned with ``preprocess_text`` and tokenized. The result
    for one text is an array of shape ``(num_tokens, 300)`` whose row ``j``
    is the embedding of token ``j`` taken from the module-level ``model``;
    tokens missing from ``model`` keep an all-zero row.

    Returns a list with one such array per input text, in input order.
    """
    def embed(text):
        tokens = nltk.word_tokenize(preprocess_text(text))
        matrix = np.zeros((len(tokens), 300))
        for row, token in enumerate(tokens):
            if token in model:
                matrix[row] = model[token]
        return matrix

    return [embed(text) for text in texts]

In [47]:
# Embed the queries and every document of both splits (text column at position 2).
query_vectors = preprocess_text_to_vectors(queries)
doc_vectors_train = preprocess_text_to_vectors(df_train.iloc[:, 2])
doc_vectors_test = preprocess_text_to_vectors(df_test.iloc[:, 2])

In [54]:
def Query_Vec(q_vec, d_vecs, df, n):
    """
    Rank documents against a query using word-embedding similarity and print part of the ranking.

    For each document, every (document word, query word) pair is scored with
    the log of the sigmoid of the dot product of their embeddings; the
    document score is the mean over all pairs.

    Parameters
    ----------
    q_vec : array of shape (Q, D)
        One embedding row per query word.
    d_vecs : sequence of arrays of shape (W_j, D)
        One embedding matrix per document.
    df : pandas.DataFrame
        Frame whose column at position 2 holds the document text; rows are
        aligned with ``d_vecs``.
    n : int
        ``n > 0`` selects the ``n`` highest-scoring documents; ``n <= 0``
        selects the ``-n`` lowest-scoring ones. In both cases the selection is
        listed from higher to lower score.

    Returns
    -------
    numpy.ndarray
        Row indices of the selected documents, in printed order.
    """
    # Default of -inf: a document (or query) without any tokens has no word
    # pairs to average. Leaving its score as NaN would place it at the top of
    # the ranking, because argsort orders NaN after every number.
    similarity = np.full(len(d_vecs), -np.inf)
    for j, d_vec in enumerate(d_vecs):
        z = np.dot(d_vec, q_vec.T)
        if z.size == 0:
            continue
        likelihood = 1/(1+np.exp(-z))
        similarity[j] = np.mean(np.log(likelihood))
    if n > 0:
        n_indices = np.argsort(similarity)[-n:][::-1]
    else:
        n_indices = np.argsort(similarity)[:-n][::-1]
    for rank, i in enumerate(n_indices):
        print(f'Rank{rank} Score{similarity[i]}')
        print(f'{df.iloc[i, 2]}')
        print()
    print()
    # Returned for parity with Query.
    return n_indices

# 4. Document Relevance with Word2Vec

## 4.1 Top 10 in Train

In [55]:
# Word2vec model: rank the training documents for each embedded query.
for i, q_vec in enumerate(query_vectors):
    print(f'Query {i}: {queries[i]}')
    # n=10 -> the ten highest-scoring training documents.
    Query_Vec(q_vec, doc_vectors_train, df_train, 10)

Query 0: olympic gold athens
Rank0 Score-0.20356909896471287
Reuters - Southeast Asian nations, Australia and

Rank1 Score-0.21045240811339674
By MATT MOORE     STOCKHOLM, Sweden (AP) -- American researchers Dr. Richard Axel and Linda B...

Rank2 Score-0.2107508471284433
 ATHENS (Reuters) - Argentina beat Italy 84-69 to win the  men's Olympic basketball gold Saturday, the country's first  medal in the sport.

Rank3 Score-0.2193627479228787
AP - Germany won the FIFA Under-19 Women's World Championship on Saturday with a 2-0 victory over China.

Rank4 Score-0.22292000186278452
 ATHENS (Reuters) - Paul Hamm (U.S.) won the men's  gymnastics individual all-around gold medal at the Olympics on  Wednesday.

Rank5 Score-0.2235205836521121
British canoeists Nick Smith and Stuart Bowman are out of the men's C2 doubles.

Rank6 Score-0.22390211290010625
ATHENS -- Trojan hurdler Felix Sanchez of the Dominican Republic won the gold medal in the 400-meter hurdle today (Aug. 26) at the 2004 Athens Oly

## 4.2 Top 5 in Test

In [56]:
# Word2vec model against the test documents.
for i, q_vec in enumerate(query_vectors):
    print(f'Query {i}: {queries[i]}')
    # n=5 -> the five highest-scoring test documents.
    Query_Vec(q_vec, doc_vectors_test, df_test, 5)

Query 0: olympic gold athens
Rank0 Score-0.26172010633952325
 ATHENS (Reuters) - Aaron Peirsol won his second gold medal  at the Athens Olympics Thursday after winning an appeal against  his disqualification from the men's 200 meter backstroke.

Rank1 Score-0.2814404197183473
 ST LOUIS (Reuters) - So much for the "Curse of the  Bambino."

Rank2 Score-0.2829745387113325
ATHENS, Aug 27: Australia won the Olympic men #39;s hockey tournament for the first time in history on Friday, beating the Netherlands 2-1 with a golden goal.

Rank3 Score-0.28337620512640677
James Carter of Baltimore finished fourth in the finals of the 400-meter hurdles today, missing out on a medal. Felix Sanchez, of the Dominican Republic, won the gold medal.

Rank4 Score-0.2920932097960169
 ATHENS (Reuters) - Carly Patterson upstaged Russian diva  Svetlana Khorkina to become the first American in 20 years to  win the women's Olympic gymnastics all-round gold medal on  Thursday.


Query 1: reuters stocks friday
Rank0

## 4.3 Bottom 5 in Test

In [57]:
# Word2vec model, lowest-ranked test documents.
for i, q_vec in enumerate(query_vectors):
    print(f'Query {i}: {queries[i]}')
    # n=-5 -> the five lowest-scoring test documents.
    Query_Vec(q_vec, doc_vectors_test, df_test, -5)

Query 0: olympic gold athens
Rank0 Score-0.7059181959864829
elta Air Lines said yesterday that it was cutting the pay of executives and other salaried workers by 10 percent and making other changes meant to help it avoid a bankruptcy filing.

Rank1 Score-0.7089601934765838
Sept. 30, 2004Reacting to calls from pharmaceutical retailers, distributors and manufacturers, EPCglobal has added a new action group to specifically study the pharmaceutical industry 

Rank2 Score-0.7105233482760018
 quot;NVIDIA and Intel Corporation announced that the companies have signed a broad, multi-year patent cross-license agreement spanning multiple product lines and product generations.

Rank3 Score-0.715113979151027
The chairman of Amvescap said Wednesday that the company planned to wrap its US mutual fund businesses into one following a \$450 million settlement with regulators over improper trading.

Rank4 Score-0.7334950534011974
An appeals court ruled Thursday that federal energy regulators shirked the