In [None]:
# This code was originally written in google colab. 
# Check my colab in the following link:
# https://colab.research.google.com/drive/1bLp7NvPLzTrmLfyhWPJY1iYi8sRTyvgB?usp=sharing

In [None]:
# Process
# 1. Read data (train and validation)
# 2. Preprocessing & feature extraction
# 3. BUILD AND TRAIN LDA
# 4. BUILD AND TRAIN WordVector
# 5. Hyperparameter tuning
# 6. Read and process test data
# 7. Prediction
# 8. Export submission files

In [None]:
!pip install numba



In [None]:
# Import libraries
#  common 
import pandas as pd
import numpy as np
from collections import Counter
import re
from timeit import default_timer as timer  
from numba import jit, cuda 

# language processing imports
import nltk
from gensim.corpora import Dictionary
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer   
from nltk.tokenize import TweetTokenizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob

# LDA & W2V imports
from gensim.models.ldamulticore import LdaMulticore
from gensim.models.word2vec import Word2Vec

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# hyperparameter training imports
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report

# Fetch the NLTK resources the cells below rely on.
nltk.download('punkt')          # tokenizer models used by nltk.word_tokenize
nltk.download('stopwords')      # word lists read via nltk.corpus.stopwords
nltk.download('wordnet')        # lexical database behind WordNetLemmatizer
nltk.download('vader_lexicon')  # lexicon needed by SentimentIntensityAnalyzer



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
## 1. Read data (train and validation)

In [None]:
#train_data = pd.read_csv('../input/train.csv')
#train_data = pd.read_csv('train_sampled.tsv', encoding="utf-8", delimiter = '\t')#, lineterminator='\n')
# Column names kept for reference; in the cells visible here only `excollist`
# is actually consumed.
collist = [
    'sentence', 'label', 'wordcount', 'web', 'email',
    'hashtag', 'sentiment', 'percent', 'subjectivity',
]
# Hand-crafted meta-feature columns that the feature-extraction cells
# initialise to 0 and then fill in.
excollist = ['web', 'email', 'hashtag', 'sentiment', 'percent', 'symbol']

# Both splits are tab-separated UTF-8 files.
train_data = pd.read_csv('train_9168.tsv', sep='\t', encoding='utf-8')
dev_data = pd.read_csv('dev.tsv', sep='\t', encoding='utf-8')

# Train rows followed by dev rows; the original row labels are kept, so the
# combined index is not unique.
corpus_data = pd.concat([train_data, dev_data])
train_data.head(3)

Unnamed: 0,sentence,label,length
0,why houston flooding isn‘t a sign of climate c...,1,867
1,The U.N. Intergovernmental Panel on Climate Ch...,1,4560
2,Bureau Now Sets Strict Limits on CoolingOVER r...,1,2524


In [None]:
# Load the splits used by every cell from here on.
# NOTE(review): `train_data` is rebound to 'train_final.tsv' here, while
# `corpus_data` was assembled from 'train_9168.tsv' — confirm this is intended.
train_data = pd.read_csv('train_final.tsv', sep='\t', encoding='utf-8')
dev_data = pd.read_csv('dev.tsv', sep='\t', encoding='utf-8')

In [None]:
# Baseline model: 1- to 3-gram counts fed into a logistic regression, fitted
# on the raw training text and scored on the dev split.
# max_iter is raised above the scikit-learn default because the solver
# previously stopped on its iteration limit before converging on these
# unscaled count features.
baseline_model = make_pipeline(
    CountVectorizer(ngram_range=(1, 3)),
    LogisticRegression(max_iter=1000),
)
baseline_model.fit(train_data['sentence'], train_data['label'])
# Alternative vectorizer that was tried:
# TfidfVectorizer(smooth_idf=False, sublinear_tf=False, norm=None, analyzer='word')

baseline_predicted = baseline_model.predict(dev_data['sentence'])
print(classification_report(dev_data['label'], baseline_predicted))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


              precision    recall  f1-score   support

           0       0.84      0.62      0.71        50
           1       0.70      0.88      0.78        50

    accuracy                           0.75       100
   macro avg       0.77      0.75      0.75       100
weighted avg       0.77      0.75      0.75       100



In [None]:
## 2. Preprocessing & feature extraction

In [None]:
#columnlist = ['sentence', 'label', 'wordcount', 'web', 'email' , 'sentiment', 'polarity', 'subjectivity' ]#, names= columnlist,  header=None

# Shared NLP helpers; `sid` is also used when the test split is processed.
stopwords = nltk.corpus.stopwords.words('english')
tt = TweetTokenizer()
sid = SentimentIntensityAnalyzer()
start = timer()
lemm = WordNetLemmatizer()
stemmer = PorterStemmer()

# Make sure every meta-feature column exists before it is filled in.
for col in excollist:
    train_data[col] = 0

# Patterns are compiled once. Raw strings keep the backslashes literal.
URL_OR_HANDLE = re.compile(r"((\S+)?(http(s)?)(\S+))|((\S+)?(www)(\S+))|((\S+)?(\@)(\S+)?)")
NON_LETTER = re.compile(r"[^a-zA-Z ]")

# Whole-column assignments replace the former `df[col].iloc[idx] = ...` loop,
# which wrote through chained indexing (SettingWithCopyWarning) and used row
# labels as positions.
raw_text = train_data['sentence'].astype(str)

# Flags computed on the untouched text.
train_data['web'] = raw_text.str.contains(r'http|\.net|\.com|www', regex=True).astype(int)
train_data['email'] = raw_text.str.contains('email|@', regex=True).astype(int)
train_data['hashtag'] = raw_text.str.contains('#', regex=False).astype(int)
# '%' must be looked up BEFORE non-letters are stripped; testing the cleaned
# text (as was done previously) can never match and left this column all 0.
train_data['symbol'] = raw_text.str.contains('%', regex=False).astype(int)

# Drop URLs / www links / @handles, then everything that is not a letter or space.
clean_text = raw_text.map(lambda text: NON_LETTER.sub("", URL_OR_HANDLE.sub(" ", text)))

# VADER compound score and the spelled-out "percent" flag use the cleaned text.
train_data['sentiment'] = clean_text.map(lambda text: sid.polarity_scores(text)['compound'])
train_data['percent'] = clean_text.str.contains('percent|per cent', regex=True).astype(int)

# The label text is historical: nothing in this cell runs on a GPU.
print("wit GPU:", timer()-start)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


wit GPU: 24.65901859099995


In [None]:
## 3. BUILD AND TRAIN LDA

In [None]:
def get_good_tokens(sentence):
    """Clean a sequence of tokens and drop the ones that end up empty.

    Every run of characters other than ASCII letters, digits, '!' and '?'
    is removed from each token.

    Args:
        sentence: iterable of token strings.

    Returns:
        list of the cleaned, non-empty tokens in their original order.
    """
    cleaned = (re.sub('[^0-9A-Za-z!?]+', '', token) for token in sentence)
    return [token for token in cleaned if token]

def w2v_preprocessing(df):
    """Add the sentence-level token columns used for Word2Vec to ``df`` in place.

    Columns written:
        sentence            -- lower-cased text
        document_sentences  -- text split on '.'
        tokenized_sentences -- per document, a list of token lists (one per
                               sentence), cleaned with get_good_tokens and
                               with empty sentences removed
    """
    df['sentence'] = df['sentence'].str.lower()
    df['document_sentences'] = df['sentence'].str.split('.')

    def _tokenize_document(document_sentences):
        # Tokenize and clean each sentence, keeping only non-empty results.
        token_lists = (get_good_tokens(nltk.word_tokenize(sentence))
                       for sentence in document_sentences)
        return [tokens for tokens in token_lists if tokens]

    df['tokenized_sentences'] = [_tokenize_document(document_sentences)
                                 for document_sentences in df['document_sentences']]

# Timer shared by the preprocessing steps of this cell.
start = timer()
w2v_preprocessing(corpus_data)

def lda_get_good_tokens(df):
    """Lower-case ``df['sentence']`` and store its cleaned tokens in ``df['tokenized_text']``.

    Works on the whole document at once (no sentence splitting); ``df`` is
    modified in place.
    """
    df['sentence'] = df['sentence'].str.lower()
    df['tokenized_text'] = [get_good_tokens(nltk.word_tokenize(text))
                            for text in df['sentence']]

lda_get_good_tokens(corpus_data)

def remove_stopwords(df):
    """Write ``df['stopwords_removed']``: ``df['tokenized_text']`` minus English stop words.

    ``df`` is modified in place. The stop-word list is turned into a set once
    so each token lookup is constant-time instead of a scan over the list.
    """
    # NOTE(review): tokens have already lost their apostrophes in
    # get_good_tokens, so contracted stop words presumably no longer match —
    # confirm whether that matters.
    stop_words = frozenset(nltk.corpus.stopwords.words('english'))
    df['stopwords_removed'] = [[word for word in doc if word not in stop_words]
                               for doc in df['tokenized_text']]

remove_stopwords(corpus_data)

def stem_words(df):
    """Lemmatize, then stem, the stop-word-free tokens of ``df`` in place.

    Columns written:
        lemmatized_text -- WordNet lemma of every token in 'stopwords_removed'
        stemmed_text    -- Porter stem of every token in 'lemmatized_text'
    """
    lemmatize = nltk.stem.WordNetLemmatizer().lemmatize
    df['lemmatized_text'] = [[lemmatize(word) for word in doc]
                             for doc in df['stopwords_removed']]

    stem = nltk.stem.porter.PorterStemmer().stem
    df['stemmed_text'] = [[stem(word) for word in doc]
                          for doc in df['lemmatized_text']]

stem_words(corpus_data)

# Vocabulary over the stemmed corpus (train + dev documents).
dictionary = Dictionary(corpus_data['stemmed_text'].values)
# Keep tokens seen in at least 3 documents and in at most 80% of them.
dictionary.filter_extremes(no_below=3, no_above=0.8)
# Re-assign ids so they are contiguous after filtering.
dictionary.compactify()

def document_to_bow(df):
    """Store the bag-of-words form of each ``df['stemmed_text']`` entry in ``df['bow']``.

    Uses the module-level ``dictionary``; ``df`` is modified in place.
    """
    df['bow'] = [dictionary.doc2bow(doc) for doc in df['stemmed_text']]

document_to_bow(corpus_data)

def lda_preprocessing(df):
    """Run the complete LDA text-preparation chain on ``df`` in place.

    The steps must run in this order because each one reads the column the
    previous one wrote: tokenize/clean -> remove stop words -> lemmatize and
    stem -> convert to bag-of-words with the module-level ``dictionary``.
    """
    lda_get_good_tokens(df)
    remove_stopwords(df)
    stem_words(df)
    document_to_bow(df)

# Seconds elapsed since `start` was set at the top of this cell.
print("TIME :", timer()-start)

TIME : 32.60631617099989


In [None]:
%%timeit
corpus = corpus_data.bow  ##train_data.bow

num_topics = 100

def get_ldamodel(corpus, dictionary, num_topics):    
    model = LdaMulticore(corpus=corpus,
                          id2word=dictionary,
                          num_topics=num_topics,
                          workers=4,
                          chunksize=1000, #4000
                          passes=10,
                          per_word_topics=True,
                          alpha='asymmetric')
    
    return model

LDAmodel = get_ldamodel(corpus, dictionary, num_topics)   

  diff = np.log(self.expElogbeta)


CPU times: user 1min 7s, sys: 8.86 s, total: 1min 16s
Wall time: 2min 29s


In [None]:
def document_to_lda_features(lda_model, document):
    """Return the topic-probability vector of one bag-of-words document.

    Args:
        lda_model: model exposing ``get_document_topics``; it is the model
            actually queried (the global ``LDAmodel`` is no longer used
            implicitly).
        document: bag-of-words representation of the document.

    Returns:
        1-D numpy array with one probability per topic, in topic order.
    """
    # minimum_probability=0 asks for every topic, so all vectors share a length.
    topic_importances = lda_model.get_document_topics(document, minimum_probability=0)
    topic_importances = np.array(topic_importances)
    # Column 0 holds the topic ids, column 1 the probabilities.
    return topic_importances[:, 1]

start = timer()
# `train_data` is re-read from disk after `corpus_data` was preprocessed, so
# on a fresh kernel it has no 'bow' column yet; build it when it is missing.
if 'bow' not in train_data.columns:
    lda_preprocessing(train_data)

# One topic-probability vector per training document.
train_data['lda_features'] = [document_to_lda_features(LDAmodel, doc)
                              for doc in train_data['bow']]
print("time :", timer()-start)

time : 11.99986723300026


In [None]:
## 4. BUILD AND TRAIN WordVector

In [None]:
# `train_data` is re-read from disk after `corpus_data` was preprocessed, so
# on a fresh kernel it has no 'tokenized_sentences' column yet; build it when
# it is missing.
if 'tokenized_sentences' not in train_data.columns:
    w2v_preprocessing(train_data)

# Flatten the per-document sentence groups into one list of token lists.
sentences = [sentence
             for sentence_group in train_data['tokenized_sentences']
             for sentence in sentence_group]

num_features = 200    # Word vector dimensionality
min_word_count = 3    # Minimum word count
num_workers = 4       # Number of threads to run in parallel
context = 6           # Context window size
downsampling = 1e-3   # Downsample setting for frequent words


def getW2Vmodel(sentences, num_workers, num_features, min_word_count, context, downsampling):
    """Train and return a Word2Vec model on ``sentences``.

    Args:
        sentences: list of token lists.
        num_workers: number of worker threads.
        num_features: dimensionality of the word vectors.
        min_word_count: words rarer than this are ignored.
        context: context window size.
        downsampling: down-sampling threshold for frequent words.

    Fixed settings: sg=1, hs=0, negative=5 and 6 training iterations.
    """
    # `size` and `iter` are the keyword names this notebook's gensim version expects.
    return Word2Vec(sentences=sentences,
                    size=num_features,
                    window=context,
                    min_count=min_word_count,
                    sample=downsampling,
                    workers=num_workers,
                    sg=1,
                    hs=0,
                    negative=5,
                    iter=6)

start = timer()
W2Vmodel = getW2Vmodel(sentences=sentences,
                       num_workers=num_workers,
                       num_features=num_features,
                       min_word_count=min_word_count,
                       context=context,
                       downsampling=downsampling)
print("time :", timer()-start)

time : 29.260960986999635


In [None]:
def get_w2v_features(w2v_model, sentence_group):
    """Average the word vectors of every in-vocabulary word of a document.

    Args:
        w2v_model: trained model exposing ``vector_size`` and ``wv`` (with a
            ``vocab`` mapping and ``wv[word]`` vector lookup).
        sentence_group: list of sentences, each a list of token strings.

    Returns:
        float32 numpy vector of length ``w2v_model.vector_size``; all zeros
        when the document is empty or contains no known word.
    """
    keyed_vectors = w2v_model.wv
    # Vectors are looked up through `.wv`; indexing the model object directly
    # is deprecated and emitted a warning on every call.
    vocabulary = keyed_vectors.vocab
    feature_vec = np.zeros(w2v_model.vector_size, dtype="float32")

    # Iterate the nested lists directly: np.concatenate raised on a document
    # without sentences and copied every token into a temporary string array.
    n_words = 0
    for sentence in sentence_group:
        for word in sentence:
            if word in vocabulary:
                feature_vec = np.add(feature_vec, keyed_vectors[word])
                n_words += 1

    if n_words > 0:
        feature_vec = np.divide(feature_vec, n_words)
    return feature_vec

start = timer()
# One averaged word vector per training document.
train_data['w2v_features'] = [get_w2v_features(W2Vmodel, sentence_group)
                              for sentence_group in train_data['tokenized_sentences']]
print("with GPU:", timer()-start)

  if __name__ == '__main__':


with GPU: 15.85646318399995


In [None]:
## 5. Hyperparameter tuning

In [None]:
def get_cross_validated_model(model, param_grid, X, y, nr_folds=5, scoring='f1'):
    """Grid-search ``model`` over ``param_grid`` and show a short result table.

    Args:
        model: estimator to tune.
        param_grid: parameter grid handed to GridSearchCV.
        X: feature matrix.
        y: target labels.
        nr_folds: number of cross-validation folds.
        scoring: scoring passed to GridSearchCV (default 'f1';
            'neg_log_loss' was used in earlier experiments).

    Returns:
        The fitted GridSearchCV object (``best_score_``, ``predict``, ...).
    """
    grid_cv = GridSearchCV(model, param_grid=param_grid, scoring=scoring,
                           cv=nr_folds, n_jobs=-1, verbose=True)
    grid_cv.fit(X, y)

    result_df = pd.DataFrame(grid_cv.cv_results_)
    # Show only the score/rank columns plus the tuned parameters. This list
    # used to be built and then ignored, so the full table was dumped.
    show_columns = ['mean_test_score', 'mean_train_score', 'rank_test_score']
    show_columns += [col for col in result_df.columns if col.startswith('param_')]
    # Skip anything cv_results_ does not contain; the recorded result tables,
    # for instance, have no 'mean_train_score' column.
    show_columns = [col for col in show_columns if col in result_df.columns]
    display(result_df.sort_values(by='rank_test_score')[show_columns].head())

    return grid_cv

In [None]:
#X_train_metas = np.array( pd.concat([train_data.length, train_data.web, train_data.email,train_data.hashtag, train_data.sentiment, train_data.percent, train_data.symbol ], axis= 1) ) 
# Hand-crafted meta features, one column each, in this fixed order.
X_train_metas = np.array(
    train_data[['web', 'email', 'hashtag', 'sentiment', 'percent', 'symbol']]
)
# Per-document feature vectors stacked into 2-D matrices.
X_train_lda = np.array([np.array(vec) for vec in train_data.lda_features])
X_train_w2v = np.array([np.array(vec) for vec in train_data.w2v_features])
# Column-wise combinations of the three feature families.
X_train_ldaw2v = np.concatenate((X_train_lda, X_train_w2v), axis=1)
X_train_combined = np.concatenate((X_train_metas, X_train_ldaw2v), axis=1)
X_train_ldametas = np.concatenate((X_train_metas, X_train_lda), axis=1)

In [None]:
# store all models in a dictionary
# Tuned models, keyed by the feature set they were fitted on.
models = dict()

# The same logistic regression is tuned once per feature set.
# solver='liblinear' supports both penalties in the grid; with the default
# solver every penalty='l1' candidate failed and scored NaN.
# max_iter is raised because the combined feature set previously stopped on
# the iteration limit.
lr = LogisticRegression(solver='liblinear', max_iter=1000)
param_grid = {'penalty': ['l1', 'l2']}

feature_sets = {
    'best_lr_lda': X_train_lda,
    'best_lr_w2v': X_train_w2v,
    'best_lr_ldaw2v': X_train_ldaw2v,
    'best_lr_ldawmetas': X_train_ldametas,
    'best_lr_combined_all': X_train_combined,
}
for model_name, X_features in feature_sets.items():
    models[model_name] = get_cross_validated_model(lr, param_grid, X_features, train_data.label)

Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    1.4s finished


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_penalty,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
1,0.085705,0.015394,0.002397,0.000649,l2,{'penalty': 'l2'},0.649165,0.649412,0.736573,0.706522,0.719346,0.692203,0.036315,1
0,0.004441,0.001202,0.0,0.0,l1,{'penalty': 'l1'},,,,,,,,2


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed:    0.4s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.5s finished


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_penalty,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
1,0.171298,0.031425,0.003174,0.000575,l2,{'penalty': 'l2'},0.700611,0.722793,0.803874,0.822943,0.836186,0.777281,0.054971,1
0,0.003131,0.000651,0.0,0.0,l1,{'penalty': 'l1'},,,,,,,,2


Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed:    0.7s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    1.0s finished


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_penalty,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
1,0.321477,0.077043,0.003155,0.000583,l2,{'penalty': 'l2'},0.748988,0.737705,0.847775,0.862651,0.87619,0.814662,0.059026,1
0,0.014276,0.0023,0.0,0.0,l1,{'penalty': 'l1'},,,,,,,,2


Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed:    0.3s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.4s finished


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_penalty,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
1,0.119161,0.01583,0.00289,0.001141,l2,{'penalty': 'l2'},0.697561,0.660287,0.746803,0.730667,0.686981,0.70446,0.030932,1
0,0.003204,0.000612,0.0,0.0,l1,{'penalty': 'l1'},,,,,,,,2


Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    1.1s finished
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_penalty,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
1,0.365882,0.078355,0.003147,0.000519,l2,{'penalty': 'l2'},0.781893,0.743802,0.849188,0.864734,0.862651,0.820453,0.048846,1
0,0.009614,0.003052,0.0,0.0,l1,{'penalty': 'l1'},,,,,,,,2


In [None]:
# `best_score_` is the mean cross-validated score of the best grid candidate.
score_line = "Model {} has a test score of: {:0.4f}"
for name, model in models.items():
    print(score_line.format(name, float(model.best_score_)))

Model best_lr_lda has a test score of: -0.1626
Model best_lr_w2v has a test score of: -0.1308
Model best_lr_ldaw2v has a test score of: -0.1087
Model best_lr_ldawmetas has a test score of: -0.1614
Model best_lr_combined_all has a test score of: -0.1181


In [None]:
## 6. Read and process test data

In [None]:
test_data = pd.read_csv('dev.tsv', encoding="utf-8", delimiter='\t')
# Keep the first 55 rows after sorting by label, then shuffle them.
# NOTE(review): the resulting class mix is heavily skewed towards label 0
# (the recorded report shows supports of 50 and 5) — confirm this is wanted.
test_data = test_data.sort_values(by='label')
test_data = test_data[:55]
test_data = test_data.sample(frac=1)

# Keep the three input columns and renumber the rows 0..n-1 in one step.
# This replaces a row-by-row DataFrame.append loop, which re-copied the frame
# on every iteration.
test_data = test_data[['sentence', 'label', 'length']].reset_index(drop=True)
# Ground-truth labels in the shuffled row order, used for the final report.
labels = test_data['label'].tolist()

# Make sure every meta-feature column exists before it is filled in.
for col in excollist:
    test_data[col] = 0

# Patterns are compiled once. Raw strings keep the backslashes literal.
URL_OR_HANDLE = re.compile(r"((\S+)?(http(s)?)(\S+))|((\S+)?(www)(\S+))|((\S+)?(\@)(\S+)?)")
NON_LETTER = re.compile(r"[^a-zA-Z ]")

# Whole-column assignments replace the former `df[col].iloc[idx] = ...` loop,
# which wrote through chained indexing (SettingWithCopyWarning).
raw_text = test_data['sentence'].astype(str)

# Flags computed on the untouched text.
test_data['web'] = raw_text.str.contains(r'http|\.net|\.com|www', regex=True).astype(int)
test_data['email'] = raw_text.str.contains('email|@', regex=True).astype(int)
test_data['hashtag'] = raw_text.str.contains('#', regex=False).astype(int)
# '%' must be looked up BEFORE non-letters are stripped; testing the cleaned
# text (as was done previously) can never match and left this column all 0.
test_data['symbol'] = raw_text.str.contains('%', regex=False).astype(int)

# Drop URLs / www links / @handles, then everything that is not a letter or space.
clean_text = raw_text.map(lambda text: NON_LETTER.sub("", URL_OR_HANDLE.sub(" ", text)))

# VADER compound score and the spelled-out "percent" flag use the cleaned text.
test_data['sentiment'] = clean_text.map(lambda text: sid.polarity_scores(text)['compound'])
test_data['percent'] = clean_text.str.contains('percent|per cent', regex=True).astype(int)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [None]:
# Build the token, bag-of-words and sentence columns on the test frame.
lda_preprocessing(test_data)
w2v_preprocessing(test_data)

# Topic-probability vector per test document, from the trained LDA model.
test_data['lda_features'] = [document_to_lda_features(LDAmodel, doc)
                             for doc in test_data['bow']]

# Averaged word vector per test document, from the trained Word2Vec model.
test_data['w2v_features'] = [get_w2v_features(W2Vmodel, sentence_group)
                             for sentence_group in test_data['tokenized_sentences']]

  if __name__ == '__main__':


In [None]:
# Hand-crafted meta features, one column each, in this fixed order.
X_test_metas = np.array(
    test_data[['web', 'email', 'hashtag', 'sentiment', 'percent', 'symbol']]
)

# Per-document feature vectors stacked into 2-D matrices.
X_test_lda = np.array([np.array(vec) for vec in test_data.lda_features])
X_test_w2v = np.array([np.array(vec) for vec in test_data.w2v_features])
# Column-wise combinations of the three feature families.
X_test_ldaw2v = np.concatenate((X_test_lda, X_test_w2v), axis=1)
X_test_combined = np.concatenate((X_test_metas, X_test_ldaw2v), axis=1)
X_test_ldametas = np.concatenate((X_test_metas, X_test_lda), axis=1)

In [None]:
## 7. Prediction

In [None]:
#submission_predictions = models['best_lr_ldawmetas'].predict(X_test_ldametas)

In [None]:
#submission_predictions = models['best_lr_lda'].predict(X_test_lda)

In [None]:
#submission_predictions = models['best_lr_w2v'].predict(X_test_w2v)

In [None]:
#submission_predictions = models['best_lr_ldaw2v'].predict(X_test_ldaw2v)

In [None]:
# Predict with the model tuned on the combined (meta + LDA + W2V) features.
submission_predictions = models['best_lr_combined_all'].predict(X_test_combined)

In [None]:
# Show the raw predicted labels, in test-row order.
print(submission_predictions)

[0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 1 0
 0 0 0 1 1 1 0 0 0 0 0 0 0 1 1 0 0 1]


In [None]:
# Sanity check: there must be exactly one prediction per test row.
test_label = test_data['label']
print(len(test_label), len(submission_predictions))

55 55


In [None]:
# Compare the predictions with `labels`, the ground-truth list collected while
# the test frame was prepared.
result = classification_report(labels, submission_predictions)
print ('\n clasification report:\n', result)


 clasification report:
               precision    recall  f1-score   support

           0       0.95      0.80      0.87        50
           1       0.23      0.60      0.33         5

    accuracy                           0.78        55
   macro avg       0.59      0.70      0.60        55
weighted avg       0.89      0.78      0.82        55



In [None]:
## 8. Export submission files

In [None]:
# Predictions as a one-column frame; the row position is the test-item index.
result = np.array(submission_predictions)
submission_df = pd.DataFrame({'prediction': result})
print(submission_df)

      prediction
0              0
1              0
2              0
3              0
4              1
...          ...
1405           0
1406           0
1407           1
1408           1
1409           0

[1410 rows x 1 columns]


In [None]:
import json

# Target layout: {"test-0": {"label": 0}, "test-1": {"label": 0}, ... }
# int() guarantees a plain Python integer, since json cannot serialise NumPy
# integer scalars.
data = {
    'test-' + str(idx): {"label": int(prediction)}
    for idx, prediction in enumerate(submission_df['prediction'])
}

jsonfilename = 'test-output.json'
with open(jsonfilename, 'w') as jsonFile:
    json.dump(data, jsonFile)

# The browser download only exists on Google Colab; anywhere else the file
# simply stays on disk instead of the whole cell failing on the import.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    files.download(jsonfilename)


In [None]:
sdf = submission_df.sort_values(by='prediction')

# Count the predictions equal to 0 (c0) and everything else (c1) with a
# vectorised comparison instead of a Python loop over the rows.
c0 = int((sdf['prediction'] == 0).sum())
c1 = len(sdf) - c0

print(c0, c1)

1285 125


In [None]:
# Inspect the 55 dev rows that form the evaluation slice, without shuffling.
# NOTE: this rebinds `test_data` to the raw frame, so the feature columns
# computed earlier are no longer attached to it after this cell runs.
test_data = pd.read_csv('dev.tsv', encoding="utf-8", delimiter = '\t' )
test_data = test_data.sort_values(by='label')
test_data = test_data[:55]
print(test_data)
#test_data = test_data.sample(frac = 1)

                                             sentence  label  length
33  Gadget Gift Guide: Picks for Pet LoversWhile P...      0    5942
68  With every flood, public anger over the climat...      0    5806
34  Mass melting of Antarctic ice sheet led to thr...      0    3190
80  EU urged to adopt meat tax to tackle climate e...      0    4729
36  The Coalition wants to turn scientists into la...      0    5529
78  This winter in Europe was hottest on record by...      0    2780
41  Victorian police officer charged with murderA ...      0     807
74  Facebook, Groupon, Netflix Drive the Next Big ...      0   13511
45  A Top Gun fantasy that came trueIt was the sum...      0    5400
46  Uruguay 2 Holland 3: match reportSo now Gio va...      0    5326
47  Obama 'damaging the presidency' with immigrati...      0    4940
48  Orange and T-Mobile merge networksThe roaming ...      0    1052
98  Why don’t we treat the climate crisis with the...      0    6334
30  Microsoft Wants Its Wearable O