In [2]:
from heapq import heapify
import os
import json
from typing import Counter
import pandas as pd
import re
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pickle

import math

import warnings
warnings.filterwarnings("ignore")

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer, SnowballStemmer, LancasterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk import pos_tag

from sklearn.feature_extraction.text import CountVectorizer
import sklearn.model_selection
from sklearn.model_selection import RepeatedKFold, cross_val_score, GridSearchCV
import sklearn.preprocessing as preproc
from sklearn.feature_extraction import text
from sklearn.svm import LinearSVR
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, ElasticNet, ElasticNetCV, LassoLars, SGDRegressor
from sklearn.metrics import confusion_matrix, mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR

from IPython.display import FileLink, FileLinks

from tqdm import tqdm

!pip install rouge-score
from rouge_score import rouge_scorer

import psutil
import gc
from collections import Counter

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24955 sha256=c6b9ce26f0fa6c7db8bc322030586f9f1190b27e5a7458f28fab5020d0483bfe
  Stored in directory: /root/.cache/pip/wheels/84/ac/6b/38096e3c5bf1dc87911e3585875e21a3ac610348e740409c76
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2
[0m

In [3]:

def json_to_df(json_path,type):
    with open(json_path, "r", encoding="utf-8") as f: 
        lines = [eval(l) for l in f.readlines()]

    # exclude lines with surrogates in their text/summary
    surr = [ i for i,l in enumerate(lines) for k in l.keys() if k in ["text","summary"] and re.search(r'[\uD800-\uDFFF]', l[k])!=None ]

    lines = [ l for i,l in zip( range(len(lines)),lines ) if i not in surr ]

    cols=[ "title",	"date",	"text",	"summary", "compression", "coverage", "density", "compression_bin", "coverage_bin"]

    # we need only the extractive summaries as we are building an extractive summarizer
    data=[ [ l[k] for k in l.keys() if k in cols ] for l in lines if l["density_bin"]=="extractive" ]
    df = pd.DataFrame(data,columns=cols)

    df.to_csv(f"..{os.sep}Data{os.sep}DataFrames{os.sep}{type}_set.csv", header=True, index=False )

    return df


# text processing functions

# A list of contractions from http://stackoverflow.com/questions/19790188/expanding-english-language-contractions-in-python
contractions = { "ain't": "am not", "aren't": "are not", "can't": "cannot", "can't've": "cannot have", "'cause": "because", "could've": "could have", "couldn't": "could not", "couldn't've": "could not have", "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hadn't've": "had not have", "hasn't": "has not", "haven't": "have not", "he'd": "he would", "he'd've": "he would have", "he'll": "he will", "he's": "he is", "how'd": "how did", "how'll": "how will", "how's": "how is", "i'd": "i would", "i'll": "i will", "i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would", "it'll": "it will", "it's": "it is", "let's": "let us", "ma'am": "madam", "mayn't": "may not", "might've": "might have", "mightn't": "might not", "must've": "must have", "mustn't": "must not", "needn't": "need not", "oughtn't": "ought not", "shan't": "shall not", "sha'n't": "shall not", "she'd": "she would", "she'll": "she will", "she's": "she is", "should've": "should have", "shouldn't": "should not", "that'd": "that would", "that's": "that is", "there'd": "there had", "there's": "there is", "they'd": "they would", "they'll": "they will", "they're": "they are", "they've": "they have", "wasn't": "was not", "we'd": "we would", "we'll": "we will", "we're": "we are", "we've": "we have", "weren't": "were not", "what'll": "what will", "what're": "what are", "what's": "what is", "what've": "what have", "where'd": "where did", "where's": "where is", "who'll": "who will", "who's": "who is", "won't": "will not", "wouldn't": "would not", "you'd": "you would", "you'll": "you will", "you're": "you are" }


def sentence_cleaning(text, remove_stopwords = True, sub_contractions=True, stemming=True):
    global pbar_cleaning
    pbar_cleaning.update(1)
    
    # Convert words to lower case
    text = text.lower()
    toks = word_tokenize(text)
      
    # Replace contractions with their longer forms 
    if sub_contractions:
        new_text = []
        for word in toks:
            if word in contractions:
                new_text.append(contractions[word])
            else:
                new_text.append(word)
    
    text = " ".join(new_text)

    # Format words and remove unwanted characters
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
    text = re.sub(r'\<a href', ' ', text)
    text = re.sub(r'&amp;', '', text) 
    text = re.sub(r'[_"\-;%()|+&=*%.,!?:#$@\[\]/]', ' ', text)
    text = re.sub(r'<br />', ' ', text)
    text = re.sub(r'\'', ' ', text)

    toks_clean = word_tokenize(text)
    
    if remove_stopwords:
        stops = set(stopwords.words("english"))
        toks_clean = [w for w in toks_clean if not w in stops]

    
    if stemming: 
        stemmer=SnowballStemmer(language="english")
        toks_clean=[ stemmer.stem(w) for w in toks_clean ]

    text = " ".join(toks_clean)
    
    return text, toks_clean


def rouge_scoring(sentence,summary,sc_type="rougeL",score="fmeasure"):
    global pbar
    pbar.update(1)
    r_scorer=rouge_scorer.RougeScorer(["rouge1","rouge2","rougeL"])
    r_L=r_scorer.score(summary,sentence)
    score_ind={"precision":0, "recall":1, "fmeasure":2}
    
    return r_L[sc_type][score_ind[score]]


def text_processing(df,data_type,df_dir,sc_type="rougeL"):
    global pbar
    cols=["sentence", "summary", "text"] 
    # new_df=pd.DataFrame()

    # sentence split 
    sentences=[ sent_tokenize(t) for t in df["text"].values ]

    summaries=df["summary"].values
    sent_sum_text=[ [ s,summary,t  ] for s_list,summary,t in zip( sentences, summaries, df["text"] ) for s in s_list ]
    new_df=pd.DataFrame(sent_sum_text, columns=cols)
    new_df["text_id"]=new_df["text"].factorize()[0]
    new_df["chosen"]= 0
    ind = new_df[[ s in t for s,t in zip( new_df["sentence"], new_df["summary"] ) ]].index
    new_df.loc[ind,"chosen"]=1
    del new_df["text"]
    # for c in new_df.columns:
    #   new_df[c]=new_df[c].astype(str)

    # labels
    # columns -> sentence: 0, summary: 1, text: 2
    pbar = tqdm(total=new_df.shape[0] )
    new_df["rougeL"]= new_df.apply(lambda row: rouge_scoring(row["sentence"],row["summary"], sc_type=sc_type, score="fmeasure" ), axis=1)
    print(new_df["rougeL"])

    new_df["summary"].to_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_summaries.csv"), header=True, index=False)
    del new_df["summary"]
    
    new_df.to_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}.csv"), header=True, index=False)

    return new_df


def add_chosen_text_id(df):
    cols=["sentence", "summary", "text"] 
    # new_df=pd.DataFrame()

    # sentence split 
    sentences=[ sent_tokenize(t) for t in df["text"].values ]
    summaries=df["summary"].values
    
    sent_sum_text=[ [ s,summary,t  ] for s_list,summary,t in zip( sentences, summaries, df["text"] ) for s in s_list ]
    new_df=pd.DataFrame(sent_sum_text, columns=cols)
    
    new_df["text_id"]=new_df["text"].factorize()[0]
    new_df["chosen"]= 0
    ind = new_df[[ s in t for s,t in zip( new_df["sentence"], new_df["summary"] ) ]].index
    new_df.loc[ind,"chosen"]=1
    
    del new_df["text"]
    del new_df["summary"]
    del new_df["sentence"]
    
    return new_df


def create_labels(data_dir,df_dir="/kaggle/input/summarizer-data/"):
#     train_df = pd.read_csv(os.path.join(df_dir,"train_set.csv"))
    dev_df = pd.read_csv(os.path.join(df_dir,"dev_set.csv"))
    test_df = pd.read_csv(os.path.join(df_dir,"test_set.csv"))

#     train_df1, train_df2, train_df3, train_df4 = np.array_split(train_df, 4)
    splits=4
#     train_data_list=[]
#     for i,train_df in enumerate(np.array_split(train_df, splits)):
#         train_data = text_processing(train_df,f"train{i+1}",data_dir,"rougeL")
#         train_data_list.append(train_data)

    dev_data = text_processing(dev_df,"dev",data_dir,"rougeL")
    test_data = text_processing(test_df,"test",data_dir,"rougeL")
    
    return train_data_list, dev_data, test_data


def df_add_tid(data_dir, df_dir):
    sc_type="rougeL"
    data_labels_dir="/kaggle/input/summarizer-data"

#     data_type="train"
#     train_df = pd.read_csv(os.path.join(df_dir,f"{data_type}_set.csv"))
#     splits=4
#     for i,df in enumerate(np.array_split(train_df, 4)):
#         if i==2:
#             data_type=f"train{i+1}"
#             print(data_type+"\n")
#             train_ch_tid_df=add_chosen_text_id(df)
#             train_rougeL=pd.read_csv(os.path.join(data_labels_dir,f"{data_type}_data_{sc_type}.csv"))
#             print(train_rougeL.columns, train_rougeL.shape)
#             train_rougeL["text_id"]=train_ch_tid_df["text_id"]
#             train_rougeL["chosen"]=train_ch_tid_df["chosen"]
#             print(train_rougeL.columns, train_rougeL.shape)
#             train_rougeL.to_csv(os.path.join(data_dir,f"{data_type}_data_{sc_type}_tid.csv"), header=True, index=False)

    data_type="dev" 
    dev_df = pd.read_csv(os.path.join(df_dir,f"{data_type}_set.csv"))
    dev_ch_tid_df=add_chosen_text_id(dev_df)
    dev_rougeL=pd.read_csv(os.path.join(data_labels_dir,f"{data_type}_data_{sc_type}.csv"))
    print(dev_rougeL.columns, dev_rougeL.shape)
    dev_rougeL["text_id"]=dev_ch_tid_df["text_id"]
    dev_rougeL["chosen"]=dev_ch_tid_df["chosen"]
    print(dev_rougeL.columns, dev_rougeL.shape)
    dev_rougeL.to_csv(os.path.join(data_dir,f"{data_type}_data_{sc_type}_tid.csv"), header=True, index=False)

    data_type="test"
    test_df = pd.read_csv(os.path.join(df_dir,f"{data_type}_set.csv"))
    test_ch_tid_df=add_chosen_text_id(test_df)
    test_rougeL=pd.read_csv(os.path.join(data_labels_dir,f"{data_type}_data_{sc_type}.csv"))
    print(test_rougeL.columns, test_rougeL.shape)
    test_rougeL["text_id"]=test_ch_tid_df["text_id"]
    test_rougeL["chosen"]=test_ch_tid_df["chosen"]
    print(test_rougeL.columns, test_rougeL.shape)
    test_rougeL.to_csv(os.path.join(data_dir,f"{data_type}_data_{sc_type}_tid.csv"), header=True, index=False)

    
def data_stats(df):
    groups = df.groupby("chosen")
    print(groups.describe()["rougeL"])

    
def thematic_ratio(them_words, word_list):
    them_occ= sum( [ word_list.count(w) for w in set(them_words)&set(word_list)])
    them_ratio=them_occ/len(word_list)
    return them_ratio


def s_position(t_position,tot_sent):
    N=tot_sent
    th=0.2*N
    min_p= th*N
    max_p= 2*th*N
    
    if t_position==tot_sent or t_position==1:
        pos=1.0
    else: 
        pos=math.cos((t_position - min_p)*((1/max_p) - min_p))
        
    return pos
    
    
def prop_nouns(tokens):
    if type(tokens)!=type(list):
        tokens=eval(tokens)
    pos= nltk.pos_tag(tokens)
    tags_count=Counter(tag for _, tag in pos if tag=="NNP" or tag=="NNPS")
    return tags_count["NNP"]+tags_count["NNPS"] 


def feature_df(df, data_dir, data_type):
    global pbar_cleaning
    feat_df=pd.DataFrame()
    
    # tokenize
    pbar_cleaning=tqdm(total=train_data.shape[0], leave=True)
    df["tokens"] = df["sentence"].apply(lambda x: sentence_cleaning(x)[1]) 
    

    # create sentence features
    # 1. thematic words
    col = df.groupby("text_id")["tokens"].apply(sum)
    thematic_cols= pd.DataFrame({"text_id": col.index, "thematic": [ [ t[0] for t in Counter(x).most_common(10) ] for x in col ]})  
    df=df.join(thematic_cols["thematic"], on='text_id' )
    feat_df["thematic_ratio"] = df.apply(lambda row: thematic_ratio(row.thematic, row.tokens) if len(row.tokens)>0 else 0.0, axis=1)

    # 2. sentence position in the text
    feat_df["text_position"] = df.groupby("text_id").cumcount().add(1)
    df["tot_sent"] = df.groupby("text_id")["sentence"].transform(len)
    feat_df["s_position"] = df.apply(lambda row: s_position(row.text_position,row.tot_sent), axis=1)

    # 3. sentence length - threshold=3
    threshold=3
    feat_df["len"]= df["tokens"].apply(lambda x: 0 if len(x)<threshold else len(x))

    # 4. sentence position - paragraph relative
    feat_df['s_pos_par'] = feat_df["s_position"].values
    feat_df.loc[feat_df.s_pos_par!=1.0, 's_pos_par']=0.0

    # 5. numerals
    feat_df["num_ratio"]=df["tokens"].apply(lambda x: sum( [ 1 for t in x if t.isnumeric() ] )/len(x) if len(x)>0 else 0 )


    # ?. Term Frequency-Inverse Sentence Frequency


    # ?. proper nouns - not so useful
    # train_data_feats["NNPs"]=train_data_feats["tokens"].apply(lambda x: prop_nouns(x) )




    # train_data_feats=train_data[["len","text_position"]]
    feat_df.to_csv(os.path.join(data_dir,f"{data_type}_set_feats.csv"), header=True, index=False)

    return feat_df



# SVM_scaler =  StandardScaler()
# LR_scaler =  MinMaxScaler()
# KNN_scaler =  StandardScaler()
# # classifier parameters
# KNN_n_num = 9
# LR_C = 1.0
# SVM_C = 0.001
# algos={
#   "ElNet": make_pipeline(SVM_scaler, SVC(C=SVM_C)),
#   "LR":  make_pipeline(LR_scaler, LogisticRegression(C=LR_C)),
#   "KNN": make_pipeline(KNN_scaler, KNeighborsClassifier(n_neighbors = KNN_n_num)),
# }


def classifier_training(model,X_train,y_train):
    model.fit(X_train,y_train)
    preds=model.predict(X_train)
    c_rep=classification_report(y_train,preds)
    c_rep_dict=classification_report(y_train,preds,output_dict=True)
    return model, c_rep, c_rep_dict

def classifier_validation(model,X_dev,y_dev):
    preds=model.predict(X_dev)
    c_rep = classification_report(y_dev,preds)
    c_rep_dict=classification_report(y_dev,preds,output_dict=True)
    return c_rep, c_rep_dict

def classifier_test(model,X_test,y_test):
    preds=model.predict(X_test)
    c_rep = classification_report(y_test,preds)
    c_rep_dict=classification_report(y_test,preds,output_dict=True)
    return c_rep, c_rep_dict


# Training - Validation - Test pipeline
def classifier_T_V_T(X_train, y_train, X_dev, y_dev, X_test, y_test, algo_type="LR"):
    model=algos[algo_type]

    model,c_rep_train,c_rep_dict_train=classifier_training(model,X_train,y_train)
    c_rep_dev,c_rep_dict_dev=classifier_validation(model,X_dev,y_dev)
    c_rep_test,c_rep_dict_test=classifier_validation(model,X_test,y_test)

    return model, c_rep_train, c_rep_dict_train, c_rep_dev, c_rep_dict_dev, c_rep_test, c_rep_dict_test



In [4]:
# memory clean
# for v in globals():
#     print(v)
#     if str(v) not "__name__":
#         del v
gc.collect()

23

In [5]:
df_dir="/kaggle/input/summarizer-data"

data_dir= "/kaggle/working/Data/DataFrames"
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

# load saved dataframes and create labels
# train_data_list, dev_data, test_data = create_labels(data_dir)

# load dataframes with labels
# df_add_tid(data_dir, df_dir)

FileLinks(".")

In [6]:
# load training dataset
sc_type="rougeL"
data_type="train1"
train_data=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))
data_stats(train_data)
labels=train_data["rougeL"].values
del train_data["rougeL"]


sc_type="rougeL"
data_type="dev"
dev_data=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))
data_stats(dev_data)
labels_dev=dev_data["rougeL"].values
del dev_data["rougeL"]


sc_type="rougeL"
data_type="test"
test_data=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))
data_stats(test_data)
labels_test=test_data["rougeL"].values
del test_data["rougeL"]

            count      mean       std  min       25%       50%       75%  max
chosen                                                                       
0       2310412.0  0.105404  0.120294  0.0  0.050000  0.083333  0.122449  1.0
1        126615.0  0.349349  0.234293  0.0  0.177295  0.306569  0.461538  1.0
            count      mean       std  min       25%       50%       75%  max
chosen                                                                       
0       1019122.0  0.109952  0.127384  0.0  0.051948  0.088889  0.128205  1.0
1         29440.0  0.457427  0.297148  0.0  0.216498  0.392638  0.666667  1.0
            count      mean       std  min       25%       50%       75%  max
chosen                                                                       
0       1006718.0  0.109932  0.127422  0.0  0.051948  0.088889  0.128205  1.0
1         29216.0  0.456340  0.299931  0.0  0.210740  0.389262  0.666667  1.0


In [7]:
def load_datasets(df_dir, sc_type="rougeL"):
    # train_data_feats=pd.read_csv("/kaggle/input/summarizer-data/train1_set_feats.csv")
    data_type="train1"
    print(f"Loading {data_type.capitalize()} Data . . ")
    # train_data=feature_df(train_data, data_dir, data_type)
    train_data=pd.read_csv(f"/kaggle/input/summarizer-data/{data_type}_set_feats.csv") 

    train_data_sent=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))

    train_data_feats=train_data
    train_data["sentence"]=train_data_sent["sentence"]

    train_data_feats=train_data_feats[["len", "s_position", "thematic_ratio", "s_pos_par", "num_ratio"]]
    # lens= StandardScaler().fit_transform(np.array(train_data_feats["len"]).reshape(-1,1) )
    # train_data_feats["len"]=lens
    # train_data_feats


    data_type="dev"
    print(f"Loading {data_type.capitalize()} Data . . ")
    # dev_data=feature_df(dev_data, data_dir, data_type)
    dev_data=pd.read_csv(f"/kaggle/input/summarizer-data/{data_type}_set_feats.csv") 

    dev_data_sent=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))
    dev_data_feats=dev_data
    dev_data["sentence"]=dev_data_sent["sentence"]
    dev_data_feats=dev_data_feats[["len", "s_position", "thematic_ratio", "s_pos_par", "num_ratio"]]


    data_type="test"
    print(f"Loading {data_type.capitalize()} Data . . ")
    # test_data=feature_df(test_data, data_dir, data_type)
    test_data=pd.read_csv(f"/kaggle/input/summarizer-data/{data_type}_set_feats.csv") 

    test_data_sent=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))
    test_data_feats=test_data
    test_data["sentence"]=test_data_sent["sentence"]
    test_data_feats=test_data_feats[["len", "s_position", "thematic_ratio", "s_pos_par", "num_ratio"]]
    
    return train_data, train_data_feats, dev_data, dev_data_feats, test_data, test_data_feats




In [8]:
score_type="rougeL"
train_data, train_data_feats, dev_data, dev_data_feats, test_data, test_data_feats = load_datasets(df_dir, sc_type=score_type)

FileLinks(".")

Loading Train1 Data . . 
Loading Dev Data . . 
Loading Test Data . . 


In [None]:
# # tokenize
# global pbar_cleaning
# pbar_cleaning=tqdm(total=train_data.shape[0], leave=True)
# train_data["tokens"] = train_data["sentence"].apply(lambda x: sentence_cleaning(x)[1]) 

# train_data_feats=pd.DataFrame()
# # create sentence features
# # 1. thematic words
# col = train_data.groupby("text_id").tokens.apply(sum)
# thematic_cols= pd.DataFrame({"text_id": col.index, "thematic": [ [ t[0] for t in Counter(x).most_common(10) ] for x in col ]})  
# train_data=train_data.join(thematic_cols["thematic"], on='text_id' )
# train_data_feats["thematic_ratio"] = train_data.apply(lambda row: thematic_ratio(row.thematic, row.tokens) if len(row.tokens)>0 else 0.0, axis=1)

# # 2. sentence position in the text
# train_data["text_position"] = train_data.groupby("text_id").cumcount().add(1)
# train_data["tot_sent"] = train_data.groupby("text_id")["sentence"].transform(len)
# train_data_feats["s_position"] = train_data.apply(lambda row: s_position(row.text_position,row.tot_sent), axis=1)

# # 3. sentence length - threshold=3
# threshold=3
# train_data_feats["len"]= train_data["tokens"].apply(lambda x: 0 if len(x)<threshold else len(x))

# # 4. sentence position - paragraph relative
# train_data_feats['s_pos_par'] = train_data_feats["s_position"].values
# train_data_feats.loc[train_data_feats.s_pos_par!=1.0, 's_pos_par']=0.0

# # 5. numerals
# train_data_feats["num_ratio"]=train_data_feats["tokens"].apply(lambda x: sum( [ 1 for t in eval(x) if t.isnumeric() ] )/len(x) )


# # ?. Term Frequency-Inverse Sentence Frequency


# # ?. proper nouns - not so useful
# # train_data_feats["NNPs"]=train_data_feats["tokens"].apply(lambda x: prop_nouns(x) )




# # train_data_feats=train_data[["len","text_position"]]
# train_data_feats.to_csv(os.path.join(data_dir,f"{data_type}_set_feats.csv"), header=True, index=False)

In [None]:
# train_data_feats=train_data[["len","text_position","thematic_words", ]]
# data_type="train1"
# train_data.to_csv(os.path.join(data_dir,f"{data_type}_set_feats.csv"), header=True, index=False)

In [None]:
# FileLinks(".")

In [59]:
# sc_type="rougeL"
# data_type="train1"
# train_data=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))
# data_stats(train_data)
# labels=train_data["rougeL"].values

# sc_type="rougeL"
# data_type="dev"
# dev_data=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))
# data_stats(dev_data)
# labels=dev_data["rougeL"].values

            count      mean       std  min       25%       50%       75%  max
chosen                                                                       
0       2310412.0  0.105404  0.120294  0.0  0.050000  0.083333  0.122449  1.0
1        126615.0  0.349349  0.234293  0.0  0.177295  0.306569  0.461538  1.0
            count      mean       std  min       25%       50%       75%  max
chosen                                                                       
0       1019122.0  0.109952  0.127384  0.0  0.051948  0.088889  0.128205  1.0
1         29440.0  0.457427  0.297148  0.0  0.216498  0.392638  0.666667  1.0


In [None]:
# test_num=1000

# model = make_pipeline(MinMaxScaler(), ElasticNet(alpha=5e-3, warm_start=True, random_state=99, fit_intercept=True))
# model.fit(train_data_feats, labels)

# cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# # evaluate model
# scores_mae = cross_val_score(model, train_data_feats.iloc[:test_num,:] , labels[:test_num], scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# scores_mse = cross_val_score(model, train_data_feats.iloc[:test_num,:] , labels[:test_num], scoring='neg_mean_squared_error', cv=cv, n_jobs=-1)
# # scores_r2 = cross_val_score(model, train_data_feats.iloc[:test_num,:] , labels[:test_num], scoring='r2', cv=cv, n_jobs=-1)

# # force scores to be positive
# scores_mae = np.absolute(scores_mae)
# scores_mse = np.absolute(scores_mse)
# # scores_r2 = np.absolute(scores_r2)
# print('Mean MAE: %.3f (%.3f)' % (np.mean(scores_mae), np.std(scores_mae)))
# print('Mean MSE: %.3f (%.3f)' % (np.mean(scores_mse), np.std(scores_mse)))
# # print('Mean R2: %.3f (%.3f)' % (np.mean(scores_r2), np.std(scores_r2)))

# print(labels)
# print(model.predict(train_data_feats))
# # svr_results(labels[:test_num],  train_data_feats.iloc[:test_num,:], model)

In [None]:
# test_num=10000

# # model = make_pipeline(MinMaxScaler(), ElasticNetCV(random_state=99))
# model =   make_pipeline(StandardScaler(),ElasticNet(alpha=5e-6, random_state=99, fit_intercept=True))
# model.fit(train_data_feats, labels)

# cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# # evaluate model
# scores_mae = cross_val_score(model, train_data_feats.iloc[:test_num,:] , labels[:test_num], scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# scores_mse = cross_val_score(model, train_data_feats.iloc[:test_num,:] , labels[:test_num], scoring='neg_mean_squared_error', cv=cv, n_jobs=-1)
# # scores_r2 = cross_val_score(model, train_data_feats.iloc[:test_num,:] , labels[:test_num], scoring='r2', cv=cv, n_jobs=-1)

# # force scores to be positive
# scores_mae = np.absolute(scores_mae)
# scores_mse = np.absolute(scores_mse)
# # scores_r2 = np.absolute(scores_r2)
# print('Mean MAE: %.3f (%.3f)' % (np.mean(scores_mae), np.std(scores_mae)))
# print('Mean MSE: %.3f (%.3f)' % (np.mean(scores_mse), np.std(scores_mse)))
# # print('Mean R2: %.3f (%.3f)' % (np.mean(scores_r2), np.std(scores_r2)))

# print(labels)
# print(model.predict(train_data_feats))

In [None]:
# test_num=1000

# # model = make_pipeline(MinMaxScaler(), ElasticNetCV(random_state=99))
# eps=0
# model = make_pipeline(StandardScaler(), LinearSVR(epsilon=eps, C=5e-1, fit_intercept=True, intercept_scaling=1.0, loss="squared_epsilon_insensitive"))
# # model = LinearSVR(epsilon=eps, C=5e-4, fit_intercept=True)
# model.fit(train_data_feats, labels)

# cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# # evaluate model
# scores_mae = cross_val_score(model, train_data_feats.iloc[:test_num,:] , labels[:test_num], scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# scores_mse = cross_val_score(model, train_data_feats.iloc[:test_num,:] , labels[:test_num], scoring='neg_mean_squared_error', cv=cv, n_jobs=-1)
# # scores_r2 = cross_val_score(model, train_data_feats.iloc[:test_num,:] , labels[:test_num], scoring='r2', cv=cv, n_jobs=-1)

# # force scores to be positive
# scores_mae = np.absolute(scores_mae)
# scores_mse = np.absolute(scores_mse)
# # scores_r2 = np.absolute(scores_r2)
# print('Mean MAE: %.3f (%.3f)' % (np.mean(scores_mae), np.std(scores_mae)))
# print('Mean MSE: %.3f (%.3f)' % (np.mean(scores_mse), np.std(scores_mse)))
# # print('Mean R2: %.3f (%.3f)' % (np.mean(scores_r2), np.std(scores_r2)))

# print(labels)
# print(model.predict(train_data_feats))

In [None]:
# grid search - Dev Set Tuning
def estimator_tuning(X, y, estimator=SGDRegressor(), scaler=StandardScaler()):
    model_grid = make_pipeline(scaler, estimator )

    param_grid = {
        "sgdregressor__alpha": [8.192e-10],
        "sgdregressor__tol": [6.4e-5],
        "sgdregressor__epsilon": [3.2e-4],
        "sgdregressor__loss": ["squared_error"],
        "sgdregressor__penalty": ["elasticnet"],
        "sgdregressor__learning_rate": ["adaptive"]
    }
    # "sgdregressor__alpha": 5.0 ** -np.arange(2, 7)
    # "sgdregressor__tol": 10.0 ** -np.arange(2, 7)
    # "sgdregressor__loss": ["squared_error", "huber", "epsilon_insensitive"]
    # sgdregressor__penalty": ["l2", "l1", "elasticnet"]
    # "sgdregressor__learning_rate": ["constant", "optimal", "invscaling", "adaptive"]

    g_search = GridSearchCV(model_grid, param_grid, verbose=9, return_train_score=True, cv=2)
    g_search.fit(X, y)
    
    with open(os.path.join("/kaggle/working/Data",f"{estimator}_grid_search_results.txt"), "w", encoding="utf-8" ) as writer:
          writer.write(f"Best ParametersL:\n{g_search.best_params_}\n\n\n{g_search.cv_results_}")

    print(f"Best score: { g_search.best_score_}\nParams: {g_search.best_params_}")
    return g_search.best_estimator_


best_model=estimator_tuning(dev_data_feats,labels_dev)

### Best sofar

In [9]:
def estimator_training(train_data_feats, labels, test_data_feats, labels_test):
    model = make_pipeline(StandardScaler(), SGDRegressor(alpha=8.192e-10, max_iter=1000, tol=6.4e-5, epsilon=3.2e-4, learning_rate="adaptive", loss="squared_error", penalty="elasticnet"))
    # model = LinearSVR(epsilon=eps, C=5e-4, fit_intercept=True)
    model.fit(train_data_feats, labels)

    cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
    
    # evaluate model
    test_samples=1000
    # MAE
    scores_mae = cross_val_score(model, test_data_feats.iloc[:test_samples,:] , labels_test[:test_samples], scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
    scores_mae = np.absolute(scores_mae)
    print('Mean MAE: %.3f (%.3f)' % (np.mean(scores_mae), np.std(scores_mae)))
    # MSE
    scores_mse = cross_val_score(model, test_data_feats.iloc[:test_samples,:] , labels_test[:test_samples], scoring='neg_mean_squared_error', cv=cv, n_jobs=-1)
    scores_mse = np.absolute(scores_mse)
    print('Mean MSE: %.3f (%.3f)' % (np.mean(scores_mse), np.std(scores_mse)))
    # R2    
    scores_r2 = cross_val_score(model, test_data_feats.iloc[:test_samples,:] , labels_test[:test_samples], scoring='r2', cv=cv, n_jobs=-1)
#     scores_r2 = np.absolute(scores_r2)
    print('Mean R2: %.3f (%.3f)' % (np.mean(scores_r2), np.std(scores_r2)))

#     print(labels_test)
#     print(model.predict(test_data_feats))
    
    return model


model=estimator_training(train_data_feats, labels, test_data_feats, labels_test)

pkl_filepath=os.path.join("/kaggle/working/Data","SGD_model.pkl")
with open(pkl_filepath,"wb") as model_writer:
    pickle.dump(model, model_writer)

Mean MAE: 0.069 (0.007)
Mean MSE: 0.016 (0.005)
Mean R2: 0.155 (0.159)


In [20]:
# preds_df=pd.DataFrame({"text_id": train_data["text_id"].values, "text_position": train_data["text_position"].values, "sentence": train_data["sentence"].values, "pred_rougeL": model.predict(train_data_feats)})

# n_largest=preds_df.groupby(["text_id"])["text_position","pred_rougeL"].apply(lambda x: x.nlargest(3,columns=["pred_rougeL"]).sort_index())

In [48]:
def scoring(pred_summary, ref_summary):
    print(pred_summary+"\n\n"+ref_summary)
    r_scorer=rouge_scorer.RougeScorer(["rouge1","rouge2","rougeL"], use_stemmer=True)
#     rougeL=r_scorer.score(ref_summary,pred_summary)
    rouge=r_scorer.score(ref_summary,pred_summary)
    score_ind={"precision":0, "recall":1, "fmeasure":2}
    return rouge["rouge1"][score_ind["fmeasure"]], rouge["rouge2"][score_ind["fmeasure"]], rouge["rougeL"][score_ind["fmeasure"]]

def summarization(sentences):
    return ".".join(sentences)

def create_summary(df,s_num,th=0.19):
    n_largest=df.groupby(["text_id"])["text_position","sentence","pred_rougeL"].apply(lambda x: x.nlargest(s_num,columns=["pred_rougeL"]).sort_index())
    max_rL=max(n_largest["pred_rougeL"])
    sent = [ n_largest.loc[n_largest["pred_rougeL"].idxmax(),"sentence"] ]
    sent.extend( [ s for s,r in zip(n_largest["sentence"].values, n_largest["pred_rougeL"].values) if r > th and r!=max_rL] )
    summary = summarization( sent)   
    return summary

def get_ref_summary(ref_summaries,doc_id):
    return ref_summaries.loc[ref_summaries.text_id==doc_id, "summary"].values[0]

def get_preds(model,docX):
    return model.predict(docX)

def single_doc_summarizer(model,X,ref_summaries,th,s_num=4):
    global doc_p_bar
    docs=X.groupby("text_id")
    tot_r1=[]
    tot_r2=[]
    tot_rL=[]
    for i,d in enumerate(docs):
        doc_p_bar.update(1)
#         if i>10:
#             break
        document=d[1]
        cols=["len", "s_position", "thematic_ratio", "s_pos_par", "num_ratio"]
        feats=document[cols]

        preds=get_preds(model,feats)
        document["pred_rougeL"]=preds.tolist()
        pred_summary=create_summary(document,s_num,th)
        ref_summary=get_ref_summary(ref_summaries,i)
        r1,r2,rL = scoring(pred_summary,ref_summary)
        tot_r1.append(r1)
        tot_r2.append(r2)
        tot_rL.append(rL)
        
        
        del document
        del feats
        del preds
        del pred_summary
        del ref_summary
        gc.collect()

    return tot_r1, tot_r2, tot_rL



def doc_summary(model,document,ref_summaries,doc_id,th,s_num=4) :
    global doc_p_bar, tot_r1, tot_r2, tot_rL
    
#         if i>10:
#             break
    cols=["len", "s_position", "thematic_ratio", "s_pos_par", "num_ratio"]
    feats=document[cols]


    preds=get_preds(model,feats)
    document["pred_rougeL"]=preds.tolist()
    pred_summary=create_summary(document,s_num,th)
    ref_summary=get_ref_summary(ref_summaries,doc_id)
    r1,r2,rL = scoring(pred_summary,ref_summary)
    tot_r1.append(r1)
    tot_r2.append(r2)
    tot_rL.append(rL)
    doc_p_bar.postfix[1] = np.mean(tot_r1)
    doc_p_bar.postfix[3] = np.mean(tot_r2)
    doc_p_bar.postfix[5] = np.mean(tot_rL)
    doc_p_bar.postfix[6]["value"] = doc_id
    doc_p_bar.update(1)

    del document
    del feats
    del preds
    del pred_summary
    del ref_summary
    gc.collect()
    
    return r1, r2, rL

In [None]:
# sc_type="rougeL"
# data_type="train1"
# train_data=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))
# # train_data["text_position"] = train_data.groupby("text_id").cumcount().add(1)
# data_stats(train_data)
# labels=train_data["rougeL"].values

# sc_type="rougeL"
# data_type="test"
# test_data=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))
# test_data["text_position"] = test_data.groupby("text_id").cumcount().add(1)
# data_stats(test_data)
# labels=test_data["rougeL"].values

In [43]:
# summaries=pd.read_csv("/kaggle/input/summarizer-data/train_summaries.csv")
# train_len=train_data.groupby("text_id").size().shape[0]

# summaries=summaries.iloc[:train_len,:]
# train_data_feats["summary"]=summaries["summary"]
# train_data_feats["sentence"]=train_data["sentence"]
# train_data_feats["text_id"]=train_data["text_id"]
# train_data_feats["text_position"]=train_data["text_position"]

# X=train_data_feats
# y=pd.DataFrame(labels)


# score_type="rougeL"
# train_data, train_data_feats, dev_data, dev_data_feats, test_data, test_data_feats = load_datasets(df_dir, sc_type=score_type)

sc_type="rougeL"
data_type="test"
test_data=pd.read_csv(os.path.join(df_dir,f"{data_type}_data_{sc_type}_tid.csv"))
test_data["text_position"] = test_data.groupby("text_id").cumcount().add(1)
data_stats(test_data)
labels=test_data["rougeL"].values

summaries=pd.read_csv("/kaggle/input/summarizer-data/test_summaries.csv")
summaries=pd.DataFrame(summaries["summary"])
summaries["text_id"]=summaries.index
# summaries.to_csv("/kaggle/working/Data/DataFrames/test_summaries.csv", header=True, index=False)
test_len=test_data.groupby("text_id").size().shape[0]
# summaries["text_id"]=summaries.index
# if "summary" not in summaries.columns:
#     summaries["summary"]=summaries["sum_clean"]
#     del summaries["sum_clean"]

# summaries=summaries.iloc[:dev_len,:]
# dev_data_feats["summary"]=summaries["sum_clean"]
test_data_feats["sentence"]=test_data["sentence"]
test_data_feats["text_id"]=test_data["text_id"]
test_data_feats["text_position"]=test_data["text_position"]

X=test_data_feats
y=pd.DataFrame(labels_test)

            count      mean       std  min       25%       50%       75%  max
chosen                                                                       
0       1006718.0  0.109932  0.127422  0.0  0.051948  0.088889  0.128205  1.0
1         29216.0  0.456340  0.299931  0.0  0.210740  0.389262  0.666667  1.0


In [49]:
tot_r1=[]
tot_r2=[]
tot_rL=[]
global doc_p_bar
doc_p_bar=tqdm(total=summaries.shape[0], desc="Dev Set Summary Scoring", bar_format="{postfix[0]}: {postfix[1]:.10f} | {postfix[2]}: {postfix[3]:.10f} | {postfix[4]}: {postfix[5]:.10f} ( {postfix[6][value]}/{postfix[7]} )", postfix=["Mean Rouge1", np.mean(tot_r1), "Mean Rouge2", np.mean(tot_r2), "Mean RougeL", np.mean(tot_rL), dict(value=0) ,summaries.shape[0]], leave=True)

threshold=0.3
scores=X.groupby("text_id").apply(lambda x: doc_summary(model,x,summaries,x.text_id.values[0],threshold) )

print(f"\nMean Rouge1: {np.mean(r1)}\nMean Rouge2: {np.mean(r2)}\nMean RougeL: {np.mean(rL)}\n\n")


Mean Rouge1: 0.4309419020 | Mean Rouge2: 0.3651046939 | Mean RougeL: 0.4148576511 ( 23/54311 )

Mean Rouge1: 0.2568807339 | Mean Rouge2: 0.2056074766 | Mean RougeL: 0.2201834862 ( 0/54311 )[A

By MATT SCHWARTZ in Houston and WENDELL JAMIESON in New York Daily News Writers

Saturday, October 14th 1995, 4:22AM

Bleeding from a massive chest wound, Tejano star Selena cried, "Help me!

Bleeding from a massive chest wound, Tejano star Selena cried, "Help me! Help me! I've been shot!"and then named her killer with her dying breath. Shaken witnesses yesterday told a spellbound Houston courtroom how the blood-covered, mortally wounded 23-year-old Hispanic singing sensation burst into the lobby of the Corpus Christi Days Inn last March 31. Gasping for breath, Selena told motel workers that Yolanda Saldivar the president of her fan club shot



Mean Rouge1: 0.2568807339 | Mean Rouge2: 0.2056074766 | Mean RougeL: 0.2201834862 ( 0/54311 )[A

By MATT SCHWARTZ in Houston and WENDELL JAMIESON in New York Daily News Writers

Saturday, October 14th 1995, 4:22AM

Bleeding from a massive chest wound, Tejano star Selena cried, "Help me!

Bleeding from a massive chest wound, Tejano star Selena cried, "Help me! Help me! I've been shot!"and then named her killer with her dying breath. Shaken witnesses yesterday told a spellbound Houston courtroom how the blood-covered, mortally wounded 23-year-old Hispanic singing sensation burst into the lobby of the Corpus Christi Days Inn last March 31. Gasping for breath, Selena told motel workers that Yolanda Saldivar the president of her fan club shot



Mean Rouge1: 0.5045871560 | Mean Rouge2: 0.4704049844 | Mean RougeL: 0.4801223242 ( 1/54311 )[A

By HOLLY RAMER, Associated Press

CONCORD, N.H. -- A sick American engineer who was successfully evacuated from the South Pole to New Zealand is awaiting the results of medical tests after having what doctors believed was a stroke in August.

By HOLLY RAMER, Associated Press CONCORD, N.H. -- A sick American engineer who was successfully evacuated from the South Pole to New Zealand is awaiting the results of medical tests after having what doctors believed was a stroke in August.



Mean Rouge1: 0.4059317759 | Mean Rouge2: 0.3579940497 | Mean RougeL: 0.3755556606 ( 2/54311 )[A

Everyone is so excited about all the new businesses that are moving into the notoriously down-market Mid-Market area of San Francisco.

Breathless stories have extolled the benefits of having all these tech-savvy, &quot;new economy&quot; companies move into a stretch of real estate that's been withering legitimate businesses for decades.  It certainly seemed like a good opportunity to take a look at some of the businesses that have thrived in this neighborhood.  Mid-Market and the Tenderloin are home to a thriving ecosystem of micro industries, and these entrepreneurs are in it for the long haul.  Corner for stolen goods&quot;Fenced goods dealers are all at the southwest corner of Market and Seventh,&quot; Deleano Seymour told me as we walked around the neighborhood.  The northeast corner of Market is pretty quiet, but farther down the block, between Seventh and Sixth, there's another, larger, group of young gentlemen who are usually there.  Just weed, stolen goods, and then the women 


Mean Rouge1: 0.3995117759 | Mean Rouge2: 0.3599801454 | Mean RougeL: 0.3752108837 ( 3/54311 )[A

Vaslav Nijinsky, the great Russian dancer who was driven from the stage by madness in 1917 at the peak of his fame, recorded his thoughts and ravings in the next two years in a diary that he signed ''God Nijinsky.''

Vaslav Nijinsky, the great Russian dancer who was driven from the stage by madness in 1917 at the peak of his fame, recorded his thoughts and ravings in the next two years in a diary that he signed ''God Nijinsky.''   Although the diary was published in 1946 while Nijinsky was still alive, and the original handwritten volumes were sold at auction in 1979 for more than $100,000, a long-suppressed preface by the psychoanalyst Alfred Adler has now been printed for the first time, in The Archives of General Psychiatry.   The preface is the only published first-hand professional opinion of Nijinsky, whose mental illness led to consultations with all the foremost experts of his day, including Sigmund Freud and Carl Jung.  Its publication was prevented by Nijinsky's wife, Romola,


Mean Rouge1: 0.3829264799 | Mean Rouge2: 0.3175273142 | Mean RougeL: 0.3626757364 ( 4/54311 )[A

· Callie Shell, her camera and the would-be president guardianweekly.co.uk\/politics

Rhoda Buchanan:  For 18 months, photographer Callie Shell has enjoyed intimate access to the man who would be the 44th president of the United States of America



Mean Rouge1: 0.3371512685 | Mean Rouge2: 0.2721662693 | Mean RougeL: 0.3197934883 ( 5/54311 )[A

A unit of CBS Corp. said Wednesday it has formed a joint venture with billionaire Anil Ambani's Reliance Broadcast Network to launch television channels in India, joining a host of foreign broadcasters hoping to tap a fast-growing market.

The channels will broadcast series such as "Hawaii Five-0," as well as current shows like "CSI" and "Entertainment Tonight" via cable or satellite.



Mean Rouge1: 0.3815458215 | Mean Rouge2: 0.3239297994 | Mean RougeL: 0.3663577638 ( 6/54311 )[A

On a recent morning, New Line Cinema bosses Bob Shaye and Michael Lynne sat in their offices on Seventh Ave. looking over audience research on their mega movie "The Lord of the Rings," which opens Wednesday.

On a recent morning, New Line Cinema bosses Bob Shaye and Michael Lynne sat in their offices on Seventh Ave. looking over audience research on their mega movie "The Lord of the Rings," which opens Wednesday.
   "It has incredible awareness," Lynne said, jumping up from his seat. "This is the moment before something very incredible can happen."
   "I tend to be conservative," Shaye said more hesitantly. "With some



Mean Rouge1: 0.3573667775 | Mean Rouge2: 0.2879375994 | Mean RougeL: 0.3438662819 ( 7/54311 )[A

Now that Matthew Weiner has put the lie to “Leave It to Beaver” and “Make Room for Daddy,” revealing the denial, need, and aggression behind the perfect 1950s family façade, we wait.

At the end of the first half of season seven, all signs pointed to a happy ending, to the tune of “The Best Things in Life Are Free.”



Mean Rouge1: 0.3537729569 | Mean Rouge2: 0.2628475432 | Mean RougeL: 0.3344796537 ( 8/54311 )[A

July 18, 2014: Audio released by Ukraine’s security agency appears to reveal separatists and Russian military officials acknowledging the shooting down of a civilian plane.

Phone calls allegedly intercepted by pro-Russian rebels fighting in eastern Ukraine appear to confirm that the separatists shot down a commercial aircraft with 295 people onboard, including 27 Australians.



Mean Rouge1: 0.3307026881 | Mean Rouge2: 0.2459453190 | Mean RougeL: 0.3131633215 ( 9/54311 )[A

Only in New York.

ONLY IN NEW YORK: Some of today's most provocative and authentic Latin Caribbean literature is being created by Dominican, Cuban and Puerto Rican women writers in Flushing, 1,500 miles from the sun of their native islands. 
  They have been doing it for three years in the living room of Prof. Daisy De Filippis' unpretentious home, where the group comes together one Sunday per month. She and Nunzio, her husband, cook for the group. On



Mean Rouge1: 0.3651099427 | Mean Rouge2: 0.2849736853 | Mean RougeL: 0.3490321900 ( 10/54311 )[A

Tim Cook thinks he knows how to put $59.7 billion to good use

One of the things that’s keeping Apple’s AAPL market cap from overtaking Exxon Mobil’s XOM — besides Steve Jobs’ health problems and the world’s unquenchable thirst for petroleum products — is the fear that the company will do something stupid with the nearly $60 billion in cash and marketable securities that seems to be burning a hole in Wall Street’s pocket..“We think that that was an absolutely fantastic use of Apple’s cash.”

[Follow Philip Elmer-DeWitt on Twitter @philiped]

Tim Cook thinks he knows how to put $59.7 billion to good use One of the things that's keeping Apple's market cap from overtaking Exxon Mobil's -- besides Steve Jobs' health problems and the world's unquenchable thirst for petroleum products -- is the fear that the company will do something stupid with the nearly $60…



Mean Rouge1: 0.3759989215 | Mean Rouge2: 0.3009873006 | Mean RougeL: 0.3611579190 ( 11/54311 )[A

Discussions about startups often focus on founders or investors, but most people in the startup game are regular employees.

Discussions about startups often focus on founders or investors, but most people in the startup game are regular employees. So how do you find a startup job? By David Beisel, contributor “I want to work for a startup.” It’s a common statement, but a “startup” can be very different things. The primary dimension on which…



Mean Rouge1: 0.3581644121 | Mean Rouge2: 0.2794882077 | Mean RougeL: 0.3398722030 ( 12/54311 )[A

NEW YORK — When a woman calls Fidelity about her investments there, her questions are likely to be handled a bit differently from those asked by a man.

Fidelity, which provides employer-based retirement accounts for more than 13 million workers, along with other retirement plan providers, have begun tailoring their messages for specific employees — including women, but also Hispanics, members of specific generations or income groups, or those, say, who may be borrowing significant sums from their 401(k) plans. Other providers are testing ways to nudge workers to save more with just one click.



Mean Rouge1: 0.4009534513 | Mean Rouge2: 0.3275223272 | Mean RougeL: 0.3838807228 ( 13/54311 )[A

The co-pilot responsible for the Germanwings crash repeatedly suggested the captain take a break and leave the cockpit, before locking him out and slamming the pane into the French Alps.

The co-pilot responsible for the Germanwings crash repeatedly suggested the captain take a break and leave the cockpit, before locking him out and slamming the pane into the French Alps.



Mean Rouge1: 0.4045282218 | Mean Rouge2: 0.3276077373 | Mean RougeL: 0.3857692349 ( 14/54311 )[A

WASHINGTON, Jan. 26— A large national gathering of Christian evangelicals, including hundreds of television and radio broadcasters, was urged today to bolster the campaign by evangelical political activists to combat secular humanism, which was described as ''the established religion of America.''.Mr. Sweeting also criticized television evangelism for running the risks of ''extravagant spending and an elegant lifestyle,'' resorting to ''unwise methods of fund-raising ... substituting entertainment for the word of God'' and ''adding our own culture to the Gospel.''

A large national gathering of Christian evangelicals, including hundreds of television and radio broadcasters, was urged today to bolster the campaign by evangelical political activists to combat secular humanism, which was described as ''the established religion of America.''   The challenge was delivered by the Rev. D. James Kennedy, the pastor of Coral Ridge Presbyterian Church in Fort Lauderdale, Fla., and a leading figu


Mean Rouge1: 0.4052139129 | Mean Rouge2: 0.3324165013 | Mean RougeL: 0.3875583958 ( 15/54311 )[A

BADEN— BADEN, West Germany, Sept. 29 - A Roy al Canadian Mountie in abright red tunic converses in German, and a C anadian Indian dressed in white deerskin and wearing a headdress of eagle feathers asks, ''Wie gehts?

BADEN, West Germany, Sept. 29 - A Roy al Canadian Mountie in abright red tunic converses in German, and a C anadian Indian dressed in white deerskin and wearing a headdress of eagle feathers asks, ''Wie gehts?'', which means ''How are you?''   There are beautiful Korean dresses called ''Han Boks'' on display and signs that say ''Sverige,'' which means ''Sweden.'' There are Japanese lanterns, and, from the Italians, a re-creation of an entire range of the Alps, 15 feet high with ski trails down its slopes.   The scene is the Alte Bahnhof, the old train station that has been divided into five sections and converted to an exhibition hall where cities from Canada, Italy, Sweden, Japan and South Korea are displaying the pleasures and advantages of holding a Winter or Summer Ol


Mean Rouge1: 0.4044304239 | Mean Rouge2: 0.3348756743 | Mean RougeL: 0.3872619417 ( 16/54311 )[A

HOUSTON, March 27— After hours of concern over predicted weather conditions at the landing site, officials of the space agency decided today not to order the space shuttle Columbia back to earth a day early but to proceed with plans for a landing Monday as scheduled.

After hours of concern over predicted weather conditions at the landing site, officials of the space agency decided today not to order the space shuttle Columbia back to earth a day early but to proceed with plans for a landing Monday as scheduled.   The seven-day test flight of the re-usable winged spaceship is now set to end at 2:27 P.M. Eastern standard time on a desert landing strip at the White Sands Missile Range in southern New Mexico.   By going the full duration, the astronauts, Col. Jack R. Lousma of the Marine Corps and Col. C. Gordon Fullerton of the Air Force, will have circumnavigated the earth 115 times, logging 3.4 million miles since their launching last Monday. Improved Weather Forecast   The decision to


Mean Rouge1: 0.4108454432 | Mean Rouge2: 0.3442776658 | Mean RougeL: 0.3945805653 ( 17/54311 )[A

Credit ratings agencies play a significant role in whether or not a company — or a country — falls into fiscal catastrophe.

Credit ratings agencies play a significant role in whether or not a company -- or a country -- falls into fiscal catastrophe. For now, they're all leaving Japan alone. The three largest credit rating agencies have said that it's too early to decide whether the recent earthquake will lead to downgrades for Japanese sovereign debt. Fitch…



Mean Rouge1: 0.4283984091 | Mean Rouge2: 0.3631293563 | Mean RougeL: 0.4129467751 ( 18/54311 )[A

LendingClub made a splashy debut Thursday on the New York Stock Exchange, pricing above the range at $15, then taking off like a rocket in early trading—at one point climbing 67 percent higher than the initial offering price.

LendingClub made a splashy debut Thursday on the New York Stock Exchange, pricing above the range at $15, then taking off in early trading.



Mean Rouge1: 0.4296435065 | Mean Rouge2: 0.3571753506 | Mean RougeL: 0.4127631625 ( 19/54311 )[A

Sean Penn and Kate del Castillo are reportedly under investigation for meeting with El Chapo while he was on the run.

Hollywood actor Sean Penn is reportedly under investigation for an interview he conducted with Mexican drug lord Joaquin "El Chapo" Guzm&aacute;n.



Mean Rouge1: 0.4144432606 | Mean Rouge2: 0.3409401074 | Mean RougeL: 0.3961657027 ( 20/54311 )[A

As he put it, “I don’t exactly think Google is going to be interested in me.”

Tech workers are in high demand in Massachusetts and around the country, but experienced tech workers say this hot job market is passing them by.



Mean Rouge1: 0.4125730567 | Mean Rouge2: 0.3414618930 | Mean RougeL: 0.3950901753 ( 21/54311 )[A

The also-ran presidential candidate Fred Thompson has proposed a voluntary flat tax.

The also-ran presidential candidate Fred Thompson has proposed a voluntary flat tax. Back when another candidate, some guy associated with a magazine that competes against Fortune, advocated a flat tax, I always thought it was a kooky idea. Now, after doing a modicum of reporting on tax policy, I'm not so sure the idea is…



Mean Rouge1: 0.4104362761 | Mean Rouge2: 0.3419401965 | Mean RougeL: 0.3936818481 ( 22/54311 )[A

NEW HAVEN, April 27— Vincent Scully did not want to retire, but Yale University insisted, and so it was time for the last lecture of the class he started teaching in 1947.

Vincent Scully did not want to retire, but Yale University insisted, and so it was time for the last lecture of the class he started teaching in 1947. He was not expecting it to be all that different from the others until he arrived on Friday to see that the audience more or less constituted the course title: Modern Architecture.    Sitting there among the freshmen taking the introductory course was Philip Johnson, 85 years old, the designer of the A.T. & T. Building and so much else. Not far away was Maya Lin, 31, who designed the Vietnam War Memorial during her senior year at Yale in 1981. Scattered around the auditorium were dozens of other architects somewhere in between those two ages.



Mean Rouge1: 0.4309419020 | Mean Rouge2: 0.3651046939 | Mean RougeL: 0.4148576511 ( 23/54311 )[A

The firstborn son of Germany’s Prince Ferdinand was a no-show Monday at Manhattan Family Court, leaving his countess ex-wife cooling her heels — and no closer to collecting on the $2.5 million she says he owes in child support.

The firstborn son of Germany’s Prince Ferdinand was a no-show Monday at Manhattan Family Court, leaving his countess ex-wife cooling her heels — and no closer to collecting on the $2.5 million she …



Mean Rouge1: 0.4489825980 | Mean Rouge2: 0.3855449643 | Mean RougeL: 0.4335169722 ( 24/54311 )[A

Metal legends Danzig have a strict no photo policy at all shows -- and failure to follow said policy will get you a quick ass whupping ... as one fan just learned in Southern Cali.

Metal legends Danzig have a strict no photo policy at all shows -- and failure to follow said policy will get you a quick ass whupping ... as one…



Mean Rouge1: 0.4574432187 | Mean Rouge2: 0.3947223113 | Mean RougeL: 0.4425503938 ( 25/54311 )[A

A drunk and belligerent Johnny Depp wound up like a “baseball pitcher” and hurled a cell phone at his wife Amber Heard’s face during a violent meltdown two days before she filed for divorce, the actress claimed Friday.

Johnny Depp wound up like a “baseball pitcher” and hurled a cell phone at wife Amber Heard’s face during a drunken rage.



Mean Rouge1: 0.4436569813 | Mean Rouge2: 0.3806250859 | Mean RougeL: 0.4292960430 ( 26/54311 )[A

More from AOL.com: Canada sounds alarm over aboriginal teenage suicide epidemic Florida mother shot in the back by her 4-year-old son Police spread fake Ebola-infected meth story in hopes of fooling drug users

A Bridgewater, Conn., family wrote an impassioned and poignant obituary for their son, who died of a heroin overdose.



Mean Rouge1: 0.4378709498 | Mean Rouge2: 0.3675000829 | Mean RougeL: 0.4192489737 ( 27/54311 )[A

Family flees dream home to escape poisonous spiders 'bleeding out of the walls'

The home left abandoned by the Trost family in a country club in suburban St. Louis.

%Slideshow-232044%  By RYAN GORMAN  A Mi ouri family was forced to flee their home when it was infested with thousands of brown recluse spiders.  Brian and Susan Tro



Mean Rouge1: 0.4261992281 | Mean Rouge2: 0.3561350359 | Mean RougeL: 0.4073207915 ( 28/54311 )[A

Photo: Michael Macor, The Chronicle

Chronicle columnists,Phil Matier and Andy Ross , stand for a photograph inside the studio on Thursday Jan. 29, 2008 in San Francisco,Calif.

According to Cisneros, the Dalys bought their first home from a young couple who have one child and had another on the way.  Daly declined to speak to us, but told The Chronicle's Marisa Lagos that his parents took out a line of equity to pay for each home, and that he plans to pay them back - in part, by refinancing the properties and his condo in San Francisco, leaving him with a mortgage on all three.  Top political fundraiser Paige Barry Arata, who worked on both of Newsom's mayoral campaigns and served as his confidential secretary at City Hall during his first term, is bowing out as the finance director for his gubernatorial bid and returning to her old job at City Hall.  Barry Arata's departure comes as the campaign is coming under increasing pressure to bring in more money to stay competitive with Newso


Mean Rouge1: 0.4419751961 | Mean Rouge2: 0.3740752187 | Mean RougeL: 0.4237057414 ( 29/54311 )[A

was not under oath, but he still told the truth to TMZ on his way out of jail ... he was drunk when cops busted him for DUI.

Kevin Hart was not under oath, but he still told the truth to TMZ on his way out of jail ... he was drunk when cops busted him for DUI. As TMZ first…



Mean Rouge1: 0.4346738879 | Mean Rouge2: 0.3637440638 | Mean RougeL: 0.4143711870 ( 30/54311 )[A

GANDER, Newfoundland — It could have been a short, sweet story: planes get diverted, local people pitch in to help stranded passengers.."We are good people."

Half a dozen or so isolated communities in Newfoundland have been embraced by strangers who dropped from the sky and changed their lives.



Mean Rouge1: 0.4263119567 | Mean Rouge2: 0.3537150584 | Mean RougeL: 0.4066244892 ( 31/54311 )[A

A speculative comment on an online article has theorised how Boris Johnson was passed a poisoned chalice by Eton buddy David Cameron the morning after the referendum, and that Brexit may never happen.

If true, it could be a brilliant act of political manoeuvring that saves David Cameron from forever being remembered as the bungling UK Prime Minister that blew it for Britain.



Mean Rouge1: 0.4137733698 | Mean Rouge2: 0.3433116743 | Mean RougeL: 0.3946649454 ( 32/54311 )[A

Earlier this month, clothing manufacturer and retailer American Apparel, Inc. filed for Chapter 11 bankruptcy.

The company will need to either move production offshore or automate production — perhaps even use cutting-edge robotics — if it ever hopes to prosper again.



Mean Rouge1: 0.4305227021 | Mean Rouge2: 0.3620741979 | Mean RougeL: 0.4119602326 ( 33/54311 )[A

Playboy's first non-nude issue has been on sale for the past month, and the media empire claims its re-robed strategy is working with advertisers.

Playboy's first non-nude issue has been on sale for the past month, and the media empire claims its re-robed strategy is working with advertisers.
“I think Cuomo expects Hillary to be indicted on the email scandal and that he also expects [Vice President] Joe Biden to come in as the nominee and that he’d like to be his running mate,’’ Cox told The Post.

Gov. Andrew Cuomo has sent the strongest signal yet that he’s ready to run for president if a Democrat fails to win in November, even as top Democrats confide that Cuomo’s presidential campaign is …



Mean Rouge1: 0.4265002460 | Mean Rouge2: 0.3527573220 | Mean RougeL: 0.4070103993 ( 34/54311 )[A
Mean Rouge1: 0.4310205097 | Mean Rouge2: 0.3580446132 | Mean RougeL: 0.4120574156 ( 35/54311 )[A

Turns out familiarity isn't the only thing that breeds contempt ... so does not paying child support, because

cited for contempt of court for allegedly falling WAY behind, and also failing to maintain a million-dollar policy on his life.

Turns out familiarity isn't the only thing that breeds contempt ... so does not paying child support, because Shauna Sand wants Lorenzo Lamas cited…



Mean Rouge1: 0.4358721967 | Mean Rouge2: 0.3633592287 | Mean RougeL: 0.4174081313 ( 36/54311 )[A

Walmart can afford to buy as it builds, but even a galloping “unicorn,” as start-ups valued at more than $1 billion are known, will struggle to tow the lumbering retailer into a competitive position online.

Even a galloping “unicorn” will struggle to tow the lumbering retailer into a competitive position online.



Mean Rouge1: 0.4503370121 | Mean Rouge2: 0.3796833510 | Mean RougeL: 0.4323463844 ( 37/54311 )[A

You know that it’s gotten bad when Dan Rather and Joe Scarborough agree on something.

You know that it’s gotten bad when Dan Rather and Joe Scarborough agree on something.



Mean Rouge1: 0.4511267796 | Mean Rouge2: 0.3806850944 | Mean RougeL: 0.4323810983 ( 38/54311 )[A

ST. LOUIS—The City Museum here has a five-story jungle gym with two real-life jets kids can climb on.

The City Museum in St. Louis has a five-story jungle gym with two real-life jets kids can climb on. There's an enclosed "Monster Slide" that drops riders the length of three staircases, and a rooftop Ferris wheel. Despite the whiff of danger, or perhaps because of it, the City Museum is one of St. Louis's most popular attractions.



Mean Rouge1: 0.4573403302 | Mean Rouge2: 0.3873284146 | Mean RougeL: 0.4390518606 ( 39/54311 )[A

VIDEOCambridge native Casey Affleck hosted this week’s “Saturday Night Live,” which, of course, means the episode wouldn’t have been complete without at least one or two jabs at Boston.

This week’s episode wouldn’t have been complete without at least one or two jabs at Boston. It delivered.



Mean Rouge1: 0.4481519550 | Mean Rouge2: 0.3781063095 | Mean RougeL: 0.4302989251 ( 40/54311 )[A

Who did Charity Navigator rate highly?

Should you be considering an act of generosity this holiday season, it's always important to research a charity before clicking 'donate.'



Mean Rouge1: 0.4398439814 | Mean Rouge2: 0.3693131395 | Mean RougeL: 0.4224061383 ( 41/54311 )[A

Note, too, that leather seats and satellite navigation systems are options on the SE and XLE, while stability and traction control (packaged as a single option) can be had with all four trim grades.

The sixth-generation Camry looks and handles better than the previous incarnation, but the changes will in no way alienate those who have long prized the sedan for its comfort and durability.



Mean Rouge1: 0.4328778303 | Mean Rouge2: 0.3609196591 | Mean RougeL: 0.4158363018 ( 42/54311 )[A

Surprisingly enough, Baselitz is selling Germany short.

George Baselitz's exhibition of woodcuts is a series of hand grenades thrown at the ruling cliches of the Renaissance, writes Jonathan Jones



Mean Rouge1: 0.4289930900 | Mean Rouge2: 0.3528992222 | Mean RougeL: 0.4101797245 ( 43/54311 )[A

It looks like Apple fanboys will have two new gadgets to stand in line for this fall..Bloomberg also notes that the iPads could be using anti-reflective coating on its screens to make the device more readable, but the move could slow down production time.

New devices could put Apple back on a more stable footing in the mobile device world.



Mean Rouge1: 0.4239089348 | Mean Rouge2: 0.3457709783 | Mean RougeL: 0.4039138874 ( 44/54311 )[A

• Partners HealthCare posts $22m loss, first in 15 years

• Boston Medical Center, Tufts in merger talks

• Syre: Neighborhood’s ills give pain to Partners

• Partners pays a price after bid for South Shore Hospital

• Partners HealthCare leader to step down

• Partners HealthCare chief met a trail of resistance

Everyone seems to be blaming Partners HealthCare for driving up medical costs. The mammoth organization is trying to change that. Too bad it’s by squeezing the paychecks of workers on the bottom rung.



Mean Rouge1: 0.4158567137 | Mean Rouge2: 0.3384141489 | Mean RougeL: 0.3962870929 ( 45/54311 )[A

The recent closing of the Reading Athletic Club in Reading is the latest example of the potential risks of gym memberships.

Before you sign a contract, know what you’re agreeing to and make sure you’re joining a gym that you’ll like.



Mean Rouge1: 0.4154438573 | Mean Rouge2: 0.3392805208 | Mean RougeL: 0.3960756663 ( 46/54311 )[A

MOSCOW, Sept. 11— The Soviet Union's campaign against the Polish independent trade union, Solidarity, took a new turn today as meetings were held at factories in Moscow and elsewhere at which Solidarity was denounced for actions at its congress this week.

The Soviet Union's campaign against the Polish independent trade union, Solidarity, took a new turn today as meetings were held at factories in Moscow and elsewhere at which Solidarity was denounced for actions at its congress this week.   At Moscow's giant Zil truck and auto plant, and at similar gatherings in Leningrad and other cities, workers were assembled to approve an open letter that spoke of a bid by Solidarity for a ''counterrevolution.'' The letter, approved in Moscow, accused the Polish union of trying to provoke workers in the Soviet Union and in other Eastern-bloc nations to ''betray'' Communism.   The factory meetings were portrayed as an effort by Soviet workers to warn Poles of the dangers of backing Solidarity. But 


Mean Rouge1: 0.4141129707 | Mean Rouge2: 0.3393806336 | Mean RougeL: 0.3951400489 ( 47/54311 )[A

DALLAS CHOLESTEROL, a vital substance but a lethal one when it gets in the wrong places, is never more deadly than in people born with a genetic defect that dooms them to an early death from heart disease.

DALLAS CHOLESTEROL, a vital substance but a lethal one when it gets in the wrong places, is never more deadly than in people born with a genetic defect that dooms them to an early death from heart disease.   Seven years ago, two medical scientists elucidated how this defect endangers its victims and how the body's cells use cholesterol. Now, they are studying promising new treatments that may have general application, preventing heart disease by drastically lowering blood cholesterol levels.   The doctors, Michael S. Brown and Joseph L. Goldstein of the University of Texas Health Sciences Center here, are known locally as ''the gold dust twins'' for the many prizes and generous private support their work has drawn. Their investigations revealed that some people are born with an erro


Mean Rouge1: 0.4224072879 | Mean Rouge2: 0.3487398099 | Mean RougeL: 0.4038138245 ( 48/54311 )[A

WASHINGTON, July 15 (Reuters) - President Barack Obama, seeking to sell the Iran nuclear deal to skeptical U.S. lawmakers and the American public, insisted on Wednesday that the landmark agreement was the best way to avoid a nuclear arms race and more war in the Middle East.

WASHINGTON, July 15- President Barack Obama, seeking to sell the Iran nuclear deal to skeptical U.S. lawmakers and the American public, insisted on Wednesday that the landmark agreement was the best way to avoid a nuclear arms race and more war in the Middle East. Obama made his case in a nationally televised news conference a day after Iran and six world powers...



Mean Rouge1: 0.4282286792 | Mean Rouge2: 0.3555572366 | Mean RougeL: 0.4099997935 ( 49/54311 )[A

(New throughout, adds court ruling, comment from Tullow, background)

HAMBURG, April 25 (Reuters) - An international maritime tribunal on Saturday ruled that Ghana can continue developing a $4.9 billion dollar offshore oil project in an area caught up in a border dispute with Ivory Coast but must not start new drilling.

HAMBURG, April 25- An international maritime tribunal on Saturday ruled that Ghana can continue developing a $4.9 billion dollar offshore oil project in an area caught up in a border dispute with Ivory Coast but must not start new drilling. Ivory Coast in February asked the Hamburg- based International Tribunal for the Law of the Sea to issue a moratorium on oil...



Mean Rouge1: 0.4234900158 | Mean Rouge2: 0.3487195975 | Mean RougeL: 0.4047375597 ( 50/54311 )[A

Holding an executive level job may come with one less perq these days: the ability to take a restful vacation.

About two-thirds of executives have postponed or canceled vacation plans in the last year, a new study by recruitment firm Korn Ferry found.



Mean Rouge1: 0.4175960742 | Mean Rouge2: 0.3421399824 | Mean RougeL: 0.3991974381 ( 51/54311 )[A

ET on FX.

"Threesomes are amateur hour compared to what's going on here," the producer tells PEOPLE



Mean Rouge1: 0.4104905599 | Mean Rouge2: 0.3358040568 | Mean RougeL: 0.3924326393 ( 52/54311 )[A

Click ahead for the list.By Winston WooPosted 18 Mar 2011*Country-by-country data based on International Monetary Fund World Economic Database, October 2010; United Nations Development Program's Human Development Index; Transparency International's Corruption Perceptions Index.

So, which dictators have been in power the longest and how have their economies fared under their rule? Find out!



Mean Rouge1: 0.4051050173 | Mean Rouge2: 0.3308004569 | Mean RougeL: 0.3873754225 ( 53/54311 )[A

FORTUNE – In textbook economics, lower interest rates typically spur higher investments.

For all the attention policymakers have placed on the Fed’s actions over interest rates, the cost of borrowing is far from the problem.



Mean Rouge1: 0.3997025010 | Mean Rouge2: 0.3248933059 | Mean RougeL: 0.3822895061 ( 54/54311 )[A

Deorick and Denis Williams with the happy parents.

Caught in the chaos and excitement that follows the arrival of a new baby, a US grandmother unwittingly invited a complete stranger to her daughter-in-law&rsquo;s birthing suite.



Mean Rouge1: 0.4093419754 | Mean Rouge2: 0.3358139219 | Mean RougeL: 0.3922344716 ( 55/54311 )[A

Nearly two years after its first public hearings on the matter, the Federal Trade Commission finally weighed in with new native advertising guidelines for print and digital publishers.

Nearly two years after its first public hearings on the matter, the Federal Trade Commission finally weighed in with new native advertising guidelines for print and digital publishers. While it doe…



Mean Rouge1: 0.4058030039 | Mean Rouge2: 0.3300240267 | Mean RougeL: 0.3868792678 ( 56/54311 )[A

“It depends on how late the game is … I try to have sex as much as I can,” Munn said.

Aaron Rodgers can still remember his close encounter of the third kind. The Packers quarterback revealed on Pete Holmes’ “You Made It Weird” podcast, as reported by the Milwaukee …



Mean Rouge1: 0.4019516213 | Mean Rouge2: 0.3244303991 | Mean RougeL: 0.3827432996 ( 57/54311 )[A

(TARIQ CAMP, Iraq) — Iraq’s special forces completed a troop buildup around Fallujah on Sunday ahead of an operation to retake the Islamic State-held city west of Baghdad, a military officer said, as the militants attacked a newly-liberated town to the west.

The city is one of the last major ISIS strongholds in Iraq



Mean Rouge1: 0.3971392201 | Mean Rouge2: 0.3190232258 | Mean RougeL: 0.3776221063 ( 58/54311 )[A

“And I finished it,” Trump added of his debate rejoinder: “I guarantee you there is no problem.”

The GOP insurgency barely laid a Mitt on him. Donald Trump eked out victories in Kentucky and Louisiana on Saturday, with Texas Sen. Ted Cruz hot on his heels in those two delegate-rich states. Tru…



Mean Rouge1: 0.3912490335 | Mean Rouge2: 0.3139715265 | Mean RougeL: 0.3719632592 ( 59/54311 )[A

Jewish American history books don't usually evoke images of the California Gold Rush.

[...] among the waves of risk-takers flooding the Sierra foothills in the late 1840s and early '50s were masses of Jews from Germany willing to brave a rough-hewn life in order to stake their claim, pan for gold and hit the fiscal big time.  "Much of (this California history) is omitted from most of what you'd call general history books of Jewish history, which really means New York Jews, " says Marc Dollinger, who holds the Richard and Rhoda Goldman endowed chair in Jewish Studies and Social Responsibility at San Francisco State University.  Dollinger and Ava F. Khan, a historian of western Jewish life who has served as a visiting professor of history at UC Berkeley and UC Davis, have edited a large-format book, "California Jews" (Brandeis University Press), which spotlights some of this hidden history.  The book's 15 essays highlight other lesser-known aspects of Jewish history as well: the influen


Mean Rouge1: 0.3973809615 | Mean Rouge2: 0.3193439022 | Mean RougeL: 0.3784062481 ( 60/54311 )[A

TAMPA, Fla.— Hillary Clinton named Virginia Sen. Tim Kaine to be her running mate Friday night, turning to a seasoned politician from a battleground state and a figure long seen as the favorite to complete the Democratic presidential ticket.

Hillary Clinton picked Virginia Sen. Tim Kaine as her running mate, turning to a seasoned politician from a battleground state and a figure long seen as the favorite for the slot.



Mean Rouge1: 0.4023380481 | Mean Rouge2: 0.3247406762 | Mean RougeL: 0.3836645206 ( 61/54311 )[A

Care.com Inc. said Wednesday that it has received a $46.4 million investment from a growth-equity fund linked to Google parent company Alphabet Inc. and used the funds to buy back company shares..The investment by Google Capital makes it the largest shareholder of Care.com., which is an online marketplace offering a range of services ranging...

Care.com said Wednesday that it has received a $46.35 million investment from a growth-equity fund linked to Google parent company Alphabet Inc. and used the funds to buy back company shares.



Mean Rouge1: 0.3974719707 | Mean Rouge2: 0.3196666031 | Mean RougeL: 0.3786167322 ( 62/54311 )[A

I would love to meet someone who genuinely wants to find a life partner, to love and be loved, someone who feels that life is pretty good but would be even better sharing it with someone special.

Telegraph Dating Member Profile: snooky48 - in need of stimulation. I'm cute and sassy and a little bit sarcy! I'm definitely not your average 48 year old. I...



Mean Rouge1: 0.3979709641 | Mean Rouge2: 0.3194373000 | Mean RougeL: 0.3794058062 ( 63/54311 )[A

“Modern life,” Hemingway once wrote, “is often a mechanical oppression and liquor is the only mechanical relief.” It’s a pertinent observation if we remember that it was written in an age before other drugs had become widely available, but it’s also one of the premises of Olivia Laing’s charming and gusto-driven look at the alcoholic insanity of six famous writers, namely John Cheever, Tennessee Williams, John Berryman, Ernest Hemingway, F. Scott Fitzgerald and Raymond Carver.

In “The Trip to Echo Spring,” Olivia Laing chronicles the alcoholism of six writers: John Cheever, Tennessee Williams, John Berryman, Ernest Hemingway, F. Scott Fitzgerald and Raymond Carver.



Mean Rouge1: 0.3995765102 | Mean Rouge2: 0.3211427955 | Mean RougeL: 0.3810540360 ( 64/54311 )[A

WASHINGTON—Retired Army Lt. Gen. Michael Flynn, a possible vice presidential pick for presumptive Republican nominee Donald Trump, came out in favor of abortion rights on Sunday, taking a stand on a divisive issue that puts him sharply at odds with the majority of Republican officeholders, activists and voters..Instead, they timidly nibble around the edges of the battlefields from Africa to the Middle East, and act as if each fight, whether in Syria, Iraq, Nigeria, Libya or Afghanistan, can be peacefully resolved by diplomatic effort.”

Write to Byron Tau at byron.tau@wsj.com

Retired Army Lt. Gen. Michael Flynn, a possible vice presidential pick by Donald Trump, came out in favor of abortion rights Sunday, putting him sharply at odds with the majority of Republicans.



Mean Rouge1: 0.3976310514 | Mean Rouge2: 0.3163496194 | Mean RougeL: 0.3776628736 ( 65/54311 )[A

TEMPE, Ariz. -- David Johnson's NFL career is only 28 games old, and already he is being compared to some of the great running backs of the game.

Johnson joins Hall of Famer, Marshall Faulk, as the only players to have 1,000 yards rushing and 700 yards receiving through 12 games.



Mean Rouge1: 0.3989136607 | Mean Rouge2: 0.3176716839 | Mean RougeL: 0.3792391326 ( 66/54311 )[A

A woman has been sexually assaulted by two men outside her home in a small town in southern WA..The second man is described as fair skinned, of medium build with shoulder-length brown hair, and was wearing a white t-shirt, blue jeans and sandals.

A woman has been sexually assaulted by two men outside her home in a town about 40km northeast of Albany.



Mean Rouge1: 0.4070294643 | Mean Rouge2: 0.3265398836 | Mean RougeL: 0.3876400743 ( 67/54311 )[A

WASHINGTON — Republicans poised to control the Federal Communications Commission next month said they will revisit the Net neutrality regulation “as soon as possible,” laying out plans to address a rule they’ve opposed and that Democrats support.

Republicans poised to control the Federal Communications Commission next month said they’d revisit the net neutrality regulation “as soon as possible,” laying out plans to address a rule they’ve opposed and that Democrats support.



Mean Rouge1: 0.4015357850 | Mean Rouge2: 0.3218750281 | Mean RougeL: 0.3824233862 ( 68/54311 )[A

"I'm going unescorted," Kennedy told me yesterday, "but there will be a lot of single men at my table."

Note to Lil' Kim video director Kirk Fraser: You can say "The check's in the mail"only so many times before your pants catch on fire. Fraser repeatedly promised to make things right after the Daily News reported a month ago that he bounced more than 18 checks to crew members who toiled furiously on the last-minute music video shoots in September, before Kim went to jail.



Mean Rouge1: 0.4037186857 | Mean Rouge2: 0.3248200763 | Mean RougeL: 0.3846305278 ( 69/54311 )[A

BY MARISA GUTHRIE DAILY NEWS STAFF WRITER

Monday, August 9th 2004, 9:35AM

Jane Pauley and the folks behind her new daytime talk show are trying to lure female viewers by addressing a highly sensitive issue - hair styles.

By MARISA GUTHRIE DAILY NEWS STAFF WRITER J ane Pauley and the folks behind her new daytime talk show are trying to lure female viewers by addressing a highly sensitive issue - hair styles. Pauley's, not theirs. In fact, one of the promo spots for "The Jane Pauley Show,"premiering Aug. 30 at 11 a.m. on WNBC\/Ch. 4, is a comical montage of the longtime newswoman's many different hair styles. See Jane with long,



Mean Rouge1: 0.4004699723 | Mean Rouge2: 0.3211099684 | Mean RougeL: 0.3813848732 ( 70/54311 )[A

DAVID HANDSCHUH DAILY NEWS Among those who helped treat Handschuh at Bayonne Hospital were (l. to r.) emergency room

manager Patricia Carey, and nurses Pattie Spuma and Susan Reyes.

By DAVID HANDSCHUH DAILY NEWS photographer Don't worry, Brother. We'll get you out."Buried in chunks of rubble and debris, my legs crushed, I first heard those words - my introduction to the men and women I now know as my guardian angels: Fire Lt. Tom McGoff, Firefighters Tommy Michel and Jimmy Hart, who found me, dug me out and went in search of others. Firefighters Phil McArdle and Jeff Borkowski, who carried me to a



Mean Rouge1: 0.3949840823 | Mean Rouge2: 0.3167112017 | Mean RougeL: 0.3761604228 ( 71/54311 )[A

STEAMER NIAGARA, MISSISSIPPI RIVER July 6, 1863.

On the cloudy 3d, at 7 A.M. a flag of truce was shown on the right of the left wing, General ORD'S corps at which signal firing ceased except at the extrence left, where the signal was seen.



Mean Rouge1: 0.3955864655 | Mean Rouge2: 0.3175937865 | Mean RougeL: 0.3770171798 ( 72/54311 )[A

LOS ANGELES — The 17th annual

, the largest and most prestigious public literary event in the country, today announced its 2012

.

LOS ANGELES &mdash; The 17th annual  Los Angeles Times 

Festival of Books , the largest and most prestigious public literary event in the country, today announced its 2012  program schedule . Returning to the 

beautiful University of Southern California campus on April 21-22, the Festival is an L.A. institution known for its celebration 

of the written word in all its forms and is expected to draw more than 150,000 attendees.



Mean Rouge1: 0.3979590381 | Mean Rouge2: 0.3209213917 | Mean RougeL: 0.3796373429 ( 73/54311 )[A

Those who think that if a little vitamin C is good, more must be better should think again, says a team of British researchers, who found that a supplement of 500 milligrams a day could damage people's genes.

Those who think that if a little vitamin C is good, more must be better should think again, says a team of British researchers, who found that a supplement of 500 milligrams a day could damage people's genes.    Many Americans take that much, or more, in hopes of preventing colds and reaping the widely celebrated  antioxidant benefits of vitamin C. Antioxidants, which block cellular and molecular damage caused by the highly reactive molecules called free radicals, are believed to protect against heart disease, cancer, eye disorders like cataracts and macular degeneration, and other chronic health problems.



Mean Rouge1: 0.3932748145 | Mean Rouge2: 0.3166987418 | Mean RougeL: 0.3750101677 ( 74/54311 )[A

“That’s good enough for me.”

Lou Reed was uncompromising and visionary, chasing his muse through a haunted house of topics that were still taboo when he emerged with the Velvet Underground in the mid 1960s. Rock music had never quite addressed the seedy underbelly that Reed explored with such candor and compassion. His songs were a flashlight in the dark, grappling head on with spirituality, drug addiction, and sexual appetites and behaviors that well beyond the Beatles’ “I Want to Hold Your Hand.” His native New York City was his palette, and he painted its more unsavory characters – junkies, hustlers, renegades – in broad strokes and vivid hues. Reed, who died on Sunday at 71, often got less credit for another sterling achievement. He wrote some of popular music’s most unabashedly beautiful songs, from “I’ll Be Your Mirror” to “Femme Fatale.”



Mean Rouge1: 0.3946172753 | Mean Rouge2: 0.3189467571 | Mean RougeL: 0.3765898317 ( 75/54311 )[A

Gene-splicing technology is capable of revolutionizing important areas of agriculture, but such applications are hampered by a shortage of Federal money and trained scientists, according to an independent survey of the field by two consulting concerns.

Gene-splicing technology is capable of revolutionizing important areas of agriculture, but such applications are hampered by a shortage of Federal money and trained scientists, according to an independent survey of the field by two consulting concerns.   The long-range potential for gene-splicing technology in agriculture is far greater than in medicine, spokesmen for the concerns said in announcing the publication of their report yesterday. Yet the Federal Government spends only about one-tenth as much, they said, in relevant basic agricultural research as it does in health-related studies. They estimated that the total Federal support of basic gene-splicing research related to agriculture was currently $6 million a year.



Mean Rouge1: 0.3989772317 | Mean Rouge2: 0.3241321096 | Mean RougeL: 0.3811809092 ( 76/54311 )[A

PORTLAND, Ore. — A federal judge has struck down Oregon’s same-sex marriage ban, saying it is unconstitutional.

PORTLAND, Ore. — A federal judge has struck down Oregon's same-sex marriage ban, saying it is unconstitutional. U.S. District Judge Michael McShane threw out the voter-approved ban Monday.



Mean Rouge1: 0.4037620568 | Mean Rouge2: 0.3294535455 | Mean RougeL: 0.3861910043 ( 77/54311 )[A

Leonard Riggio, founder and chairman of Barnes & Noble (NYSE: BKS), has expressed interest in acquiring the company’s retail business and spinning out its Nook tablet unit.www.barnesandnobleinc.com

The Knight Capital Group (NYSE: KCG) plans to sell its credit-brokerage group to Stifel Financial Corp. (NYSE: SF), according to Bloomberg..http:\/\/www.drtv.com

Sign up for Dan’s daily email newsletter on deals and deal-makers: GetTermSheet.com

Leonard Riggio, founder and chairman of Barnes & Noble (NYSE: BKS), has expressed interest in acquiring the company’s retail business and spinning out its Nook tablet unit.www.barnesandnobleinc.com The Knight Capital Group (NYSE: KCG) plans to sell its credit-brokerage group to Stifel Financial Corp. (NYSE: SF), according to Bloomberg. Knight Capital recently agreed to be acquired by Getco.www.knight.com Opera Software (OSLO: OPERA)…



Mean Rouge1: 0.4007558475 | Mean Rouge2: 0.3258672911 | Mean RougeL: 0.3828942290 ( 78/54311 )[A

U.S. President Barack Obama on Tuesday nominated former Bank of Hawaii chief executive Allan Landon to join the U.S. Federal Reserve’s board of governors.

If confirmed, Allan Landon would have a permanent vote on monetary policy and help implement the 2010 Dodd-Frank financial oversight law.



Mean Rouge1: 0.4078011897 | Mean Rouge2: 0.3334637007 | Mean RougeL: 0.3901600850 ( 79/54311 )[A

WASHINGTON — The Federal Aviation Administration on Tuesday issued permits to use drones to monitor crops and photograph properties for sale, marking the first time permission has been granted to companies involved in agriculture and real estate.

The Federal Aviation Administration on Tuesday issued permits to use drones to monitor crops and photograph properties for sale, marking the first time permission has been granted to companies involved ­agriculture and real estate.



Mean Rouge1: 0.4038442647 | Mean Rouge2: 0.3293970702 | Mean RougeL: 0.3859101653 ( 80/54311 )[A

For Karl Marx, it was the "spectre haunting Europe"; for Richard Nixon, a "curious and twisted philosophy".

Francesca Martin: Fragrance designer Christophe Laudamiel has put together a collection of extinct or conceptual smells at the Reg Vardy gallery in Sunderland, released to visitors by means of motion sensors



Mean Rouge1: 0.3994913602 | Mean Rouge2: 0.3254284308 | Mean RougeL: 0.3817733343 ( 81/54311 )[A

Detlev Guenzel was found guilty of "murder motivated by sexual lust and disturbing the peace of the dead".

A German former police officer has been sentenced to eight years and six months in jail for killing a willing victim he met on a website for cannibalism fetishists.



Mean Rouge1: 0.3966978340 | Mean Rouge2: 0.3220833834 | Mean RougeL: 0.3786674508 ( 82/54311 )[A

The complexity of the homeowner policy, in ''plain English'' or not, clearly calls out for the professional guidance of a qualified broker in a reputable firm.

Nowadays, when liability claims can be substantial, even bankrupting, consumers have a greater need to understand what they are buying.   This is especially true since insurance can now be ordered over the telephone as easily as pizza and since agents are often too busy to explain the fine print on the policies, which yield relatively low sales commissions. (By July 1, however, homeowner polices must be written in ''plain English'' according to New York State law.)   One of the biggest personal risks is a liability claim that you or a member of your family has negligently injured someone or damaged his property. Most basic homeowner policies contain $25,000 personal liability coverage, but many experts call it inadequate in view of the potential risks and current court awards. This basic coverage also has a few odd exclusions, 


Mean Rouge1: 0.4002660948 | Mean Rouge2: 0.3264574324 | Mean RougeL: 0.3824478337 ( 83/54311 )[A

As part of an agreement designed to save thousands of jobs, Conrail employees in the Transport Workers Union have voted to accept deferrals of wage increases over the next three years, union leaders announced yesterday.

As part of an agreement designed to save thousands of jobs, Conrail employees in the Transport Workers Union have voted to accept deferrals of wage increases over the next three years, union leaders announced yesterday.   The union, which represents 4,000 of Conrail's 71,000 employees, was the first of 13 unions that are party to the accord to report its ratification by members.



Mean Rouge1: 0.4011801314 | Mean Rouge2: 0.3279770304 | Mean RougeL: 0.3834052860 ( 84/54311 )[A

BREMEN, West Germany— When the space shuttle Columbia thundered off its launching pad at Cape Canaveral, Fla., earlier this month, a small circle of German engineers, eyes fixed on a television screen here, were particularly excited.

When the space shuttle Columbia thundered off its launching pad at Cape Canaveral, Fla., earlier this month, a small circle of German engineers, eyes fixed on a television screen here, were particularly excited.   The engineers were sitting across a field from the metal buildings that house the assembly line for Spacelab, a reusable orbiting laboratory scheduled to go into space in the shuttle's voluminous cargo bay in late 1983.   ''Columbia is America's dream,'' said Manfred Fuchs, director of project development at Erno Raumfahrttechnik, the West German aerospace company that is Spacelab's prime contractor. ''If that dream doesn't work, ours won't either.''



Mean Rouge1: 0.4053431043 | Mean Rouge2: 0.3329392475 | Mean RougeL: 0.3877725676 ( 85/54311 )[A

Wallace Robinson, the 6-foot-8-inch center who led the St. Louis University basketball team in rebounds, was dismissed from the team on Jan. 2 because of a fight in which he broke the nose and blackened the eyes of his coach, Ron Ekker, according to a published report.

Wallace Robinson, the 6-foot-8-inch center who led the St. Louis University basketball team in rebounds, was dismissed from the team on Jan. 2 because of a fight in which he broke the nose and blackened the eyes of his coach, Ron Ekker, according to a published report.  The fight, the report said, occurred in Ekker's hotel room in Indianapolis after Robinson and another player had missed the bus for a game at Butler, which the Billikens won.



Mean Rouge1: 0.4021573872 | Mean Rouge2: 0.3291558470 | Mean RougeL: 0.3843130308 ( 86/54311 )[A

Everybody likes a well-planned and well-executed heist.

To all those who are criminals at heart, the Harvard Film Archive is offering a chance to indulge forbidden desires next weekend. Their “Heist Night” movie marathon starts Friday at 7 p.m. and ends around 12 hours later.



Mean Rouge1: 0.4056644471 | Mean Rouge2: 0.3326224235 | Mean RougeL: 0.3876995618 ( 87/54311 )[A

SAN DIEGO, Sept. 2, 2015 \/PRNewswire\/ -- In an effort to help customers lower their energy bills, San Diego Gas & Electric (SDG&E) has launched a new online store, SDG&E Marketplace, for customers to quickly and easily shop for energy saving products offered by third-party retailers..To view the original version on PR Newswire, visit:http:\/\/www.prnewswire.com\/news-releases\/sdge-launches-new-energy-efficiency-marketplace-website-300137030.html

SOURCE San Diego Gas & Electric (SDG&E)

SAN DIEGO, Sept. 2, 2015\/ PRNewswire\/-- In an effort to help customers lower their energy bills, San Diego Gas& Electric has launched a new online store, SDG&E Marketplace, for customers to quickly and easily shop for energy saving products offered by third-party retailers. At the SDG&E Marketplace, customers will be able to shop for rebate-eligible products, such as...
* Dollar\/yen coasts up after Japan lawmaker stirs BOJ easing hopes

* Sterling supported by optimistic BOE

* Aussie briefly hits


Mean Rouge1: 0.4083922065 | Mean Rouge2: 0.3352258314 | Mean RougeL: 0.3897656045 ( 88/54311 )[A
Mean Rouge1: 0.4052780064 | Mean Rouge2: 0.3319593353 | Mean RougeL: 0.3865813671 ( 89/54311 )[A

Char siu - barbecued pork

Compensated dating - relationship in return for cash

Dai pai dong - open-air food stall

Guanxi - personal connections that aid business

Lucky money - cash given in red envelopes

Sandwich class - squeezed middle class

Milk tea - local speciality

Siu mei - type of dim sum

Yum cha - type of breakfast

Wet market - market for fresh fish, meat and other produce

Ang moh - a light-skinned person, Westerner

Chilli crab - regional delicacy

Chinese helicopter - person who speaks little English

Hawker centre - food market with individual vendors

HDB - public housing estate

Killer litter - lethal falling rubbish

Lepak - to loiter aimlessly

Sabo - to harm, make trouble

Sabo king - a troublemaker

Sotong - squid or cuttlefish

Teh tarik - sweet tea with milk

Wah - an expression of delight.An Oxford English Dictionary.

Words most commonly heard on the streets of Hong Kong and Singapore like "yum cha" and "wah" have entered the linguistic mainstream, with t


Mean Rouge1: 0.4086104673 | Mean Rouge2: 0.3359788467 | Mean RougeL: 0.3901170523 ( 90/54311 )[A

, has fired off a cease and desist letter to the publicist who claimed Chrystal demanded he leak nude photos of her eldest daughter -- however she does admit sending him scantily clad pics.

Ariel Winter's mother, Chrystal Workman, has fired off a cease and desist letter to the publicist who claimed Chrystal demanded he leak nude photos of her…



Mean Rouge1: 0.4110666669 | Mean Rouge2: 0.3391573424 | Mean RougeL: 0.3927721059 ( 91/54311 )[A

The owners of nearly half a million polluting Volkswagens in the U.S. will be given the option of selling them back to the company or getting them repaired at VW's expense, under a deal announced by a federal judge on Thursday

Senior U.S. District Judge Charles Breyer, who is overseeing a tangle of litigation over the VW emissions scandal, gave no details on how much car owners would be paid but said the tentative agreement between the automaker, the U.S. government and plaintiffs' attorneys would include "substantial compensation."

The owners of nearly half a million polluting Volkswagens in the U.S. will be given the option of selling them back to the company or getting them repaired at VW's expense, under a deal announced by a federal judge on Thursday



Mean Rouge1: 0.4082504414 | Mean Rouge2: 0.3355492856 | Mean RougeL: 0.3896315620 ( 92/54311 )[A

Fiat Chrysler Automobiles NV said on Monday it would investigate a crash that killed Star Trek actor Anton Yelchin in his recalled 2015 Jeep Grand Cherokee.

The car maker sent a letter to vehicle owners after announcing the recall in April



Mean Rouge1: 0.4117224919 | Mean Rouge2: 0.3397193701 | Mean RougeL: 0.3932996007 ( 93/54311 )[A

Monday night’s Council of Fashion Designers of America Awards brought out some of the shiniest superstars in the fashion, modeling and acting worlds, but many chose surprisingly somber black looks..Beyoncé won the event’s coveted Fashion Icon Award, while designer Marc Jacobs was named Women’s Wear Designer of the Year.

Monday night’s Council of Fashion Designers of America Awards brought out some of the shiniest superstars in the fashion, modeling and acting worlds, but many chose surprisingly somber black looks.…



Mean Rouge1: 0.4084930379 | Mean Rouge2: 0.3365461238 | Mean RougeL: 0.3899089445 ( 94/54311 )[A

Â Identical twins Sarah Mariuz and Leah Rodgers have shared everything from clothes to toys over the years, but never did the sisters think they would give birth on the same day – let alone at the exact same time.

"There have just been certain things in our life where things just match up – it's odd," Sarah Mariuz tells PEOPLE



Mean Rouge1: 0.4124332919 | Mean Rouge2: 0.3411228421 | Mean RougeL: 0.3940407871 ( 95/54311 )[A

An 80-year-old Katoomba man has been found safe after going missing from a nursing home overnight in the NSW Blue Mountains.

An 80-year-old Katoomba man has been found safe after going missing from a nursing home on Saturday night.



Mean Rouge1: 0.4095002991 | Mean Rouge2: 0.3376419968 | Mean RougeL: 0.3906577179 ( 96/54311 )[A

US inmate Thomas Arthur.

An inmate set to die by lethal injection in the southern US state of Alabama was granted an 11th-hour reprieve late Thursday, his latest stay of execution.



Mean Rouge1: 0.4076086014 | Mean Rouge2: 0.3342314716 | Mean RougeL: 0.3878340148 ( 97/54311 )[A


Meanwhile, Novak is still very much in the acting game — he’s appearing as former McDonald’s President Harry J. Sonneborn in “The Founder,” a biopic of Ray Kroc coming in January.

Actor and comedian B.J. Novak, best known for his performance as the ambitious but luckless Ryan Howard on the television show “The Office,” is writing a follow-up to his bestselling children’s book, “The Book With No Pictures.”
Edgeio, the online classifieds service that has appeared on Mashable a few times, are announcing today that they’ve raised $5 million in Series A financing in a round led by Intel Capital - other investors include Transcosmos Investments and Business Development Inc..Now that’s died down, they seem to be building a decent service - it’s just going to take a while.

Edgeio, the online classifieds service that has appeared on Mashable a few times, are announcing today that they've raised $5 ...


Mean Rouge1: 0.4085325154 | Mean Rouge2: 0.3357672056 | Mean RougeL: 0.3889556746 ( 98/54311 )[A
Mean Rouge1: 0.4101453476 | Mean Rouge2: 0.3380233359 | Mean RougeL: 0.3907623370 ( 99/54311 )[A

Women's groups expressed disgust yesterday over plans by Tupac Shakur's record company to help bail the gangsta rapper and convicted sex abuser out of prison while he appeals his conviction.

Women's groups expressed disgust yesterday over plans by Tupac Shakur's record company to help bail the gangsta rapper and convicted sex abuser out of prison while he appeals his conviction. "That's shocking,"said feminist Betty Friedan. "It certainly confirms the impression that rap is very, very anti-woman and lethal to women.""It sends the message that violence against women isn't serious,"said Charlotte Watson, executive director of My Sister's Place, a battered women's



Mean Rouge1: 0.4110262756 | Mean Rouge2: 0.3393439628 | Mean RougeL: 0.3918332944 ( 100/54311 )[A

By TIM SMITH and DAVE SALTONSTALL DAILY NEWS STAFF WRITERS

Sunday, July 29th 2001, 2:22AM

Many men have died in battle aboard the Intrepid, the towering aircraft carrier docked on the Hudson River.

By TIM SMITH and DAVE SALTONSTALL DAILY NEWS STAFF WRITERS Many men have died in battle aboard the Intrepid, the towering aircraft carrier docked on the Hudson River. But Beethavean Scottland, a boxer who was beaten to death aboard the ship during a state-sanctioned fight last month, shouldn't have been one of them. That's the conclusion of a Daily News investigation into the June 26 fight - a tragedy that many believe underscores serious problems with



Mean Rouge1: 0.4070357292 | Mean Rouge2: 0.3360493612 | Mean RougeL: 0.3880290877 ( 101/54311 )[A

Let me just say there are very few places that could lure me away.’’

Opera Boston’s production of Berlioz’ “Beatrice et Benedict’’ opens in two weeks and Lesley Koenig, the company’s new general director, is taking notes. She talks with director David Kneuss during a break. He’s glad to listen. Kneuss knows Koenig’s background. In her previous life, at the Metropolitan Opera, they worked together as directors. She also served as the San Francisco Ballet’s general manager for eight years. She speaks fluent Italian, German and French, and has a business degree from Stanford.



Mean Rouge1: 0.4078084141 | Mean Rouge2: 0.3372559871 | Mean RougeL: 0.3888229257 ( 102/54311 )[A

NEW YORK - Carl Icahn is suing Amylin Pharmaceuticals Inc., saying he and other shareholders should get more time to nominate candidates to the diabetes drug maker’s board of directors.

Billionaire investor Carl Icahn is suing Amylin Pharmaceuticals Inc., saying he and other shareholders should get more time to nominate candidates to the diabetes drug maker’s board of directors. Icahn said Monday that Amylin is “at a crossroads’’ and stockholders deserve a chance to nominate new directors. The activist investor said he wants to nominate directors who will listen to offers for the company and accused the current board of diluting the value of Amylin shares. He filed a lawsuit against the San Diego company in Delaware court.



Mean Rouge1: 0.4062067277 | Mean Rouge2: 0.3362588648 | Mean RougeL: 0.3874020534 ( 103/54311 )[A

It could simply be that the country is desperately in need of a laugh these days, but the situation comedy is showing renewed signs of life.

It could simply be that the country is desperately in need of a laugh these days, but the situation comedy is showing renewed signs of life. Just when the sit-com scene was threatening to disintegrate into endless imitations of ''The Dukes of Hazzard,'' crammed with shapely bodies and screeching automobile tires, producers apparently have decided to venture in less mindless directions.   ABC's ''The Greatest American Hero'' is not a situation comedy in the strictest sense of that label. For one thing, it runs for 60 minutes instead of the standard half-hour. For another, in blending fantasy with action-adventure, producer Stephen J. Cannell is using the character of an idealistic teacher in his late 20's to illustrate cleverly the serious thesis that it's not easy to be a superhero.  During a recent panel discussion about video art at Manhattan's


Mean Rouge1: 0.4101678619 | Mean Rouge2: 0.3408053077 | Mean RougeL: 0.3915405902 ( 104/54311 )[A

By Anne Dolce The Daily Meal

These days, you can get any kind of chicken wing your heart desires.

By Anne Dolce
The Daily Meal
These days, you can get any kind of chicken wing your heart desires. Whether you're craving honey-glazed barbecue, teriya...
“They are meeting obligations by things that are least helpful of homeowners,’’ she said of lenders..Nearly 4,000 Massachusetts borrowers have received some kind of housing-debt relief this year as part of a national mortgage settlement involving five major lenders, with each homeowner getting an average of $67,457 in assistance, according to federal report released Monday.

Nearly 4,000 Massachusetts borrowers have received some kind of housing-debt relief this year as part of a national mortgage settlement involving five major lenders, with each homeowner averaging about $67,457 in financial assistance, according to federal report released Monday. Massachusetts borrowers received $266 million of the more than $21.92 billion disbursed 


Mean Rouge1: 0.4114947720 | Mean Rouge2: 0.3420318907 | Mean RougeL: 0.3924682253 ( 105/54311 )[A
Mean Rouge1: 0.4116334369 | Mean Rouge2: 0.3427344700 | Mean RougeL: 0.3927830620 ( 106/54311 )[A

Jamie Dimon, chairman and CEO of JPMorgan Chase, will testify before the Senate Banking Committee on Wednesday on the now infamous trading loss that has reached about $3 billion.

Jamie Dimon,  chairman and CEO of JPMorgan Chase, will testify before the Senate Banking Committee on Wednesday on the now infamous trading loss that has reached about $3 billion.  A preview of the testimony reveals that he will again apologize for letting “a lot of people down” but will say that  traders in Chief Investment Unit “did not have the requisite understanding of the risks they took.” He will defend the bank’s multibillion-dollar loss saying:  The Chief Investment Unit, where the loss happened, did something it shouldn’t have done and, “as a result, we have let a lot of people down, and we are…



Mean Rouge1: 0.4155978091 | Mean Rouge2: 0.3472847012 | Mean RougeL: 0.3969203734 ( 107/54311 )[A

By Jason Notte The Street

Just because a company shelled out billions to become a National Football League sponsor doesn't mean it won't take a beating from rivals the day of the Super Bowl.

By Jason Notte
The Street
Just because a company shelled out billions to become a National Football League sponsor doesn't mean it won't take a beatin...



Mean Rouge1: 0.4137959317 | Mean Rouge2: 0.3449297066 | Mean RougeL: 0.3943660248 ( 108/54311 )[A

Sunday night’s episode of “Breaking Bad” unfolds like a symphony, with show creator Vince Gilligan as the maestro..The quieter “Breaking Bad” gets, the more we hear.

As the premiere of “Breaking Bad” moves forward from long scene to long scene, it goes from strength to strength.  I  realize that the kudos for “Breaking Bad” sometimes veer into hyperbole. And yet, if I want to be honest, I can’t do anything but lavish praise on the show, which returns Sunday night at 9 for the final eight episodes. “Breaking Bad” actually may become one of the rare prestige TV dramas that doesn’t falter in its later seasons. If Sunday’s episode is any indication of the quality of the last seven hours of season 5, the show may ultimately have a close-to-perfect run.



Mean Rouge1: 0.4179019613 | Mean Rouge2: 0.3495442395 | Mean RougeL: 0.3986470986 ( 109/54311 )[A

Diamonds are forever - but they're not for every bride.

Diamonds are forever - but they're not for every bride.$500M$3,200



Mean Rouge1: 0.4154934452 | Mean Rouge2: 0.3464233088 | Mean RougeL: 0.3964105008 ( 110/54311 )[A

and Tony Bennett, are still scheduled to perform in Israel in the coming months.

"This is a major disappointment for the band and fans," the group said



Mean Rouge1: 0.4136993966 | Mean Rouge2: 0.3437509295 | Mean RougeL: 0.3944087509 ( 111/54311 )[A

P&G earlier Friday reported a 38% jump in fiscal fourth-quarter profit, though that growth was due to a cut in overhead expenses as overall sales trends were choppy.

Businesses P&G intends to either discontinue, or divest, have reported weaker profits of late, CEO says.


KeyboardInterrupt: 

___
___
# **Notes**


1. scoring -> label rougeL -> sentence feats -> train
2. grid search ?! (or manual fine tuning)
3. test -> input doc -> predict score -> keep N first sentences or keep those over a threshold -> create summary -> calculate rouge1/2/L

___
### **References**

1. [Named Entity Recognition (NER) with TensorflowNamed Entity Recognition (NER) with Tensorflow](https://www.kaggle.com/code/naseralqaydeh/named-entity-recognition-ner-with-tensorflow)
2. [Extractive Summarization using Deep LearningExtractive Summarization using Deep Learning](https://arxiv.org/pdf/1708.04439v1.pdf)
3. [NLTK](https://www.bogotobogo.com/python/NLTK/Stemming_NLTK.php)
4. [Text Features Library](https://github.com/pmbaumgartner/text-feat-lib/tree/master/notebooks)
5. []()



### **Feats**
1. [Feature extraction](https://arxiv.org/pdf/1708.04439v1.pdf)
    1. Number of thematic words
    2. Sentence position
    3. Sentence length
    4. Sentence position relative to paragraph
    5. Number of proper nouns
    6. Number of numerals
    7. Number of named entities
    8. Term Frequency-Inverse Sentence Frequency
    9. Sentence to Centroid similarity
    
    
2. [Text Summarization References](https://github.com/Tian312/awesome-text-summarization/blob/master/README.md)



___
### **Feature Base**

The feature base model extracts the features of the sentence, then evaluate its importance. Here is the representative research.
Sentence Extraction Based Single Document Summarization
Following features are used in the above method.

1. Position of the sentence in the input document
2. Presence of the verb in the sentence
3. Length of the sentence
4. Term frequency
5. Named entity tag NE
6. Font style

…etc. All the features are accumulated as the score.
The No.of coreferences are the number of pronouns to the previous sentence. It is simply calculated by counting the pronouns occurred in the first half of the sentence. So the Score represents the reference to the previous sentence.
Now we can evaluate each sentence. Next is selecting the sentence to avoid the duplicate of the information. In this paper, the same word between the new and selected sentence is considered. And the refinement to connect the selected sentences are executed.
Luhn’s Algorithm is also feature base. It evaluates the “significance” of the word that is calculated from the frequency.
You can try feature base text summarization by TextTeaser (PyTeaser is available for Python user).

# Unused

In [None]:
# train_set = f"..{os.sep}Data{os.sep}release{os.sep}train.jsonl"
# dev_set = f"..{os.sep}Data{os.sep}release{os.sep}dev.jsonl"
# test_set = f"..{os.sep}Data{os.sep}release{os.sep}test.jsonl"
# load json files and convert them to dataframes to load faster next time
# train_df = funs.json_to_df(train_set,"train")
# dev_df = funs.json_to_df(dev_set,"dev")
# test_df = funs.json_to_df(test_set,"test")



In [None]:
# colab command to download the dataset
!kaggle datasets download -d tkylafi/summarizer-data