In [1]:
import datetime as dt
import pandas as pd

In [11]:
def parse_millisecond_timestamp(ts: int) -> dt.datetime:
    return dt.datetime.fromtimestamp(ts / 1000, tz=dt.timezone.utc)

df = pd.read_csv(
    "news.csv",
    sep="\t",
    header=None,
    index_col=0,
    names=["title", "url", "outlet", "category", "cluster", "host", "tstamp"],
    parse_dates=["tstamp"],
    date_parser=parse_millisecond_timestamp,
    dtype={
        "outlet": "category",
        "category": "category",
        "cluster": "category",
        "host": "category",
    },
)

In [12]:
df.iloc[0]

title       Fed official says weak data caused by weather,...
url         http://www.latimes.com/business/money/la-fi-mo...
outlet                                      Los Angeles Times
category                                                    b
cluster                         ddUyU0VZz0BRneMioxUPQVP6sIxvM
host                                          www.latimes.com
tstamp                       2014-03-10 16:52:50.698000+00:00
Name: 1, dtype: object

In [14]:
df.groupby('outlet', sort=False)['title'].apply(lambda ser: ser.str.contains('Fed').sum()).nlargest(10)

outlet
Reuters                         161
NASDAQ                          103
Businessweek                     93
Investing.com                    66
Wall Street Journal \(blog\)     61
MarketWatch                      56
Moneynews                        55
Bloomberg                        53
GlobalPost                       51
Economic Times                   44
Name: title, dtype: int64

In [15]:
title, ser = next(iter(df.groupby('outlet', sort=False)['title']))
title

'Los Angeles Times'

In [16]:
ser.head()

1       Fed official says weak data caused by weather,...
486            Stocks fall on discouraging news from Asia
1124    Clues to Genghis Khan's rise, written in the r...
1146    Elephants distinguish human voices by sex, age...
1237    Honda splits Acura into its own division to re...
Name: title, dtype: object

In [19]:
ser.str.contains('Fed').head(5)

1        True
486     False
1124    False
1146    False
1237    False
Name: title, dtype: bool

In [20]:
ser.str.contains('Fed').sum

<bound method Series.sum of 1          True
486       False
1124      False
1146      False
1237      False
1250      False
1525      False
1551      False
1598      False
1633      False
1941      False
2120      False
2239      False
2405      False
2966      False
3060      False
3299      False
3667      False
3864      False
3987      False
4169      False
4264      False
4357      False
4391      False
4616      False
5051      False
5112      False
5179      False
5394      False
5655      False
          ...  
414834    False
414866    False
414965    False
415438    False
417039    False
417125    False
417379    False
417453    False
417580    False
417743    False
417892    False
418038    False
418358    False
418468    False
418712    False
418722     True
418954    False
419097    False
419131    False
419723    False
420317    False
420793    False
421282    False
421431    False
421480    False
421547    False
421584    False
421972    False
422226    False
422905    Fa

In [22]:
mentions_fed = df['title'].str.contains('Fed')
type(mentions_fed)

pandas.core.series.Series