In [1]:
import pandas as pd
from gensim import models, corpora
from gensim.parsing.preprocessing import preprocess_string, strip_tags, strip_punctuation
from gensim.parsing.preprocessing import remove_stopwords, stem_text, strip_non_alphanum, strip_multiple_whitespaces
from gensim.parsing.preprocessing import strip_short, strip_numeric
from gensim.parsing.preprocessing import STOPWORDS
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from sklearn.utils import class_weight
import multiprocessing as mp
import nltk
import pandas as pd
import numpy as np
import tensorflow as tf
import parmap
import ast

In [2]:
topics_index_to_name_map = {
    0: 'Agriculture, animals, food and rural affairs',
    1: 'Asylum, immigration and nationality',
    2: 'Business, industry and consumers',
    3: 'Communities and families',
    4: 'Crime, civil law, justice and rights',
    5: 'Culture, media and sport',
    6: 'Defence',
    7: 'Economy and finance',
    8: 'Education',
    9: 'Employment and training',
    10: 'Energy and environment',
    11: 'European Union',
    12: 'Health services and medicine',
    13: 'Housing and planning',
    14: 'International affairs',
    15: 'Parliament, government and politics',
    16: 'Science and technology',
    17: 'Social security and pensions',
    18: 'Social services',
    19: 'Transport',
    20: 'Others'
}
topics_name_to_index_map = {y:x for x,y in topics_index_to_name_map.items()}

topics = list(topics_name_to_index_map.keys())

def strip_short2(text):
    return strip_short(text, minsize=4)


def preprocess_text(text):
    FILTERS = [lambda x: x.lower(), strip_multiple_whitespaces, strip_tags, strip_punctuation,
                   strip_non_alphanum, strip_numeric, strip_short2]
    return preprocess_string(text, FILTERS)

def preprocess(topic):
    ret = []
    topic = topic.strip()
    
    if '|' in topic:
        topics = topic.split('|')
        t = topics[0]
        t = t.strip()
        return topics_name_to_index_map[t]
        
    return topics_name_to_index_map[topic]

In [3]:
year = 2018

In [5]:
news_pred = pd.read_csv('../data/news_predictions/news_{}_predictions.csv'.format(year))

In [6]:
sources = news_pred['source'].unique()

In [7]:
counts = {month:{source: {topic:0 for topic in topics} for source in sources} for month in range(1, 13)}

In [8]:
news_pred.head()

Unnamed: 0.1,Unnamed: 0,source_id,source,day,month,year,program_name,transcript,parliament,top1_topic,top1_acc,top2_topic,top2_acc,top3_topic,top3_acc
0,0,163795,Belfast Telegraph,1,1,2018,Shocking number of nurses giving up jobs in NI...,"Full story, see page 4",,"Parliament, government and politics",26.78,Others,14.83,"Crime, civil law, justice and rights",7.3
1,1,163795,Belfast Telegraph,1,1,2018,City's record run ends on a sore point,But injuries to Gabriel Jesus and this season'...,,Others,50.7,"Culture, media and sport",8.24,Communities and families,7.92
2,2,163795,Belfast Telegraph,1,1,2018,Fears for health service as hundreds of downtr...,Just under 600 nurses and midwives left the Nu...,,Social services,86.75,Health services and medicine,8.91,Others,2.19
3,3,163795,Belfast Telegraph,1,1,2018,Outrage after children witness man being assau...,It marked the end of a year in which almost 10...,,"Parliament, government and politics",33.84,Others,23.8,"Crime, civil law, justice and rights",20.98
4,4,163795,Belfast Telegraph,1,1,2018,Diabetes drug could be used in battle with Alz...,"A so-called ""triple receptor drug"" created to ...",,Health services and medicine,78.47,Others,3.6,Energy and environment,2.34


In [9]:
for index, row in news_pred.iterrows():
    month = row['month']
    source = row['source']
    topic = row['top1_topic']
    counts[month][source][topic] += 1


In [10]:
rows = []

In [11]:
for month in range(1, 13):
    for source in sources:
        row = [month, source]
        for topic in topics:
            row.append(counts[month][source][topic])
        rows.append(row)
columns = ['month', 'source'] + topics
df = pd.DataFrame(rows, columns=columns)

In [12]:
df = df.sort_values(by=['month', 'source'])

In [13]:
df

Unnamed: 0,month,source,"Agriculture, animals, food and rural affairs","Asylum, immigration and nationality","Business, industry and consumers",Communities and families,"Crime, civil law, justice and rights","Culture, media and sport",Defence,Economy and finance,...,European Union,Health services and medicine,Housing and planning,International affairs,"Parliament, government and politics",Science and technology,Social security and pensions,Social services,Transport,Others
0,1,Belfast Telegraph,198,3,539,47,678,2790,160,331,...,98,481,10,386,2045,1,0,71,502,2470
1,1,Express,381,8,746,31,266,6957,293,751,...,641,711,6,713,1524,2,0,10,795,3638
2,1,Guardian,174,12,214,45,185,1298,50,190,...,87,201,9,327,455,0,0,21,151,1063
3,1,Herald,91,3,190,10,155,544,37,107,...,21,134,2,114,689,0,0,4,158,541
4,1,Independent,243,19,446,40,299,2142,121,365,...,239,427,8,772,586,7,1,20,357,1738
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,12,Standard,100,8,126,14,116,1718,40,69,...,120,97,0,95,254,0,0,1,240,983
211,12,Star,167,1,109,8,94,3048,106,30,...,44,114,0,81,295,3,0,6,195,1598
215,12,Sun,40,0,11,8,13,168,3,3,...,3,17,0,6,51,0,0,5,35,247
212,12,Telegraph,431,17,609,34,298,2476,127,428,...,442,327,5,453,1275,1,0,13,467,2279


In [14]:
df.to_csv('counts_news_count_by_source_{}.csv'.format(year))

In [16]:
count_by_month = df.drop(['source'], axis=1).groupby(['month'], as_index=False).sum()

In [17]:
count_by_month.to_csv('count_by_month_{}.csv'.format(year))

# BBC COUNTS

In [8]:
bbc_sources = [54, 106, 107, 175, 279]
years = [2014, 2015, 2016, 2017]

In [10]:
for year in years:
    print(year)
    counts = {month:{source: {topic:0 for topic in topics} for source in bbc_sources} for month in range(1, 13)}
    word_counts = {month:{source: {topic:0 for topic in topics} for source in bbc_sources} for month in range(1, 13)}
    for source in bbc_sources:
        df = pd.read_csv('../data/partition_predictions/topics_pred_on_bert_partitioned_bbc_{}_{}_with_news_classifier_no_Others.csv'.format(source, year))
        df = df.drop(['Unnamed: 0'], axis=1)
        df['date'] = pd.to_datetime(df['date'])
        df['topic'] = df['topic'].apply(lambda x: ast.literal_eval(x)[0][0])
        display(df.head())
        for index, row in df.iterrows():
            month = row['date'].month
            topic = row['topic']
            counts[month][source][topic] += 1
            word_counts[month][source][topic] += len(row.transcript.split())
            
    rows = []
    word_count_rows = []
    
    for month in range(1, 13):
        for source in bbc_sources:
            row = [month, source]
            word_count_row = [month, source]
            
            for topic in topics:
                row.append(counts[month][source][topic])
                word_count_row.append(word_counts[month][source][topic])
            rows.append(row)
            word_count_rows.append(word_count_row)
            
    columns = ['month', 'source'] + topics
    res_df = pd.DataFrame(rows, columns=columns)
    res_df.to_csv('./bbc_partition_count_by_source_topic_{}.csv'.format(year), index=False)
    res_df = pd.DataFrame(word_count_rows, columns=columns)
    res_df.to_csv('./bbc_partition_word_count_by_source_topic_{}.csv'.format(year), index=False)

2014


Unnamed: 0,partition_id,date,transcript,topic
0,0,2014-12-06,"# Oh, yeah # I see your smiling face # Like I’...","Culture, media and sport"
1,0,2014-12-06,Two hostages are killed in Yemen during a fail...,"Parliament, government and politics"
2,0,2014-12-06,"Our hearts are full of sorrow tonight, our pra...","Parliament, government and politics"
3,0,2014-12-06,Ferocious winds and torrential rain in the eas...,"Culture, media and sport"
4,0,2014-12-06,"President Obama has condemned as ""barbaric"" th...",Defence


Unnamed: 0,partition_id,date,transcript,topic
0,0,2014-07-05,"A woman should have two good things, good heel...","Parliament, government and politics"
1,0,2014-07-05,Susan needs 31 points to leap frog Stuart and ...,"Culture, media and sport"
2,0,2014-07-05,"I was a bit over come, I burst into tears and ...","Parliament, government and politics"
3,0,2014-07-05,"(HE LAUGHS) l’ve loved it, l’ve met four reall...","Culture, media and sport"
4,0,2014-07-05,# Bon-bon-bon-bon-bon-bonkers...# Subtitles by...,"Parliament, government and politics"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2014-05-04,# You know it's true # itfc subtitles ~ Welcom...,"Culture, media and sport"
1,0,2014-05-04,This is the scene outside the police Sinn Fein...,"Crime, civil law, justice and rights"
2,0,2014-05-04,Ed Miliband has called for an independent revi...,"Parliament, government and politics"
3,0,2014-05-04,South Korea's President has met with the relat...,"Culture, media and sport"
4,0,2014-05-04,Subtitles It will be a dry night tonight for m...,"Parliament, government and politics"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2014-08-03,Your credit score is yours and at Experian Cre...,"Business, industry and consumers"
1,0,2014-08-03,You’re the photographer that everyone wants to...,"Culture, media and sport"
2,0,2014-08-03,"And at the Halifax, we think people who give e...","Parliament, government and politics"
3,0,2014-08-03,For your chance to enter the £1 Million Mega D...,"Culture, media and sport"
4,0,2014-08-03,Israel under pressure as another UN school is ...,"Culture, media and sport"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2014-03-04,You can always rely on a Premier Inn for a gre...,"Culture, media and sport"
1,0,2014-03-04,But just how much Russian money is already her...,"Culture, media and sport"
2,0,2014-03-04,The pro Russian forces took over the base in S...,"Parliament, government and politics"
3,0,2014-03-04,This is their base and they want to go back to...,Defence
4,0,2014-03-04,Looking out of the gates wondering what is goi...,"Culture, media and sport"


2015


Unnamed: 0,partition_id,date,transcript,topic
0,0,2015-07-05,# You’re stayin’ alive Stayin’ alive # Feel th...,"Parliament, government and politics"
1,0,2015-07-05,The polls have closed in Greece’s crucial refe...,"Parliament, government and politics"
2,0,2015-07-05,"Also on the programme, as he prepares for the ...","Parliament, government and politics"
3,0,2015-07-05,Princess Charlotte has been christened in a pr...,"Culture, media and sport"
4,0,2015-07-05,"And the reigning world champion, after a bad s...","Culture, media and sport"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2015-01-08,"Good evening from Paris, from a city and a nat...",International affairs
1,0,2015-01-08,This and A7 other Tesco stores will never be b...,"Parliament, government and politics"
2,0,2015-01-08,A good day to bury bad news - this was the fir...,"Parliament, government and politics"
3,0,2015-01-08,So the mood remains sombre and reflective in P...,"Parliament, government and politics"
4,0,2015-01-08,Throughout the day we have found ourselves cha...,International affairs


Unnamed: 0,partition_id,date,transcript,topic
0,0,2015-12-05,"# ""Uptown Funk"" Whatever gifts you buy don’t f...",Defence
1,0,2015-12-05,The Defence Secretary says striking oilfields ...,International affairs
2,0,2015-12-05,Subtitles by Deluxe Northern part of the UK wi...,"Parliament, government and politics"
3,0,2015-12-05,"As the worst of the weather heads to the West,...","Culture, media and sport"
4,0,2015-12-05,"These are the few, the fearless, People were s...","Culture, media and sport"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2015-07-04,The migrants who crossed the border- and storm...,International affairs
1,0,2015-07-04,"Moro Sooo to This group is walI unconcerned, u...",Transport
2,0,2015-07-04,"We put our video footage to Eurotunnel, the co...",Transport
3,0,2015-07-04,"Our Political Correspondent, Lewis Vaughan jon...","Parliament, government and politics"
4,0,2015-07-04,All of the 30 British holidaymakers- killed in...,Transport


Unnamed: 0,partition_id,date,transcript,topic
0,0,2015-10-04,An eight-year old boy and a pensioner die in a...,"Parliament, government and politics"
1,0,2015-10-04,Paid leave for grandparents - the Chancellor o...,"Parliament, government and politics"
2,0,2015-10-04,President Obama promises a full investigation ...,International affairs
3,0,2015-10-04,Disappointment for fans as hosts England are d...,"Culture, media and sport"
4,1,2015-10-04,Breaking news - at least 13 dead in flash floo...,"Parliament, government and politics"


2016


Unnamed: 0,partition_id,date,transcript,topic
0,0,2016-06-04,we would have seen during this weekend and the...,"Culture, media and sport"
1,0,2016-06-04,manage that and make sure we stay within the g...,"Culture, media and sport"
2,0,2016-06-04,We start at the desk where you have the three ...,"Culture, media and sport"
3,0,2016-06-04,It helps to correct this atmosphere which is v...,"Culture, media and sport"
4,0,2016-06-04,Twitter’s live streaming video service announc...,"Culture, media and sport"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2016-12-03,Trump blows away four decades of complex diplo...,International affairs
1,0,2016-12-03,Also tonight... Brazil mourns the 71 people ki...,"Parliament, government and politics"
2,0,2016-12-03,The tiny West African state of Gambia throws o...,"Parliament, government and politics"
3,0,2016-12-03,And the unstoppable England rugby union team w...,"Parliament, government and politics"
4,0,2016-12-03,China has Lodged an official complaint after t...,International affairs


Unnamed: 0,partition_id,date,transcript,topic
0,0,2016-05-12,"On Five News, immigration and the economy - th...","Parliament, government and politics"
1,0,2016-05-12,"A senior officer, investigating Ben Needham’s ...","Culture, media and sport"
2,0,2016-05-12,What happened is just unnecessary.I What happe...,"Culture, media and sport"
3,0,2016-05-12,And that sinking feeling - A shock for one mot...,"Parliament, government and politics"
4,0,2016-05-12,The battle for your vote in the European refer...,"Parliament, government and politics"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2016-08-07,Anti-doping chiefs will investigate claims a K...,"Culture, media and sport"
1,0,2016-08-07,"This is major Michael Rottich, he Thls ls majo...","Culture, media and sport"
2,0,2016-08-07,It was hoped that the figures were turned to t...,"Culture, media and sport"
3,0,2016-08-07,"Two British climbers, who died on Europe’s fam...","Parliament, government and politics"
4,0,2016-08-07,Jose Mourinho could win his first trophy as Ma...,"Culture, media and sport"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2016-03-01,THUDDINGI DRUMMING There is a rhythm that beat...,"Culture, media and sport"
1,1,2016-03-01,"This is Sky News at 10pm, live from Virginia, ...","Parliament, government and politics"
2,1,2016-03-01,"Good evening from Virginia, one of 12 states v...","Parliament, government and politics"
3,1,2016-03-01,Let me just show you what they have been doing...,"Parliament, government and politics"
4,1,2016-03-01,And we’re expecting that story to feature in t...,"Parliament, government and politics"


2017


Unnamed: 0,partition_id,date,transcript,topic
0,0,2017-09-02,# ..Love you... # 0n the front line of Britain...,"Culture, media and sport"
1,0,2017-09-02,Put your thumb vertically above her and make s...,"Culture, media and sport"
2,0,2017-09-02,As we step into the first weekend in September...,"Parliament, government and politics"
3,0,2017-09-02,"Fora cardiac arrest we always send two, in cas...","Culture, media and sport"
4,0,2017-09-02,I like a woman who’s sexually experienced but ...,"Culture, media and sport"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2017-02-05,"But if the President is fuming, it’s because h...","Culture, media and sport"
1,0,2017-02-05,"First, his travel ban was overturned by a fede...","Culture, media and sport"
2,0,2017-02-05,That interview wiLL be played just before the ...,"Culture, media and sport"
3,0,2017-02-05,Marine Le Pen Launched her run at the presiden...,"Culture, media and sport"
4,0,2017-02-05,"And Welsh star, new hero, George North runs th...","Culture, media and sport"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2017-01-01,"For now, from aLL of us here, it’s goodbye Aar...","Culture, media and sport"
1,0,2017-01-01,"Good evening, the headlines: Police in Turkey ...","Crime, civil law, justice and rights"
2,0,2017-01-01,She had blood all and saw her on the floor. Sh...,"Culture, media and sport"
3,0,2017-01-01,The Queen has missed the New Year The Queen ha...,"Parliament, government and politics"
4,1,2017-01-08,Your headlines: The Prime Minister has hinted ...,European Union


Unnamed: 0,partition_id,date,transcript,topic
0,0,2017-08-06,The world’s fastest man - Usain Bolt - says he...,"Culture, media and sport"
1,0,2017-08-06,The United Nations Security Council has voted ...,Economy and finance
2,0,2017-08-06,The government has launched an independent rev...,Energy and environment
3,0,2017-08-06,"Until then, from us all here, have a very good...","Culture, media and sport"
4,0,2017-08-06,Music today is from one of Ireland’s most famo...,"Culture, media and sport"


Unnamed: 0,partition_id,date,transcript,topic
0,0,2017-10-01,"Violence in Catalonia, as the Spanish governme...","Culture, media and sport"
1,0,2017-10-01,Do you think years after your Job’ job} Do you...,"Parliament, government and politics"
2,0,2017-10-01,Two women are killed by a knifeman in a suspec...,"Culture, media and sport"
3,0,2017-10-01,Catalan officials say more than 700 people hav...,"Parliament, government and politics"
4,0,2017-10-01,Riot police using extraordinary force to preve...,"Culture, media and sport"


# PARTITION COUNTS WINDOW TOPIC PREDICTION

In [88]:
years = [2014, 2015, 2016, 2017, 2018]

for year in years:
    print(year)
    counts = {month:{topic:0 for topic in topics} for month in range(1, 13)}
    word_counts = {month:{topic:0 for topic in topics} for month in range(1, 13)}

    df = pd.read_csv('./window_topic_prediction_with_short_sentences_merged_54_{}.csv'.format(year))
    df = df.drop(['Unnamed: 0'], axis=1)
    # df.columns = ['partition_id', 'date', 'transcript', 'topic']
    df['date'] = pd.to_datetime(df['date'])
    # df['topic'] = df['topic'].apply(lambda x: ast.literal_eval(x)[0][0])

    last_topic = None
    for index, row in df.iterrows():
        month = row['date'].month
        topic = row['topic']
        if topic != last_topic:
            counts[month][topic] += 1
        last_topic = topic
        word_counts[month][topic] += len(row.transcript.split())

    others_df = pd.read_csv('./dropped_partitions_54_{}.csv'.format(year))
    others_df = others_df.drop(['Unnamed: 0'], axis=1)
    others_df['date'] = pd.to_datetime(others_df['date'])

    for index, row in others_df.iterrows():
        month = row['date'].month
        topic = 'Others'
        counts[month][topic] += 1
        word_counts[month][topic] += len(row.transcript.split())

    rows = []
    word_count_rows = []

    for month in range(1, 13):
        row = [month]
        word_count_row = [month]

        for topic in topics:
            row.append(counts[month][topic])
            word_count_row.append(word_counts[month][topic])
        rows.append(row)
        word_count_rows.append(word_count_row)

    columns = ['month'] + topics
    res_df = pd.DataFrame(rows, columns=columns)
    res_df.to_csv('./bbc_partition_count_by_source_topic_{}.csv'.format(year), index=False)
    res_df = pd.DataFrame(word_count_rows, columns=columns)
    res_df.to_csv('./bbc_partition_word_count_by_source_topic_{}.csv'.format(year), index=False)

2014
2015
2016
2017
2018


# BBC News at One, Six, Ten partition count

In [71]:
years = list(range(2014, 2019))
rows = []
word_count_rows = []

for year in years:
    df = pd.read_csv('../data/partition_predictions/window_topic_prediction_with_short_sentences_merged_54_{}.csv'.format(year))

    programs = ['BBC News at One', 'BBC News at Six', 'BBC News at Ten']

    counts = {month:{program:0 for program in programs} for month in range(1, 13)}
    word_counts = {month:{program:0 for program in programs} for month in range(1, 13)}

    for program in programs:
        print(program, year)
        df_p = df.loc[df.source == program]
        df_p = df_p.drop(['Unnamed: 0'], axis=1)
        print(program, year)
        print(len(df_p))
        # df.columns = ['partition_id', 'date', 'transcript', 'topic']
        df_p['date'] = pd.to_datetime(df_p['date'])
        # df['topic'] = df['topic'].apply(lambda x: ast.literal_eval(x)[0][0])
        display(df_p.head())

        last_topic = None
        for index, row in df_p.iterrows():
            month = row['date'].month
            topic = row['topic']
            if topic != last_topic:
                counts[month][program] += 1
            last_topic = topic
            word_counts[month][program] += len(row.transcript.split())

        for month in range(1, 13):
            row = [year, month]
            word_count_row = [year, month]

            for program in programs:
                row.append(counts[month][program])
                word_count_row.append(word_counts[month][program])
            rows.append(row)
            word_count_rows.append(word_count_row)

columns = ['year', 'month'] + programs
res_df = pd.DataFrame(rows, columns=columns)
res_df.to_csv('./bbc_news_at_one_six_ten_partition_count.csv', index=False)
res_df = pd.DataFrame(word_count_rows, columns=columns)
res_df.to_csv('./bbc_news_at_one_six_ten_word_count.csv', index=False)

BBC News at One 2014
BBC News at One 2014
6659


Unnamed: 0,partition_id,date,source,transcript,type,topic
207,445,2014-03-04,BBC News at One,Yanukovych its legitimate president. Russian ...,p,International affairs
208,445,2014-03-04,BBC News at One,As Vladimir Putin warns the West that sanctio...,p,International affairs
209,445,2014-03-04,BBC News at One,Also this lunchtime... At the Oscar Pistorius...,p,Education
210,445,2014-03-04,BBC News at One,On display for the first time ever in Britain...,p,"Culture, media and sport"
211,445,2014-03-04,BBC News at One,And this is an exhibition that gives showers ...,p,Communities and families


BBC News at Six 2014
BBC News at Six 2014
8177


Unnamed: 0,partition_id,date,source,transcript,type,topic
133,442,2014-03-07,BBC News at Six,The head ofthe The head of the Metropolitan P...,p,"Parliament, government and politics"
134,442,2014-03-07,BBC News at Six,Vladimir Putin opens the Winter Paralympics i...,p,"Culture, media and sport"
135,442,2014-03-07,BBC News at Six,Network Rail apologises unreservedly for what...,p,"Business, industry and consumers"
136,442,2014-03-07,BBC News at Six,"The cost of a pint of milk going down, as the...",p,"Parliament, government and politics"
137,442,2014-03-07,BBC News at Six,And trenches where the First World War’s sold...,p,"Parliament, government and politics"


BBC News at Ten 2014
BBC News at Ten 2014
8024


Unnamed: 0,partition_id,date,source,transcript,type,topic
167,443,2014-03-07,BBC News at Ten,as devastating. With the Met facing one of th...,p,"Crime, civil law, justice and rights"
168,443,2014-03-07,BBC News at Ten,European observers are blocked again from ent...,p,"Parliament, government and politics"
169,443,2014-03-07,BBC News at Ten,All but one of Ukraine’s team stay away from ...,p,International affairs
170,443,2014-03-07,BBC News at Ten,The head of the Metropolitan Police says he w...,p,"Crime, civil law, justice and rights"
171,443,2014-03-07,BBC News at Ten,Our home affairs correspondent Matt Prodger h...,p,"Parliament, government and politics"


BBC News at One 2015
BBC News at One 2015
9068


Unnamed: 0,partition_id,date,source,transcript,type,topic
0,89,2015-04-06,BBC News at One,"Tense, or what? I’m feeling it. # ..I’m read...",p,"Parliament, government and politics"
1,89,2015-04-06,BBC News at One,You may depend on it for your everyday social...,p,"Parliament, government and politics"
2,89,2015-04-06,BBC News at One,And cheers for Prince Harry as he arrives in ...,p,"Parliament, government and politics"
3,89,2015-04-06,BBC News at One,Sweeping changes to pension rules which will ...,p,Economy and finance
4,89,2015-04-06,BBC News at One,The official start of the new tax year has sp...,p,Economy and finance


BBC News at Six 2015
BBC News at Six 2015
9827


Unnamed: 0,partition_id,date,source,transcript,type,topic
16,90,2015-04-06,BBC News at Six,This is a story of Britain - but a Britain we...,p,Economy and finance
17,90,2015-04-06,BBC News at Six,"In Bristol, David Cameron said it was immoral...",p,"Parliament, government and politics"
18,90,2015-04-06,BBC News at Six,I don’t want people to hear about the recover...,p,Economy and finance
19,90,2015-04-06,BBC News at Six,That’s exactly what Michael plans to do tomor...,p,Economy and finance
20,90,2015-04-06,BBC News at Six,The Spring challenge is to get into it succes...,p,"Parliament, government and politics"


BBC News at Ten 2015
BBC News at Ten 2015
8405


Unnamed: 0,partition_id,date,source,transcript,type,topic
50,92,2015-04-06,BBC News at Ten,# It’s a safe bet she’d never let life get he...,p,"Parliament, government and politics"
51,92,2015-04-06,BBC News at Ten,A new tax year and a new election row over wh...,p,Health services and medicine
52,92,2015-04-06,BBC News at Ten,"And our online presence might outlive us, but...",p,"Parliament, government and politics"
53,92,2015-04-06,BBC News at Ten,The spring challenge is to get into it succes...,p,"Parliament, government and politics"
54,92,2015-04-06,BBC News at Ten,Counter terrorism measures are stifling class...,p,Education


BBC News at One 2016
BBC News at One 2016
7057


Unnamed: 0,partition_id,date,source,transcript,type,topic
637,5,2016-06-02,BBC News at One,Mr Reginald Keys? We’re from Army notificatio...,p,Defence
638,5,2016-06-02,BBC News at One,We’d like you to take on Tony Blair at the ne...,p,"Parliament, government and politics"
639,5,2016-06-02,BBC News at One,You’re taking on the Prime Minister and you c...,p,"Parliament, government and politics"
640,5,2016-06-02,BBC News at One,We’ll be live with our correspondent in Westm...,p,"Parliament, government and politics"
641,5,2016-06-02,BBC News at One,Also this lunchtime: The future of BHS - thou...,p,Transport


BBC News at Six 2016
BBC News at Six 2016
7113


Unnamed: 0,partition_id,date,source,transcript,type,topic
679,6,2016-06-02,BBC News at Six,BIRDSONG ROCK MUSIC PLAYS Come on now! Squea...,p,"Parliament, government and politics"
680,6,2016-06-02,BBC News at Six,We have a special report on the risks of radi...,p,European Union
681,6,2016-06-02,BBC News at Six,"And it’s waterproof but not inflation proof, ...",p,"Crime, civil law, justice and rights"
682,6,2016-06-02,BBC News at Six,On BBC London: We hear from the mother of a 1...,p,"Parliament, government and politics"
683,6,2016-06-02,BBC News at Six,And drivers are warned that Tower Bridge is s...,p,"Parliament, government and politics"


BBC News at Ten 2016
BBC News at Ten 2016
6859


Unnamed: 0,partition_id,date,source,transcript,type,topic
710,7,2016-06-02,BBC News at Ten,I can’t hear anything.,p,"Parliament, government and politics"
711,7,2016-06-02,BBC News at Ten,"OK, we need to intubate, get him ready for su...",p,"Parliament, government and politics"
712,7,2016-06-02,BBC News at Ten,"An unmissable Holby City: Tonight at Ten, up ...",p,"Business, industry and consumers"
713,7,2016-06-02,BBC News at Ten,Some MPs are expressing anger at the plight o...,p,"Business, industry and consumers"
714,7,2016-06-02,BBC News at Ten,Chancellor Merkel warns that Britain faces a ...,p,"Parliament, government and politics"


BBC News at One 2017
BBC News at One 2017
7584


Unnamed: 0,partition_id,date,source,transcript,type,topic
278,198,2017-05-01,BBC News at One,You cannot mess up now. Our nine best cooks a...,p,"Culture, media and sport"
279,198,2017-05-01,BBC News at One,MasterChef Semifinals... Police name the busi...,p,"Crime, civil law, justice and rights"
280,198,2017-05-01,BBC News at One,Social media companies are accused of a disgr...,p,"Parliament, government and politics"
281,198,2017-05-01,BBC News at One,A last week of campaigning in the French Pres...,p,"Parliament, government and politics"
282,198,2017-05-01,BBC News at One,And cuts to bus services in England and Wales...,p,"Crime, civil law, justice and rights"


BBC News at Six 2017
BBC News at Six 2017
8626


Unnamed: 0,partition_id,date,source,transcript,type,topic
616,207,2017-05-02,BBC News at Six,Our nine best cooks all fighting it out becau...,p,"Culture, media and sport"
617,207,2017-05-02,BBC News at Six,The Prime Minister tells the BBC she expects ...,p,"Crime, civil law, justice and rights"
618,207,2017-05-02,BBC News at Six,The Labour Shadow Home Secretary comes unstuc...,p,Economy and finance
619,207,2017-05-02,BBC News at Six,How new proposals to stop doping could mean p...,p,"Culture, media and sport"
620,207,2017-05-02,BBC News at Six,And the surfer rescued after 30 hours driftin...,p,"Parliament, government and politics"


BBC News at Ten 2017
BBC News at Ten 2017
8857


Unnamed: 0,partition_id,date,source,transcript,type,topic
677,209,2017-05-02,BBC News at Ten,Do you believe in your dreams?,p,"Parliament, government and politics"
678,209,2017-05-02,BBC News at Ten,"Versailles, Series Two, continues... Tonight ...",p,"Culture, media and sport"
679,209,2017-05-02,BBC News at Ten,"No, I mean... A jury at the inquest of a teen...",p,Health services and medicine
680,209,2017-05-02,BBC News at Ten,How leading former athletes could be written ...,p,"Parliament, government and politics"
681,209,2017-05-02,BBC News at Ten,And the story of the surfer rescued after 30 ...,p,"Parliament, government and politics"


BBC News at One 2018
BBC News at One 2018
6401


Unnamed: 0,partition_id,date,source,transcript,type,topic
690,11,2018-06-08,BBC News at One,It’s incredibly close. Britain’s Best Home Co...,p,"Culture, media and sport"
691,11,2018-06-08,BBC News at One,# BorisJohnson says the Brexit talks are head...,p,European Union
692,11,2018-06-08,BBC News at One,Also this lunchtime... More heated disagreeme...,p,"Parliament, government and politics"
693,11,2018-06-08,BBC News at One,A 90—year—old woman is seriously ill after be...,p,"Culture, media and sport"
694,11,2018-06-08,BBC News at One,Researchers say they found microplastic in al...,p,"Agriculture, animals, food and rural affairs"


BBC News at Six 2018
BBC News at Six 2018
8221


Unnamed: 0,partition_id,date,source,transcript,type,topic
744,13,2018-06-08,BBC News at Six,"The FIFA World Cup, starts Mthjune across the...",p,"Parliament, government and politics"
745,13,2018-06-08,BBC News at Six,The Prime Minister arrives in Canada for talk...,p,European Union
746,13,2018-06-08,BBC News at Six,The 90—year—old woman now seriously ill in ho...,p,"Parliament, government and politics"
747,13,2018-06-08,BBC News at Six,And... laughing all the way to the football W...,p,"Parliament, government and politics"
748,13,2018-06-08,BBC News at Six,Later on BBC London: Pay if you pollute — the...,p,Transport


BBC News at Ten 2018
BBC News at Ten 2018
8958


Unnamed: 0,partition_id,date,source,transcript,type,topic
816,15,2018-06-08,BBC News at Ten,Donald Trump catches leaders at the G7 summit...,p,European Union
817,15,2018-06-08,BBC News at Ten,"Meanwhile Theresa May wants to talk trade, bu...",p,European Union
818,15,2018-06-08,BBC News at Ten,The 90—year—old woman now seriously ill in ho...,p,"Parliament, government and politics"
819,15,2018-06-08,BBC News at Ten,Hell hath no fury — Tyson determined to make ...,p,"Parliament, government and politics"
820,15,2018-06-08,BBC News at Ten,And laughing all the way to the football Worl...,p,"Parliament, government and politics"


In [64]:
df['source'].unique()

array(['BBC News', 'BBC London News', 'BBC News at One',
       'BBC News at Six', 'BBC News at Ten',
       'BBC News Special: May Leadership Challenge'], dtype=object)

In [68]:
res_df

Unnamed: 0,year,month,BBC News at One,BBC News at Six,BBC News at Ten
0,2014,1,55031,0,0
1,2014,2,72284,0,0
2,2014,3,45821,0,0
3,2014,4,31898,0,0
4,2014,5,93214,0,0
...,...,...,...,...,...
175,2018,8,117004,166044,141136
176,2018,9,121208,141635,130903
177,2018,10,105169,173801,143380
178,2018,11,151798,143477,138502


In [69]:
rows

[[2014, 1, 317, 0, 0],
 [2014, 2, 395, 0, 0],
 [2014, 3, 266, 0, 0],
 [2014, 4, 193, 0, 0],
 [2014, 5, 589, 0, 0],
 [2014, 6, 593, 0, 0],
 [2014, 7, 668, 0, 0],
 [2014, 8, 466, 0, 0],
 [2014, 9, 405, 0, 0],
 [2014, 10, 455, 0, 0],
 [2014, 11, 384, 0, 0],
 [2014, 12, 436, 0, 0],
 [2014, 1, 317, 332, 0],
 [2014, 2, 395, 629, 0],
 [2014, 3, 266, 439, 0],
 [2014, 4, 193, 384, 0],
 [2014, 5, 589, 617, 0],
 [2014, 6, 593, 561, 0],
 [2014, 7, 668, 667, 0],
 [2014, 8, 466, 541, 0],
 [2014, 9, 405, 496, 0],
 [2014, 10, 455, 569, 0],
 [2014, 11, 384, 559, 0],
 [2014, 12, 436, 489, 0],
 [2014, 1, 317, 332, 549],
 [2014, 2, 395, 629, 500],
 [2014, 3, 266, 439, 518],
 [2014, 4, 193, 384, 469],
 [2014, 5, 589, 617, 598],
 [2014, 6, 593, 561, 499],
 [2014, 7, 668, 667, 484],
 [2014, 8, 466, 541, 445],
 [2014, 9, 405, 496, 461],
 [2014, 10, 455, 569, 532],
 [2014, 11, 384, 559, 523],
 [2014, 12, 436, 489, 490],
 [2015, 1, 554, 0, 0],
 [2015, 2, 649, 0, 0],
 [2015, 3, 710, 0, 0],
 [2015, 4, 677, 0, 0],

In [79]:
years = list(range(2014, 2019))
rows = []
word_count_rows = []
from collections import defaultdict
import calendar


for year in years:
    df = pd.read_csv('../data/partition_predictions/window_topic_prediction_with_short_sentences_merged_54_{}.csv'.format(year))

    programs = ['BBC News at One', 'BBC News at Six', 'BBC News at Ten']

    counts = {month:{day: {program:0 for program in programs} for day in range(1, calendar.monthrange(year, month)[1]+1)} for month in range(1, 13)}
#     counts = defaultdict(dict)
    for program in programs:
        print(program, year)
        df_p = df.loc[df.source == program]
        df_p = df_p.drop(['Unnamed: 0'], axis=1)
        print(program, year)
        print(len(df_p))
        # df.columns = ['partition_id', 'date', 'transcript', 'topic']
        df_p['date'] = pd.to_datetime(df_p['date'])
        # df['topic'] = df['topic'].apply(lambda x: ast.literal_eval(x)[0][0])
        display(df_p.head())

        last_topic = None
        for index, row in df_p.iterrows():
            month = row['date'].month
            day = row['date'].day
            topic = row['topic']
            if topic != last_topic:
                counts[month][day][program] += 1
            last_topic = topic

        for month in range(1, 13):
            for day in range(1, calendar.monthrange(year, month)[1]):
                row = [year, month, day]

                for program in programs:
                    row.append(counts[month][day][program])
                rows.append(row)

columns = ['year', 'month', 'day'] + programs
res_df = pd.DataFrame(rows, columns=columns)
res_df.to_csv('./average_partition_count_day.csv', index=False)
res_df

BBC News at One 2014
BBC News at One 2014
6659


Unnamed: 0,partition_id,date,source,transcript,type,topic
207,445,2014-03-04,BBC News at One,Yanukovych its legitimate president. Russian ...,p,International affairs
208,445,2014-03-04,BBC News at One,As Vladimir Putin warns the West that sanctio...,p,International affairs
209,445,2014-03-04,BBC News at One,Also this lunchtime... At the Oscar Pistorius...,p,Education
210,445,2014-03-04,BBC News at One,On display for the first time ever in Britain...,p,"Culture, media and sport"
211,445,2014-03-04,BBC News at One,And this is an exhibition that gives showers ...,p,Communities and families


BBC News at Six 2014
BBC News at Six 2014
8177


Unnamed: 0,partition_id,date,source,transcript,type,topic
133,442,2014-03-07,BBC News at Six,The head ofthe The head of the Metropolitan P...,p,"Parliament, government and politics"
134,442,2014-03-07,BBC News at Six,Vladimir Putin opens the Winter Paralympics i...,p,"Culture, media and sport"
135,442,2014-03-07,BBC News at Six,Network Rail apologises unreservedly for what...,p,"Business, industry and consumers"
136,442,2014-03-07,BBC News at Six,"The cost of a pint of milk going down, as the...",p,"Parliament, government and politics"
137,442,2014-03-07,BBC News at Six,And trenches where the First World War’s sold...,p,"Parliament, government and politics"


BBC News at Ten 2014
BBC News at Ten 2014
8024


Unnamed: 0,partition_id,date,source,transcript,type,topic
167,443,2014-03-07,BBC News at Ten,as devastating. With the Met facing one of th...,p,"Crime, civil law, justice and rights"
168,443,2014-03-07,BBC News at Ten,European observers are blocked again from ent...,p,"Parliament, government and politics"
169,443,2014-03-07,BBC News at Ten,All but one of Ukraine’s team stay away from ...,p,International affairs
170,443,2014-03-07,BBC News at Ten,The head of the Metropolitan Police says he w...,p,"Crime, civil law, justice and rights"
171,443,2014-03-07,BBC News at Ten,Our home affairs correspondent Matt Prodger h...,p,"Parliament, government and politics"


BBC News at One 2015
BBC News at One 2015
9068


Unnamed: 0,partition_id,date,source,transcript,type,topic
0,89,2015-04-06,BBC News at One,"Tense, or what? I’m feeling it. # ..I’m read...",p,"Parliament, government and politics"
1,89,2015-04-06,BBC News at One,You may depend on it for your everyday social...,p,"Parliament, government and politics"
2,89,2015-04-06,BBC News at One,And cheers for Prince Harry as he arrives in ...,p,"Parliament, government and politics"
3,89,2015-04-06,BBC News at One,Sweeping changes to pension rules which will ...,p,Economy and finance
4,89,2015-04-06,BBC News at One,The official start of the new tax year has sp...,p,Economy and finance


BBC News at Six 2015
BBC News at Six 2015
9827


Unnamed: 0,partition_id,date,source,transcript,type,topic
16,90,2015-04-06,BBC News at Six,This is a story of Britain - but a Britain we...,p,Economy and finance
17,90,2015-04-06,BBC News at Six,"In Bristol, David Cameron said it was immoral...",p,"Parliament, government and politics"
18,90,2015-04-06,BBC News at Six,I don’t want people to hear about the recover...,p,Economy and finance
19,90,2015-04-06,BBC News at Six,That’s exactly what Michael plans to do tomor...,p,Economy and finance
20,90,2015-04-06,BBC News at Six,The Spring challenge is to get into it succes...,p,"Parliament, government and politics"


BBC News at Ten 2015
BBC News at Ten 2015
8405


Unnamed: 0,partition_id,date,source,transcript,type,topic
50,92,2015-04-06,BBC News at Ten,# It’s a safe bet she’d never let life get he...,p,"Parliament, government and politics"
51,92,2015-04-06,BBC News at Ten,A new tax year and a new election row over wh...,p,Health services and medicine
52,92,2015-04-06,BBC News at Ten,"And our online presence might outlive us, but...",p,"Parliament, government and politics"
53,92,2015-04-06,BBC News at Ten,The spring challenge is to get into it succes...,p,"Parliament, government and politics"
54,92,2015-04-06,BBC News at Ten,Counter terrorism measures are stifling class...,p,Education


BBC News at One 2016
BBC News at One 2016
7057


Unnamed: 0,partition_id,date,source,transcript,type,topic
637,5,2016-06-02,BBC News at One,Mr Reginald Keys? We’re from Army notificatio...,p,Defence
638,5,2016-06-02,BBC News at One,We’d like you to take on Tony Blair at the ne...,p,"Parliament, government and politics"
639,5,2016-06-02,BBC News at One,You’re taking on the Prime Minister and you c...,p,"Parliament, government and politics"
640,5,2016-06-02,BBC News at One,We’ll be live with our correspondent in Westm...,p,"Parliament, government and politics"
641,5,2016-06-02,BBC News at One,Also this lunchtime: The future of BHS - thou...,p,Transport


BBC News at Six 2016
BBC News at Six 2016
7113


Unnamed: 0,partition_id,date,source,transcript,type,topic
679,6,2016-06-02,BBC News at Six,BIRDSONG ROCK MUSIC PLAYS Come on now! Squea...,p,"Parliament, government and politics"
680,6,2016-06-02,BBC News at Six,We have a special report on the risks of radi...,p,European Union
681,6,2016-06-02,BBC News at Six,"And it’s waterproof but not inflation proof, ...",p,"Crime, civil law, justice and rights"
682,6,2016-06-02,BBC News at Six,On BBC London: We hear from the mother of a 1...,p,"Parliament, government and politics"
683,6,2016-06-02,BBC News at Six,And drivers are warned that Tower Bridge is s...,p,"Parliament, government and politics"


BBC News at Ten 2016
BBC News at Ten 2016
6859


Unnamed: 0,partition_id,date,source,transcript,type,topic
710,7,2016-06-02,BBC News at Ten,I can’t hear anything.,p,"Parliament, government and politics"
711,7,2016-06-02,BBC News at Ten,"OK, we need to intubate, get him ready for su...",p,"Parliament, government and politics"
712,7,2016-06-02,BBC News at Ten,"An unmissable Holby City: Tonight at Ten, up ...",p,"Business, industry and consumers"
713,7,2016-06-02,BBC News at Ten,Some MPs are expressing anger at the plight o...,p,"Business, industry and consumers"
714,7,2016-06-02,BBC News at Ten,Chancellor Merkel warns that Britain faces a ...,p,"Parliament, government and politics"


BBC News at One 2017
BBC News at One 2017
7584


Unnamed: 0,partition_id,date,source,transcript,type,topic
278,198,2017-05-01,BBC News at One,You cannot mess up now. Our nine best cooks a...,p,"Culture, media and sport"
279,198,2017-05-01,BBC News at One,MasterChef Semifinals... Police name the busi...,p,"Crime, civil law, justice and rights"
280,198,2017-05-01,BBC News at One,Social media companies are accused of a disgr...,p,"Parliament, government and politics"
281,198,2017-05-01,BBC News at One,A last week of campaigning in the French Pres...,p,"Parliament, government and politics"
282,198,2017-05-01,BBC News at One,And cuts to bus services in England and Wales...,p,"Crime, civil law, justice and rights"


BBC News at Six 2017
BBC News at Six 2017
8626


Unnamed: 0,partition_id,date,source,transcript,type,topic
616,207,2017-05-02,BBC News at Six,Our nine best cooks all fighting it out becau...,p,"Culture, media and sport"
617,207,2017-05-02,BBC News at Six,The Prime Minister tells the BBC she expects ...,p,"Crime, civil law, justice and rights"
618,207,2017-05-02,BBC News at Six,The Labour Shadow Home Secretary comes unstuc...,p,Economy and finance
619,207,2017-05-02,BBC News at Six,How new proposals to stop doping could mean p...,p,"Culture, media and sport"
620,207,2017-05-02,BBC News at Six,And the surfer rescued after 30 hours driftin...,p,"Parliament, government and politics"


BBC News at Ten 2017
BBC News at Ten 2017
8857


Unnamed: 0,partition_id,date,source,transcript,type,topic
677,209,2017-05-02,BBC News at Ten,Do you believe in your dreams?,p,"Parliament, government and politics"
678,209,2017-05-02,BBC News at Ten,"Versailles, Series Two, continues... Tonight ...",p,"Culture, media and sport"
679,209,2017-05-02,BBC News at Ten,"No, I mean... A jury at the inquest of a teen...",p,Health services and medicine
680,209,2017-05-02,BBC News at Ten,How leading former athletes could be written ...,p,"Parliament, government and politics"
681,209,2017-05-02,BBC News at Ten,And the story of the surfer rescued after 30 ...,p,"Parliament, government and politics"


BBC News at One 2018
BBC News at One 2018
6401


Unnamed: 0,partition_id,date,source,transcript,type,topic
690,11,2018-06-08,BBC News at One,It’s incredibly close. Britain’s Best Home Co...,p,"Culture, media and sport"
691,11,2018-06-08,BBC News at One,# BorisJohnson says the Brexit talks are head...,p,European Union
692,11,2018-06-08,BBC News at One,Also this lunchtime... More heated disagreeme...,p,"Parliament, government and politics"
693,11,2018-06-08,BBC News at One,A 90—year—old woman is seriously ill after be...,p,"Culture, media and sport"
694,11,2018-06-08,BBC News at One,Researchers say they found microplastic in al...,p,"Agriculture, animals, food and rural affairs"


BBC News at Six 2018
BBC News at Six 2018
8221


Unnamed: 0,partition_id,date,source,transcript,type,topic
744,13,2018-06-08,BBC News at Six,"The FIFA World Cup, starts Mthjune across the...",p,"Parliament, government and politics"
745,13,2018-06-08,BBC News at Six,The Prime Minister arrives in Canada for talk...,p,European Union
746,13,2018-06-08,BBC News at Six,The 90—year—old woman now seriously ill in ho...,p,"Parliament, government and politics"
747,13,2018-06-08,BBC News at Six,And... laughing all the way to the football W...,p,"Parliament, government and politics"
748,13,2018-06-08,BBC News at Six,Later on BBC London: Pay if you pollute — the...,p,Transport


BBC News at Ten 2018
BBC News at Ten 2018
8958


Unnamed: 0,partition_id,date,source,transcript,type,topic
816,15,2018-06-08,BBC News at Ten,Donald Trump catches leaders at the G7 summit...,p,European Union
817,15,2018-06-08,BBC News at Ten,"Meanwhile Theresa May wants to talk trade, bu...",p,European Union
818,15,2018-06-08,BBC News at Ten,The 90—year—old woman now seriously ill in ho...,p,"Parliament, government and politics"
819,15,2018-06-08,BBC News at Ten,Hell hath no fury — Tyson determined to make ...,p,"Parliament, government and politics"
820,15,2018-06-08,BBC News at Ten,And laughing all the way to the football Worl...,p,"Parliament, government and politics"


Unnamed: 0,year,month,day,BBC News at One,BBC News at Six,BBC News at Ten
0,2014,1,1,0,0,0
1,2014,1,2,0,0,0
2,2014,1,3,0,0,0
3,2014,1,4,0,0,0
4,2014,1,5,0,0,0
...,...,...,...,...,...,...
5293,2018,12,26,25,0,24
5294,2018,12,27,0,0,12
5295,2018,12,28,0,0,24
5296,2018,12,29,0,0,0


In [82]:
res_df2 = res_df.drop(['day'], axis=1).groupby(['year', 'month'], as_index=False).mean()
res_df2.to_csv('./average_partition_count_month.csv', index=False)

In [91]:
years = list(range(2014, 2019))
rows = []

for year in years:
    df = pd.read_csv('../data/partition_predictions/window_topic_prediction_with_short_sentences_merged_54_{}.csv'.format(year))

    programs = ['BBC News at One', 'BBC News at Six', 'BBC News at Ten']
    display(df.head())
    counts = {month:{program:0 for program in programs} for month in range(1, 13)}
    word_counts = {month:{program:0 for program in programs} for month in range(1, 13)}
    
    df = df.loc[df.source.isin(programs)]
    
    df['word_len'] = df['transcript'].apply(lambda x: len(x.split()))
    
    rows.append([year, df['word_len'].mean()])

Unnamed: 0.1,Unnamed: 0,partition_id,date,source,transcript,type,topic
0,0,435,2014-03-01,BBC Weekend News,"Don’t mind, do you? Shame for both of us to l...",p,"Agriculture, animals, food and rural affairs"
1,1,435,2014-03-01,BBC Weekend News,Today I was led to believe that a patient I h...,p,"Crime, civil law, justice and rights"
2,2,435,2014-03-01,BBC Weekend News,This is potentially a grave threat to the ter...,p,International affairs
3,3,435,2014-03-01,BBC Weekend News,"Also tonight, Ed Miliband’s plans to reform L...",p,"Parliament, government and politics"
4,4,435,2014-03-01,BBC Weekend News,"And Newcastle’s manager, Alan Pardew, is sent...",p,"Parliament, government and politics"


Unnamed: 0.1,Unnamed: 0,partition_id,date,source,transcript,type,topic
0,0,89,2015-04-06,BBC News at One,"Tense, or what? I’m feeling it. # ..I’m read...",p,"Parliament, government and politics"
1,1,89,2015-04-06,BBC News at One,You may depend on it for your everyday social...,p,"Parliament, government and politics"
2,2,89,2015-04-06,BBC News at One,And cheers for Prince Harry as he arrives in ...,p,"Parliament, government and politics"
3,3,89,2015-04-06,BBC News at One,Sweeping changes to pension rules which will ...,p,Economy and finance
4,4,89,2015-04-06,BBC News at One,The official start of the new tax year has sp...,p,Economy and finance


Unnamed: 0.1,Unnamed: 0,partition_id,date,source,transcript,type,topic
0,0,0,2016-06-04,Joins BBC News,we would have seen during this weekend and th...,p,Transport
1,1,0,2016-06-04,Joins BBC News,manage that and make sure we stay within the ...,p,"Parliament, government and politics"
2,2,0,2016-06-04,Joins BBC News,We start at the desk where you have the three...,p,"Agriculture, animals, food and rural affairs"
3,3,0,2016-06-04,Joins BBC News,So far the inexperience is an intriguing mix ...,p,"Parliament, government and politics"
4,4,0,2016-06-04,Joins BBC News,"It will allow, sessions to be moderated in re...",p,"Parliament, government and politics"


Unnamed: 0.1,Unnamed: 0,partition_id,date,source,transcript,type,topic
0,0,197,2017-05-01,BBC News,MasterChef Semifinals... MasterChef Ser galle...,p,"Culture, media and sport"
1,1,197,2017-05-01,BBC News,A chance discovery by a pre-school child has ...,p,"Agriculture, animals, food and rural affairs"
2,2,197,2017-05-01,BBC News,As Australia tightens up its visa rules for s...,p,"Parliament, government and politics"
3,3,197,2017-05-01,BBC News,"And before we go, let’s take you to Chengdu C...",p,Energy and environment
4,4,197,2017-05-01,BBC News,We saw some big contrasts across the UK on Su...,p,"Culture, media and sport"


Unnamed: 0.1,Unnamed: 0,partition_id,date,source,transcript,type,topic
0,0,0,2018-06-02,BBC News,"but also gathering storm clouds, some further...",p,Energy and environment
1,1,0,2018-06-02,BBC News,"Jotheremy Thorpe, I’m arresting you for consp...",p,"Parliament, government and politics"
2,2,0,2018-06-02,BBC News,Canada hasjoined the European Union in filing...,p,European Union
3,3,0,2018-06-02,BBC News,After the closure of its main steel plant 20 ...,p,"Business, industry and consumers"
4,4,0,2018-06-02,BBC News,This is further weakening the transatlantic r...,p,Transport


In [92]:
rows

[[2014, 131.00944881889762],
 [2015, 127.72282051282052],
 [2016, 190.20642921679584],
 [2017, 233.0420473132006],
 [2018, 220.57442748091603]]