In [44]:
import pandas as pd
import numpy as np
import textstat

In [45]:
rm = pd.read_csv('Maddow_cleaned.tsv',sep='\t').drop(columns='Unnamed: 0')
tc = pd.read_csv('Tucker_transcripts_2.10.23.tsv', sep='\t').drop(columns='Unnamed: 0')

In [46]:
tc

Unnamed: 0,url,timestamp,title,text
0,https://www.foxnews.com/opinion/tucker-carlson...,"February 9, 2023 10:19pm EST",TUCKER CARLSON: Self-defense is becoming illeg...,WARNING: Graphic footage—Fox News host Tucker ...
1,https://www.foxnews.com/opinion/tucker-carlson...,"February 8, 2023 10:25pm EST",TUCKER CARLSON: The lies 'just kept coming' du...,Fox News host Tucker Carlson reacts to Presid...
2,https://www.foxnews.com/opinion/tucker-carlson...,"February 7, 2023 10:51pm EST",TUCKER CARLSON: America's problems are fixable...,Fox News host Tucker Carlson breaks down the t...
3,https://www.foxnews.com/opinion/tucker-carlson...,"February 6, 2023 9:33pm EST",TUCKER CARLSON: The people who owe you the tru...,Fox News host Tucker Carlson calls out the Bid...
4,https://www.foxnews.com/opinion/tucker-carlson...,"February 3, 2023 9:59pm EST",TUCKER CARLSON: Biden administration's stance ...,Fox News host Tucker Carlson takes on the Bid...
...,...,...,...,...
642,https://www.foxnews.com/opinion/tucker-carlson...,"November 19, 2019 1:30pm EST","Tucker Carlson: Media, federal government toug...",Stone found guilty on all seven counts; Epstei...
643,https://www.foxnews.com/opinion/tucker-carlson...,"November 15, 2019 1:09pm EST",Tucker Carlson: Trump impeachment inquiry will...,Media goes into overdrive on impeachment heari...
644,https://www.foxnews.com/opinion/tucker-carlson...,"November 14, 2019 9:24am EST",Tucker Carlson: It's never been clear what Dem...,First public impeachment hearings take place.I...
645,https://www.foxnews.com/opinion/tucker-carlson...,"November 13, 2019 10:13am EST",Tucker Carlson: The end of free speech – What ...,Liberals working to crack down on the First Am...


In [47]:
tc.rename(columns = {'url':'URL', 'text':'transcript'}, inplace = True)

In [48]:
rm = rm.drop('Unnamed: 0.1', axis = 1)

### Remove duplicates from rm and tc

In [49]:
rm = rm.drop_duplicates(subset=None, keep= 'first', inplace=False)
tc = tc.drop_duplicates(subset=None, keep= 'first', inplace=False)

In [50]:
print(len(tc))

647


### Add a 'host' column to rm and tc

In [51]:
rm['host'] = 'Rachel Maddow'
tc['host'] = 'Tucker Carlson'

In [52]:
# Change the format of the 'timestamp' columnn for tc
from datetime import datetime
from dateutil.parser import parse

def string_to_datetime(datetime_str):
    datetime_obj = parse(datetime_str, fuzzy = True)
    return datetime_obj

In [53]:
tc.timestamp = [string_to_datetime(timestamp) for timestamp in tc.timestamp]



### Flesch-Kincaid reading levels

![alternative text](Flesch-Kincaid_functions.jpg)

In [54]:
# Use the Flesch Reading Ease formula to assess the ease of readability of each transcript (returns the Flesch Reading Ease Score)
rm['flesch_reading_ease'] = [textstat.flesch_reading_ease(text) for text in rm.transcript]
tc['flesch_reading_ease'] = [textstat.flesch_reading_ease(text) for text in tc.transcript]


# Get the Flesch-Kincaid Grade level of each transcript (score of 9.3 means a ninth grader would be able to read the document)
rm['flesch_kincaid_grade_level'] = [textstat.flesch_kincaid_grade(text) for text in rm.transcript]
tc['flesch_kincaid_grade_level'] = [textstat.flesch_kincaid_grade(text) for text in tc.transcript]

In [56]:
# Combine the dataframes
frames = [rm, tc]

df = pd.concat(frames)
print(len(df))

707


In [58]:
# df = df.drop('Unnamed: 0.1', axis = 1)
df = df.drop_duplicates(subset=None, keep= 'first', inplace=False)

In [59]:
df

Unnamed: 0,URL,timestamp,transcript,stop words removed,host,flesch_reading_ease,flesch_kincaid_grade_level,title
0,https://www.msnbc.com/transcripts/rachel-maddo...,2022-10-04 01:00:00+00:00,"SummaryWith public trust at a low, the Supreme...",SummaryWith public trust low Supreme Court ope...,Rachel Maddow,68.70,8.5,
2,https://www.msnbc.com/transcripts/rachel-maddo...,2022-09-27 01:00:00+00:00,"Summary ""American Psychosis"" examines the rise...",Summary American Psychosis examines rise extre...,Rachel Maddow,69.82,8.1,
4,https://www.msnbc.com/transcripts/rachel-maddo...,2022-09-13 01:00:00+00:00,"Summary Interview with Geoffrey Berman, the U....",Summary Interview Geoffrey Berman U attorney T...,Rachel Maddow,63.09,8.6,
6,https://www.msnbc.com/transcripts/rachel-maddo...,2022-08-30 01:00:00+00:00,"SummaryThe ODNI is going to assess ""potential ...",SummaryThe ODNI going assess potential risk na...,Rachel Maddow,59.64,9.9,
8,https://www.msnbc.com/transcripts/rachel-maddo...,2022-08-23 01:00:00+00:00,"SummaryInterview with Dr. Anthony Fauci, direc...",SummaryInterview Dr Anthony Fauci director Nat...,Rachel Maddow,59.43,10.0,
...,...,...,...,...,...,...,...,...
642,https://www.foxnews.com/opinion/tucker-carlson...,2019-11-19 13:30:00,Stone found guilty on all seven counts; Epstei...,,Tucker Carlson,67.96,6.7,"Tucker Carlson: Media, federal government toug..."
643,https://www.foxnews.com/opinion/tucker-carlson...,2019-11-15 13:09:00,Media goes into overdrive on impeachment heari...,,Tucker Carlson,66.64,7.2,Tucker Carlson: Trump impeachment inquiry will...
644,https://www.foxnews.com/opinion/tucker-carlson...,2019-11-14 09:24:00,First public impeachment hearings take place.I...,,Tucker Carlson,74.90,6.1,Tucker Carlson: It's never been clear what Dem...
645,https://www.foxnews.com/opinion/tucker-carlson...,2019-11-13 10:13:00,Liberals working to crack down on the First Am...,,Tucker Carlson,66.33,7.3,Tucker Carlson: The end of free speech – What ...


In [66]:
df.to_csv('RM-TC_combined_cleaned.tsv', sep="\t")

In [65]:
grouped = df.groupby('host')

# Find the average Flesch Reading Ease Score across all of the Rachel Maddow and Tucker Carlson transcripts
# Find the average Flesch-Kincaid Grade Level across all of the Rachel Maddow and Tucker Carlson transcripts
grouped.mean()



  grouped.mean()


Unnamed: 0_level_0,flesch_reading_ease,flesch_kincaid_grade_level
host,Unnamed: 1_level_1,Unnamed: 2_level_1
Rachel Maddow,66.315,8.49
Tucker Carlson,72.229104,6.495209
