In [1]:
import pandas as pd

In [2]:
d1 = pd.read_csv('data/csv/us_election_2020_1st_presidential_debate.csv')
d2 = pd.read_csv('data/csv/us_election_2020_2nd_presidential_debate.csv')
t = pd.read_csv('data/csv/us_election_2020_trump_town_hall.csv')
b = pd.read_csv('data/csv/us_election_2020_biden_town_hall.csv')
e = pd.read_csv('data/csv/extra_training_data.csv')

In [3]:
d1.loc[d1['speaker'] == 'President Donald J. Trump','speaker'] = 'Donald Trump'
d1.loc[d1['speaker'] == 'Vice President Joe Biden','speaker'] = 'Joe Biden'

In [4]:
t.loc[t['speaker'] == 'President Trump','speaker'] = 'Donald Trump'

In [5]:
def cand_only(df):
    df.drop(df.index[~((df['speaker']=='Donald Trump') | (df['speaker']=='Joe Biden'))], inplace=True)
    df.reset_index(drop=True, inplace=True)

In [6]:
cand_only(d1)
cand_only(d2)
cand_only(t)
cand_only(b)
print('\n')





In [7]:
def drop_min(df):
    df.drop(columns=['minute'],inplace=True)

In [8]:
drop_min(d1)
drop_min(d2)
drop_min(t)
drop_min(b)
print('\n')





In [9]:
frames=[d1,d2,t,b,e]

In [10]:
master_df = pd.concat(frames, ignore_index=True)

In [11]:
df = master_df.copy()

In [12]:
import re
import string

def text_process(text):
  text = text.lower() ##Convert into lower
  text = re.sub('\[.*?\]','',text) ## Removing brackets
  text = re.sub('[%s]' % re.escape(string.punctuation), '' , text)  ## remove punctuations
  text = re.sub('\w*\d\w*','',text)
  return text

In [13]:
df['text'] = df['text'].apply(text_process)

In [14]:
df.head()

Unnamed: 0,speaker,text
0,Joe Biden,how you doing man
1,Donald Trump,how are you doing
2,Joe Biden,i’m well
3,Donald Trump,thank you very much chris i will tell you very...
4,Donald Trump,and we won the election and therefore we have ...


In [15]:
##Model Training

from sklearn.model_selection import train_test_split

ID_V = df.text ##ID_V  = Independent Var
D_V = df.speaker #D_V = Dependent Var

IDV_train, IDV_test, DV_train, DV_test = train_test_split(ID_V, D_V, test_size=0.2, random_state=42)

print('IDV_train:',len(IDV_train))
print('IDV_test:',len(IDV_test))
print('DV_train:',len(DV_train))
print('DV_test:',len(DV_test))

IDV_train: 2935
IDV_test: 734
DV_train: 2935
DV_test: 734


In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

vec = TfidfVectorizer()
reg = LogisticRegression(solver = 'lbfgs')

from sklearn.pipeline import Pipeline

In [17]:
model = Pipeline([('vectorizer',vec),('classifier',reg)])

model.fit(IDV_train,DV_train)

Pipeline(steps=[('vectorizer', TfidfVectorizer()),
                ('classifier', LogisticRegression())])

In [18]:
from sklearn.metrics import confusion_matrix
predictions = model.predict(IDV_test)
confusion_matrix(predictions, DV_test)

array([[238,  19],
       [ 85, 392]])

In [19]:
accuracy = round(((238+392)/(238+392+19+85))*100,2)
accuracy

85.83

### Let us check the model's predictions for some of Joe Biden's quotes.

In [20]:
ex = ["Leadership, at its core, in my view, is about being personal.",

"It's time to put away the harsh rhetoric, lower the temperature, see each other again ... we have to stop treating our opponents like enemies",

"On Monday, I will name a group of leading scientists and experts as transition advisors to help take the Biden-Harris COVID plan and convert it into an action blueprint that will start on January 20th, 2021",

"Already more than 220,000 people in the US have lost their lives to this virus. Worse yet, a new study from Columbia University suggests that anywhere between 130,000 and 210,000 of those deaths were avoidable",

"The good news is that people know me… The bad news is, they know me.",

"If we follow the science and keep faith with one another, I promise you, we'll get through this and come out the other side much faster than the rate we're going now. Look, you all know this. The American people have always given their best to this country in times of crisis. And this time isn't any different,"]
result = model.predict(ex)
result

array(['Joe Biden', 'Joe Biden', 'Joe Biden', 'Joe Biden', 'Donald Trump',
       'Joe Biden'], dtype=object)

In [21]:
ex=['I sought this office to restore the soul of America. To rebuild the backbone of the nation — the middle class. To make America respected around the world again and to unite us here at home.']
result = model.predict(ex)
result

array(['Joe Biden'], dtype=object)

In [22]:
ex=['We all agreed that we want to get the economy back on track. We need our workers to be back on the job by getting the virus under control. We’re going in a very dark winter. Things are going to get much tougher before they get easier.']
result = model.predict(ex)
result

array(['Joe Biden'], dtype=object)

In [23]:
ex=['Well, I hope there’s going to be a lot of people who vote for me because of who I am, but I think the contrast between Donald Trump and me is about as stark as it can get in terms of our value set and how we view the world.']
result = model.predict(ex)
result

array(['Joe Biden'], dtype=object)

In [24]:
ex=['Right now, the biggest domestic issue is our health. Right now, COVID. COVID, the way he’s handling COVID is just absolutely totally irresponsible. He’s telling people that we’ve turned the bend in one of his recent rallies. Well, he’s gone, and as my grandpop would say, he’s gone round the bend. I mean, we are in real trouble.']
result = model.predict(ex)
result

array(['Joe Biden'], dtype=object)

In [25]:
ex=['You’d don’t have to lock down the economy. It depends on the community. It depends on where it’s in real trouble. And you have to do things that make sense that make it easier for people to avoid being exposed. Freedom is about making sure that you care about the people you’re around that they be free too. It’s a patriotism to put this mask on.']
result = model.predict(ex)
result

array(['Joe Biden'], dtype=object)

### Let us check the model's predictions for some of Donald Trump's quotes.

In [26]:
ex = ["We have it totally under control. It's one person coming in from China. It's going to be just fine",

"I like this stuff. I really get it. People are surprised that I understand it... Every one of these doctors said, 'How do you know so much about this?' Maybe I have a natural ability. Maybe I should have done that instead of running for president",

"Then I see the disinfectant which knocks it out in a minute. One minute. And is there a way we can do something like that, by injection inside for almost a cleaning? Because you see it gets in the lungs and it does a tremendous number on the lungs, so it'd be interesting to check that.",

"When we have a lot of cases, I don't look at that as a bad thing, I look at that as, in a certain respect, as being a good thing... Because it means our testing is much better. I view it as a badge of honor, really, it's a badge of honor.",

"We're twenty-sixth in the world. Twenty-five countries are better than us at education. And some of them are like third world countries. But we're becoming a third world country."]
result = model.predict(ex)
result

array(['Donald Trump', 'Donald Trump', 'Donald Trump', 'Donald Trump',
       'Joe Biden'], dtype=object)

In [27]:
ex=['I will not go. This administration will not be going to a lockdown. Hopefully, whatever happens in the future, who knows which administration it will be. I guess time will tell, but I can tell you, this administration will not go to a lockdown. They won’t be a necessity.']
result = model.predict(ex)
result 

array(['Joe Biden'], dtype=object)

In [28]:
ex=['Democrats are the party of the big donors. The big media, the big tech, it seems, and Republicans have become the party of the American worker, and that’s what’s happened. And we’re also, I believe the party of inclusion. As everyone now recognizes, media polling was election interference in the truest sense of that word, by powerful special interests.']
result = model.predict(ex)
result

array(['Donald Trump'], dtype=object)

In [29]:
ex=['They had him up four points in Ohio and they were off by 12.2 points, and I also won Ohio, great state of Ohio very easily. And the Washington Post said, “Biden up 17 points in Wisconsin,” and it was basically even. They were off by about 17 points, and they knew that, they’re not stupid people. They knew that. Suppression.']
result = model.predict(ex)
result

array(['Donald Trump'], dtype=object)

In [30]:
ex=['In Philadelphia, observers have been kept far away, very far away. So far that people are using binoculars to try and see, and there’s been tremendous problems caused. They put a paper on all of the windows so you can’t see in, and the people that are banned are very unhappy and become somewhat violent.']
result = model.predict(ex)
result

array(['Donald Trump'], dtype=object)

In [31]:
ex=['One of the worst trade deals ever, how anybody could have signed it, but worse, how anybody could have let it run for 25 years or whatever it was. They just took advantage of us. We had 60,000 empty plants and factories in our country by the time that mess got finished. So we just signed a great deal, USMCA. It’s the largest trade deal ever made, Mexico, Canada, the largest ever made.']
result = model.predict(ex)
result

array(['Donald Trump'], dtype=object)

In [32]:
ex=['In fact, I’m going to have to be very nice to UPS. UPS, I love you, Carol, wherever you are, Carol. I love you, Carol. Now it’s very bad what’s going on with mail-in ballots. Okay? As differentiated from absentee ballots where you have to go and you go through a process because you can’t be there for some reason, but the mail-in ballots is going to be, they’re going to be rigged. They’re going to be a terrible situation.']
result = model.predict(ex)
result

array(['Donald Trump'], dtype=object)

In [33]:
ex=['We’re here today to celebrate and expand our historic campaign to rescue American workers from job-killing regulations. Before I came into office, American workers were smothered by merciless avalanche of wasteful and expensive and intrusive federal regulation. These oppressive burdensome mandates were a stealth tax on our people, slashing take home pay, suppressing innovation, surging the cost of goods, and shipping millions of American jobs overseas. ']
result = model.predict(ex)
result

array(['Joe Biden'], dtype=object)

In [34]:
ex=['We’re bringing back consumer choice in home appliances so that you can buy washers and dryers, shower heads and faucets. So shower heads, you take a shower, the water doesn’t come out. You want to wash your hands, the water doesn’t come out.']
result = model.predict(ex)
result

array(['Joe Biden'], dtype=object)

In [35]:
ex=['Because my hair, I don’t know about you, but it has to be perfect. Perfect. Dishwashers, you didn’t have any water so the people that do the dishes, you press it and it goes again and you do it again and again. ']
result = model.predict(ex)
result

array(['Donald Trump'], dtype=object)

In [36]:
ex=['But there and then, we got hit by the virus that came from China. We’ve made a lot of progress. Our strategy is moving along well. It goes out in one area and rears back its ugly face in another area.']
result = model.predict(ex)
result

array(['Donald Trump'], dtype=object)

In [37]:
ex=['But the second debate in particular, I think was something that worked out very well. And then we did the… We got back, we’re lucky we got it back, because for a period of time we did not think we were going to get these big rallies back. And we got them back and they were amazing. I mean, they were amazing. And you add it all together, and I think we really took off.']
result = model.predict(ex)
result

array(['Donald Trump'], dtype=object)

In [38]:
ex=['Well, it’s become pretty standard. We built the greatest Academy in the world. It was horribly interrupted by something that should’ve never happened. Came in from China, the plague, the plague from China, the horrible situation. And we had to shut it down and we had to learn about it. We had to shut it down and we did the right thing. We saved two million people. We saved a lot of people. The original model was 2.2 million people. And so if you use that model, and you go, that was a main model, you’ve used that model. We saved two million people, and now we’re building it back up again. And we had the best economy. Nobody had ever seen anything like it. And now what happens is we build it up again and we are doing it at a record clip.']
result = model.predict(ex)
result

array(['Donald Trump'], dtype=object)

In [39]:
ex=['So I’m doing a big series of phone calls and to some really good people. And you’re the first one, by the way, but I’m doing a big series of calls. And in doing the calls, I’m going to be talking to some people that really, that have been very important. Both to me, important, some very important calls and some people that have been very loyal to me over the years. I like those people too. Even if they reach about 15 people, that’s okay with me.']
result = model.predict(ex)
result

array(['Donald Trump'], dtype=object)