### Importing and preparing classifier

In [1]:
from bs4 import BeautifulSoup as bs
import pandas as pd
from selenium import webdriver
import pickle

In [2]:
# Load the CARDS classifier bundle from disk.
#
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so this
# path must only ever point at a model file obtained from a trusted source.
# NOTE(review): this cell's output links to scikit-learn's model-persistence
# "security & maintainability limitations" page, which suggests the pickle was
# written by a different scikit-learn version than the one installed here --
# confirm, and pin the scikit-learn version the model was trained with.
with open('models/CARDS_Logistic_Classifier.pkl', 'rb') as f:
    logit = pickle.load(f)

# The bundle is indexed by three keys; unpack them into the globals that the
# classification cells below rely on.
vectorizer = logit['vectorizer']   # used as vectorizer.transform(list_of_paragraphs)
clf = logit['clf']                 # used as clf.predict(features)
le = logit['label_encoder']        # used as le.inverse_transform(predictions)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  logit = pickle.load(f)
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


### Classifying AEI claims

In [3]:
# Read the AEI articles from the scrapers' data directory, print the frame's
# (rows, columns) tuple, and preview the first five rows.
aei_df = pd.read_csv(filepath_or_buffer="scrapers/data/AEI.csv")
print(aei_df.shape)
aei_df.head(n=5)

(29, 7)


Unnamed: 0,TITLE,AUTHOR,CONTENT,DATE,TYPE,THINK_TANK,LINK
0,From Environmentalism to Climate Catastrophism...,Ruy Teixeira,Key Points\n\nThe environmental movement has a...,"May 10, 2023",Report,AEI,https://www.aei.org/research-products/report/f...
1,Letter: Electric Vehicles Won’t Save the Climate,Benjamin Zycher,Note: This letter to the editor appeared in th...,"April 26, 2023",Article,AEI,https://www.aei.org/articles/letter-electric-v...
2,Earth Day 2023: Utterly Bereft of Ideas,Benjamin Zycher,Earth Day falls on April 22 — Lenin’s birthday...,"April 21, 2023",Article,AEI,https://www.aei.org/articles/earth-day-2023-ut...
3,OPEC Remains an Obstacle for Biden’s Green-Ene...,"Benjamin Zycher, James Pethokoukis",Can it surprise anyone that the multiple Biden...,"April 17, 2023",Article,AEI,https://www.aei.org/articles/opec-remains-an-o...
4,Comment to the Department of Energy: Proposed ...,Benjamin Zycher,Summary\n\nThe estimates on benefits and costs...,"April 14, 2023",Testimony,AEI,https://www.aei.org/research-products/testimon...


In [4]:
# Split each article body into a list of paragraphs (separated by a blank line).
# Only plain strings are split: missing values (NaN) pass through unchanged and
# entries that are already lists are left alone. That makes this cell safe to
# re-run, whereas calling .str.split a second time on the already-split column
# would replace the paragraph lists with NaN.
aei_df["CONTENT"] = aei_df["CONTENT"].map(
    lambda text: text.split("\n\n") if isinstance(text, str) else text
)

In [5]:
# Predict a claim code for every paragraph of every AEI article.
classifications = []
for paragraphs in aei_df["CONTENT"]:
    # Articles with no CONTENT arrive as a missing value (NaN) instead of a list
    # of paragraphs. is_scalar is checked first so pd.isna is only asked about a
    # single value (on a list it would return an array, not a bool); together
    # they cover NaN, None and pd.NA rather than only the exact `float` type.
    is_missing = pd.api.types.is_scalar(paragraphs) and pd.isna(paragraphs)
    if is_missing or len(paragraphs) == 0:
        # Nothing to classify: record "no claims" so rows stay aligned.
        classifications.append([])
        continue

    X = vectorizer.transform(paragraphs)
    predictions = clf.predict(X)  # one encoded label per paragraph

    # Decode the predictions back into claim codes (e.g. "0_0", "4_2").
    claim_classifications = list(le.inverse_transform(predictions))
    classifications.append(claim_classifications)

# Add a CLAIMS column: for each article, the list of claim codes, one per paragraph.
aei_df["CLAIMS"] = classifications
aei_df.head()

Unnamed: 0,TITLE,AUTHOR,CONTENT,DATE,TYPE,THINK_TANK,LINK,CLAIMS
0,From Environmentalism to Climate Catastrophism...,Ruy Teixeira,"[Key Points, The environmental movement has al...","May 10, 2023",Report,AEI,https://www.aei.org/research-products/report/f...,"[0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, ..."
1,Letter: Electric Vehicles Won’t Save the Climate,Benjamin Zycher,[Note: This letter to the editor appeared in t...,"April 26, 2023",Article,AEI,https://www.aei.org/articles/letter-electric-v...,"[0_0, 4_2, 0_0, 0_0]"
2,Earth Day 2023: Utterly Bereft of Ideas,Benjamin Zycher,[Earth Day falls on April 22 — Lenin’s birthda...,"April 21, 2023",Article,AEI,https://www.aei.org/articles/earth-day-2023-ut...,"[0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 4_4, ..."
3,OPEC Remains an Obstacle for Biden’s Green-Ene...,"Benjamin Zycher, James Pethokoukis",[Can it surprise anyone that the multiple Bide...,"April 17, 2023",Article,AEI,https://www.aei.org/articles/opec-remains-an-o...,"[0_0, 0_0, 0_0, 0_0, 5_1, 0_0, 0_0, 0_0]"
4,Comment to the Department of Energy: Proposed ...,Benjamin Zycher,"[Summary, The estimates on benefits and costs ...","April 14, 2023",Testimony,AEI,https://www.aei.org/research-products/testimon...,"[0_0, 0_0, 0_0, 5_1, 0_0, 0_0]"


### Classifying Heartland claims

In [6]:
# Read the Heartland articles from the scrapers' data directory, print the
# frame's (rows, columns) tuple, and preview the first five rows.
heartland_df = pd.read_csv(filepath_or_buffer="scrapers/data/heartland.csv")
print(heartland_df.shape)
heartland_df.head(n=5)

(249, 7)


Unnamed: 0,TITLE,AUTHOR,CONTENT,DATE,TYPE,THINK_TANK,LINK
0,Research & Commentary: Anti-Economic Boycott B...,Tim Benson,Legislation in the Alabama Senate (SB 261) wou...,"May 2, 2023",PUBLICATION,Heartland,https://heartland.org/publications/research-co...
1,Research & Commentary: Anti-ESG Bill in North ...,"Tim Benson, Jack McPherrin",Legislation in the North Carolina House of Rep...,"May 2, 2023",PUBLICATION,Heartland,https://heartland.org/publications/research-co...
2,Energy at a Glance: Fracking for Oil and Natur...,Linnea Lueken,"For the full PDF of this policy tip sheet, cli...","April 26, 2023",PUBLICATION,Heartland,https://heartland.org/publications/energy-at-a...
3,"Environmental, Social, and Governance (ESG) Sc...",Jack McPherrin,"Environmental, social, and governance (ESG) sc...","April 26, 2023",PUBLICATION,Heartland,https://heartland.org/publications/environment...
4,Research & Commentary: EPA Finds Greenhouse Ga...,Tim Benson,The U.S. Environmental Protection Agency’s (EP...,"April 13, 2023",PUBLICATION,Heartland,https://heartland.org/publications/research-co...


In [7]:
# Split each article body into a list of paragraphs (separated by a blank line).
# Only plain strings are split: missing values (NaN) pass through unchanged and
# entries that are already lists are left alone. That makes this cell safe to
# re-run, whereas calling .str.split a second time on the already-split column
# would replace the paragraph lists with NaN.
heartland_df["CONTENT"] = heartland_df["CONTENT"].map(
    lambda text: text.split("\n\n") if isinstance(text, str) else text
)

In [8]:
# Predict a claim code for every paragraph of every Heartland article.
classifications = []
for paragraphs in heartland_df["CONTENT"]:
    # Articles with no CONTENT arrive as a missing value (NaN) instead of a list
    # of paragraphs. is_scalar is checked first so pd.isna is only asked about a
    # single value (on a list it would return an array, not a bool); together
    # they cover NaN, None and pd.NA rather than only the exact `float` type.
    is_missing = pd.api.types.is_scalar(paragraphs) and pd.isna(paragraphs)
    if is_missing or len(paragraphs) == 0:
        # Nothing to classify: record "no claims" so rows stay aligned.
        classifications.append([])
        continue

    X = vectorizer.transform(paragraphs)
    predictions = clf.predict(X)  # one encoded label per paragraph

    # Decode the predictions back into claim codes (e.g. "0_0", "4_5").
    claim_classifications = list(le.inverse_transform(predictions))
    classifications.append(claim_classifications)

# Add a CLAIMS column: for each article, the list of claim codes, one per paragraph.
heartland_df["CLAIMS"] = classifications
heartland_df.head()

Unnamed: 0,TITLE,AUTHOR,CONTENT,DATE,TYPE,THINK_TANK,LINK,CLAIMS
0,Research & Commentary: Anti-Economic Boycott B...,Tim Benson,[Legislation in the Alabama Senate (SB 261) wo...,"May 2, 2023",PUBLICATION,Heartland,https://heartland.org/publications/research-co...,"[0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, ..."
1,Research & Commentary: Anti-ESG Bill in North ...,"Tim Benson, Jack McPherrin",[Legislation in the North Carolina House of Re...,"May 2, 2023",PUBLICATION,Heartland,https://heartland.org/publications/research-co...,"[0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, 0_0, ..."
2,Energy at a Glance: Fracking for Oil and Natur...,Linnea Lueken,"[For the full PDF of this policy tip sheet, cl...","April 26, 2023",PUBLICATION,Heartland,https://heartland.org/publications/energy-at-a...,"[0_0, 0_0, 0_0, 4_5, 0_0, 0_0, 4_5, 0_0, 0_0]"
3,"Environmental, Social, and Governance (ESG) Sc...",Jack McPherrin,"[Environmental, social, and governance (ESG) s...","April 26, 2023",PUBLICATION,Heartland,https://heartland.org/publications/environment...,"[0_0, 0_0, 0_0, 0_0, 0_0, 0_0]"
4,Research & Commentary: EPA Finds Greenhouse Ga...,Tim Benson,[The U.S. Environmental Protection Agency’s (E...,"April 13, 2023",PUBLICATION,Heartland,https://heartland.org/publications/research-co...,"[0_0, 3_3, 0_0, 4_5, 0_0, 0_0, 0_0, 0_0, 4_5, ..."
