In [None]:
import numpy as np
import pandas as pd
import os
from newspaper import Article
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

import matplotlib.pyplot as plt
% matplotlib inline


In [2]:
# Marker phrase -> category label recorded for the articles that follow it.
# parse_file() looks for each key as a substring of every line and keeps the
# LAST key that matches, so the pair order below is significant and must be
# preserved when adding new entries.
CATEGORY_MAPPINGS = dict(
    [
        ("The Hype", "Concerns & Hype"),
        ("The Panic", "Concerns & Hype"),
        ("The good coverage", "Advances & Business"),
        (
            "Expert Opinions & Discussion within the field",
            "Expert Opinions & Discussion within the field",
        ),
        ("Explainers", "Explainers"),
        ("AI Advances", "Advances & Business"),
        ("AI Worries", "Concerns & Hype"),
        ("Advances & Business", "Advances & Business"),
        ("Concerns & Hype", "Concerns & Hype"),
        ("Analysis & Policy", "Analysis & Policy"),
        ("Mini Briefs", "Mini Briefs"),
    ]
)

In [None]:
def _split_link_line(line):
    """Split a line around its first ``[title](url)`` style link.

    Returns ``(title, url, tail)`` where ``tail`` is whatever follows the
    link's closing parenthesis, or ``None`` if the line does not contain a
    bracketed title followed by a parenthesised URL.
    """
    _, open_bracket, after_bracket = line.partition('[')
    title, close_bracket, after_title = after_bracket.partition(']')
    # Only look for the URL *after* the title: a parenthesis inside or before
    # the title (e.g. "[Foo (bar)](http://...)") must not be taken as the URL.
    _, open_paren, after_paren = after_title.partition('(')
    url, close_paren, tail = after_paren.partition(')')
    if not (open_bracket and close_bracket and open_paren and close_paren):
        return None
    return title, url, tail


def _fetch_article_fields(url):
    """Download ``url`` with newspaper's ``Article`` and collect its metadata.

    Returns a dict with the keys ``authors``, ``date``, ``text``, ``keywords``
    and ``summary``. Fields that could not be obtained keep their fallback
    value (``None``, ``None``, ``None``, ``[]`` and ``''`` respectively).
    """
    fields = {'authors': None, 'date': None, 'text': None,
              'keywords': [], 'summary': ''}
    article = Article(url)
    try:
        article.download()
        article.parse()
        # Store parse results right away so that a later failure in nlp()
        # does not throw away what was already extracted.
        fields['authors'] = article.authors
        fields['date'] = article.publish_date
        fields['text'] = article.text
        article.nlp()
        fields['keywords'] = article.keywords
        fields['summary'] = article.summary
    except Exception as exc:
        # Best effort: one unreachable article must not abort the whole run.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print('  [warning] could not fully process {}: {}'.format(url, exc))
    return fields


def parse_file(file_name, min_title_words=4):
    """Extract the linked articles from one digest file.

    The file is read line by line. Whenever a line contains one of the
    ``CATEGORY_MAPPINGS`` keys, the mapped value becomes the current category
    (if several keys match, the last one in the mapping wins). Every later
    line holding a ``[title](url)`` link is treated as an article, downloaded
    and analysed with ``newspaper.Article``.

    Parameters
    ----------
    file_name : str
        Path of the digest file; it is read as UTF-8.
    min_title_words : int, optional
        Links whose title has fewer space-separated words than this are
        skipped. Defaults to 4.

    Returns
    -------
    list of list
        One row per article:
        ``[category, title, date, url, excerpt, authors, keywords, summary, text]``.
        ``excerpt`` is the text after the first ``' - '`` that follows the
        link (``''`` if there is none).
    """
    articles = []
    current_category = None
    # Explicit encoding: the digests contain non-ASCII characters, so the
    # platform default must not decide how they are decoded.
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            for marker, category in CATEGORY_MAPPINGS.items():
                if marker in line:
                    current_category = category
            if not current_category:
                # Links that appear before any category marker are ignored.
                continue

            link = _split_link_line(line)
            if link is None:
                continue
            title, url, tail = link
            if len(title.split(' ')) < min_title_words:
                continue
            print(title)

            # Search for the separator only in the text after the link, and
            # split once, so a ' - ' inside the title or a second ' - ' inside
            # the excerpt cannot corrupt or truncate the excerpt.
            if ' - ' in tail:
                excerpt = tail.split(' - ', 1)[1].strip()
            else:
                excerpt = ''

            fields = _fetch_article_fields(url)
            articles.append([current_category,
                             title,
                             fields['date'],
                             url,
                             excerpt,
                             fields['authors'],
                             fields['keywords'],
                             fields['summary'],
                             fields['text']])
    return articles

In [None]:
# Parse every digest file in the working directory and tally the articles
# per category. Both containers are rebuilt from scratch, so re-running this
# cell does not accumulate duplicates.
all_articles = []
category_counts = {}
# sorted(): os.listdir() returns entries in arbitrary order; sorting keeps
# the order of `all_articles` reproducible between runs and machines.
for file_name in sorted(os.listdir('.')):
    # Names containing 'py' or 'this' are excluded (presumably the notebook and
    # scripts stored next to the digests - TODO confirm).
    if 'py' in file_name or 'this' in file_name:
        continue
    # Directories cannot be opened by parse_file().
    if not os.path.isfile(file_name):
        continue
    name_parts = file_name.split('.')[0].split('-')
    try:
        # Doubles as validation of the <year>-<month>-<day>-<edition> name:
        # too few parts or a non-numeric part raises ValueError.
        year, month, day, edition = (int(part) for part in name_parts[:4])
    except ValueError:
        print('Skipping {!r}: name is not <year>-<month>-<day>-<edition>'.format(file_name))
        continue
    articles = parse_file(file_name)
    all_articles += articles
    for article in articles:
        category = article[0]
        category_counts[category] = category_counts.get(category, 0) + 1
print(len(all_articles))
print(category_counts)

DeepMind touts predictive healthcare AI ‘breakthrough’ trained on heavily skewed data
This AI detects 11 types of emotions from a selfie
EmoNet
America is drowning in garbage. Now robots are being put on duty to help solve the recycling crisis
Artificial Intelligence Can Now Create Perfumes, Even Without A Sense Of Smell
NASA's Robotic Arm will Work Just Like a Human Geologist on Mars
This autonomous bicycle shows China’s rising AI chip expertise
A new tool uses AI to spot text written by AI
Science Goes Too Far, Creates AI That Turns You Into an Anime Character
China has started a grand experiment in AI education. It could reshape how the world learns.
Reducing malicious use of synthetic media research
Toby Walsh, A.I. Expert, Is Racing to Stop the Killer Robots
California Police Are Sharing Facial Recognition Databases to ID Suspects
Huge computing power ‘can deliver human-level AI in 5 years’
A team of AI algorithms just crushed humans in a complex computer game
managing to beat a t

Using AI to predict breast cancer and personalize care
Stanford’s ‘QuizBot’ – a chatbot that teaches – beats flashcards for learning factual information
Smart Interfaces for Human-Centered AI
Microsoft Word uses AI to improve your writing
Live transcription and captioning in Android are a boon to the hearing-impaired
Collision-Detecting Suitcase, Wayfinding App Help Blind People Navigate Airports
The Future of Journalism: Will Robots Get it Right?
UK faces £90bn bill to retrain one million workers who face losing jobs to robots
Robots Edge Closer to Unloading Trucks in Amazon-Era Milestone
Robots don't destroy jobs, they create them: Amazon
Automakers Are Rethinking the Timetable for Fully Autonomous Cars
Don’t let industry write the rules for AI
Things you only know if you’re an AI research scientist
ChinAI #49: Rebuttal to FT Articles on Western-Chinese AI collaborations
AI Needs More Why
Ethical analysis of the open-sourcing of a state-of-the-art conversational AI
How to build a Sta

Google Head of Ethical AI Research on Data Biases and Ethics
In 2020, let's stop AI ethics-washing and actually do something
AI shows promise for breast cancer screening
Baidu has a new trick for teaching AI the meaning of language
Japan Loves Robots, but Getting Them to Do Human Work Isn't Easy
Robotics Trends to Watch in 2020: Our 8 Big Predictions
AI creativity will bloom in 2020, all thanks to true web machine learning
While Americans Worry About The AI Uprising, People In Japan Are Learning To Love Their Robots — And Be Loved Back
Bringing artificial intelligence and MIT to middle school classrooms
Cerebras’s Giant Chip Will Smash Deep Learning’s Speed Barrier
ByteDance & TikTok have secretly built a Deepfakes maker
Google DeepMind’s AI-based breast cancer detection is not yet an automatic diagnostician
Don’t Stress About AI Taking Your Job—Humans Will Do That Instead
China should step up regulation of artificial intelligence in finance, think tank says
Illinois says you should kn

UPS is buying thousands of electric vans and teaming up with Waymo to accelerate the future of delivery
Smarter Delivery Hinges on Smarter Robots
Facial Recognition Startup Clearview AI Is Struggling To Address Complaints As Its Legal Issues Mount


In [None]:
# Total number of parsed articles, then the per-category tally, one per line.
print(len(all_articles), category_counts, sep='\n')

In [None]:
# One row per article; the column order mirrors the row layout produced by
# parse_file().
df = pd.DataFrame(
    all_articles,
    columns=[
        'category',
        'title',
        'date',
        'url',
        'excerpt',
        'authors',
        'keywords',
        'summary',
        'text',
    ],
)