In [2]:
#default_exp rss_feeds 

In [3]:
#export
from operator import itemgetter

import numpy as np
import tqdm
import bs4
import feedparser
import seaborn as sns
import pandas as pd

In [4]:
# Switch the working directory to the parent folder, presumably so that relative
# paths used later (e.g. 'data/feeds.txt') resolve from the project root.
# NOTE(review): every re-run of this cell moves up one more level; run it once per kernel.
%cd ..

/home/piotr/Documents/pytorch_hackathon


In [5]:
# Widen pandas' column display to 250 characters so long text cells are not cut short.
pd.set_option("display.max_colwidth", 250)

# RSS Feeds


In [6]:
#export
# Medium publication slugs (or "@user" handles) whose feeds are tracked.
medium_publications = [
    "the-artificial-impostor",
    "pytorch",
    "dair.ai",
    "towards-artificial-intelligence",
    "swlh",
    "@ODSC",
    "doctrine",
    "paperswithcode",
]

# URL pattern applied to every entry of `medium_publications`.
medium_url_template = "https://medium.com/feed/{}"

# One feed URL per publication, in the same order as `medium_publications`.
medium_urls = list(map(medium_url_template.format, medium_publications))

# Feed URL of the first publication, kept as a single sample URL.
medium_url = medium_urls[0]

In [7]:
#export
# Subreddits whose RSS feeds are tracked.
subreddits = [
    "MachineLearning",
    "deeplearning",
    "datascience",
    "cognitivelinguistics",
    "TopOfArxivSanity",
    "kaggle",
]

# URL pattern applied to every entry of `subreddits`.
reddit_url_template = "https://www.reddit.com/r/{}/.rss"

# One feed URL per subreddit, in the same order as `subreddits`.
reddit_urls = list(map(reddit_url_template.format, subreddits))

# Feed URL of the first subreddit, kept as a single sample URL.
reddit_url = reddit_urls[0]

In [8]:
#export


def get_article_text(article):
    """Return the plain text of a feed entry's first content element.

    Args:
        article: Mapping with a ``'content'`` list whose first item holds
            the entry's HTML under the ``'value'`` key.

    Returns:
        str: The HTML with all markup stripped.

    Raises:
        KeyError, IndexError: If the entry has no usable ``'content'`` item.
    """
    article_html_content = article['content'][0]['value']
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed (and warns), so results could differ between environments.
    article_text = bs4.BeautifulSoup(article_html_content, 'html.parser').text
    return article_text


def get_feed_article_texts(feed):
    """Collect the plain text of every entry in ``feed`` that has a 'content' key.

    Entries without a ``'content'`` key are skipped; the order of the
    remaining entries is preserved.
    """
    texts = []
    for entry in feed['entries']:
        if 'content' not in entry.keys():
            continue
        texts.append(get_article_text(entry))
    return texts


def get_feed_article_df(feed):
    """Build a DataFrame of a feed's entries with a plain-text ``'text'`` column.

    Args:
        feed: Mapping with an ``'entries'`` sequence of per-article records.

    Returns:
        pandas.DataFrame: One row per entry and one column per entry field,
        plus ``'text'`` holding the ``'summary'`` field with HTML markup
        removed. Entries without a summary get an empty string.
    """
    def summary_to_text(summary):
        # Entries lacking a summary show up as NaN after from_records;
        # BeautifulSoup cannot parse those, so map them to empty text.
        if not isinstance(summary, str):
            return ''
        # Explicit parser keeps the output independent of installed parsers.
        return bs4.BeautifulSoup(summary, 'html.parser').text

    feed_df = pd.DataFrame.from_records(feed['entries'])
    if 'summary' in feed_df.columns:
        feed_df['text'] = feed_df['summary'].apply(summary_to_text)
    else:
        # No entry carried a summary (or there were no entries at all).
        feed_df['text'] = ''
    return feed_df


def add_field(df, field_name, values):
    """Set column ``field_name`` of ``df`` to ``values`` and return ``df``.

    The frame is modified in place; the returned object is the same
    DataFrame that was passed in, which allows use inside expressions.
    """
    df[field_name] = values  # in-place column assignment
    return df

In [9]:
#export
# Stand-alone feeds that are not generated from a URL template.
paperswithcode_url = 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest'
hackernews_url = 'https://news.ycombinator.com/rss'

# Every tracked feed URL: stand-alone feeds first, then Medium, then Reddit.
rss_feed_urls = [paperswithcode_url, hackernews_url, *medium_urls, *reddit_urls]

In [10]:
# First column of the header-less table stored in data/feeds.txt, as an array.
feed_urls = (
    pd.read_table('data/feeds.txt', header=None)
    .iloc[:, 0]
    .values
)

In [11]:
# List every configured feed URL, one per line, as a quick sanity check.
for url in rss_feed_urls:
    print(url)

https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest
https://news.ycombinator.com/rss
https://medium.com/feed/the-artificial-impostor
https://medium.com/feed/pytorch
https://medium.com/feed/dair.ai
https://medium.com/feed/towards-artificial-intelligence
https://medium.com/feed/swlh
https://medium.com/feed/@ODSC
https://medium.com/feed/doctrine
https://medium.com/feed/paperswithcode
https://www.reddit.com/r/MachineLearning/.rss
https://www.reddit.com/r/deeplearning/.rss
https://www.reddit.com/r/datascience/.rss
https://www.reddit.com/r/cognitivelinguistics/.rss
https://www.reddit.com/r/TopOfArxivSanity/.rss
https://www.reddit.com/r/kaggle/.rss


### Loading RSS feed articles to a dataframe

In [21]:
#export
def get_feed_df(feed_urls):
    """Download every feed in ``feed_urls`` and combine the entries into one DataFrame.

    Args:
        feed_urls: Iterable of feed URLs; each one is parsed with
            ``feedparser.parse`` while a tqdm progress bar is shown.

    Returns:
        pandas.DataFrame: One row per entry across all feeds that returned
        at least one entry. On top of the columns produced by
        ``get_feed_article_df`` it contains ``'feed'`` (the source URL) and
        ``'date'`` (the entry's ``'updated'`` value, falling back to
        ``'published'``). The per-feed row index is not reset, so index
        labels repeat across feeds. If no feed yields any entry, an empty
        frame with the columns ``'text'``, ``'feed'`` and ``'date'`` is
        returned.
    """
    feeds = [
        (feed_url, feedparser.parse(feed_url))
        for feed_url in tqdm.tqdm(feed_urls)
    ]
    feed_dfs = [
        add_field(get_feed_article_df(feed), 'feed', feed_url)
        for (feed_url, feed) in feeds
        if len(feed['entries']) > 0
    ]
    if not feed_dfs:
        # pd.concat raises on an empty list; hand back an empty frame instead
        # so a run where every feed is empty or unreachable does not crash.
        return pd.DataFrame(columns=['text', 'feed', 'date'])

    df = pd.concat(feed_dfs)

    # Prefer 'updated' and fall back to 'published'. Either column can be
    # absent when no fetched feed supplies that field, so check first.
    date_sources = [
        df[column] for column in ('updated', 'published') if column in df.columns
    ]
    if date_sources:
        dates = date_sources[0]
        for fallback in date_sources[1:]:
            dates = dates.fillna(fallback)
    else:
        dates = pd.Series(None, index=df.index, dtype=object)
    df['date'] = dates

    return df
#feedparser.parse('https://news.ycombinator.com/rss')

In [18]:
# Fetch all configured feeds (one request per URL) and combine them into one frame.
feed_df = get_feed_df(rss_feed_urls)

100%|██████████| 16/16 [00:09<00:00,  1.63it/s]


In [20]:
# (rows, columns) of the combined feed frame.
feed_df.shape

(296, 23)

In [24]:
# Preview the first rows of the combined feed frame.
feed_df.head()

Unnamed: 0,title,title_detail,links,link,summary,summary_detail,id,guidislink,tags,text,...,comments,authors,author,author_detail,updated,updated_parsed,content,href,media_thumbnail,date
0,Automated Temporal Equilibrium Analysis: Verification and Synthesis of Multi-Player Games,"{'type': 'text/plain', 'language': None, 'base': 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest', 'value': 'Automated Temporal Equilibrium Analysis: Verification and Synthesis of Multi-Player Games'}","[{'rel': 'alternate', 'type': 'text/html', 'href': 'https://paperswithcode.com/paper/automated-temporal-equilibrium-analysis'}]",https://paperswithcode.com/paper/automated-temporal-equilibrium-analysis,"In the context of multi-agent systems, the rational verification problem is concerned with checking which temporal logic properties will hold in a system when its constituent agents are assumed to behave rationally and strategically in pursuit of...","{'type': 'text/html', 'language': None, 'base': 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest', 'value': 'In the context of multi-agent systems, the rational verification problem is concerned with checking which temporal logic properti...",https://paperswithcode.com/paper/automated-temporal-equilibrium-analysis,False,,"In the context of multi-agent systems, the rational verification problem is concerned with checking which temporal logic properties will hold in a system when its constituent agents are assumed to behave rationally and strategically in pursuit of...",...,,,,,,,,,,
1,Revisiting Temporal Modeling for Video Super-resolution,"{'type': 'text/plain', 'language': None, 'base': 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest', 'value': 'Revisiting Temporal Modeling for Video Super-resolution'}","[{'rel': 'alternate', 'type': 'text/html', 'href': 'https://paperswithcode.com/paper/revisiting-temporal-modeling-for-video-super'}]",https://paperswithcode.com/paper/revisiting-temporal-modeling-for-video-super,"Video super-resolution plays an important role in surveillance video analysis and ultra-high-definition video display, which has drawn much attention in both the research and industrial communities. <p>Code: <a href=""https://github.com/junpan19/R...","{'type': 'text/html', 'language': None, 'base': 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest', 'value': 'Video super-resolution plays an important role in surveillance video analysis and ultra-high-definition video display, which has ...",https://paperswithcode.com/paper/revisiting-temporal-modeling-for-video-super,False,"[{'term': 'Video super-resolution', 'scheme': None, 'label': None}]","Video super-resolution plays an important role in surveillance video analysis and ultra-high-definition video display, which has drawn much attention in both the research and industrial communities. Code: https://github.com/junpan19/RRN",...,,,,,,,,,,
2,Learning Temporally Invariant and Localizable Features via Data Augmentation for Video Recognition,"{'type': 'text/plain', 'language': None, 'base': 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest', 'value': 'Learning Temporally Invariant and Localizable Features via Data Augmentation for Video Recognition'}","[{'rel': 'alternate', 'type': 'text/html', 'href': 'https://paperswithcode.com/paper/learning-temporally-invariant-and-localizable'}]",https://paperswithcode.com/paper/learning-temporally-invariant-and-localizable,"Based on our novel temporal data augmentation algorithms, video recognition performances are improved using only a limited amount of training data compared to the spatial-only data augmentation algorithms, including the 1st Visual Inductive Prior...","{'type': 'text/html', 'language': None, 'base': 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest', 'value': 'Based on our novel temporal data augmentation algorithms, video recognition performances are improved using only a limited amount...",https://paperswithcode.com/paper/learning-temporally-invariant-and-localizable,False,"[{'term': 'Action recognition', 'scheme': None, 'label': None}, {'term': 'Data augmentation', 'scheme': None, 'label': None}, {'term': 'Video recognition', 'scheme': None, 'label': None}]","Based on our novel temporal data augmentation algorithms, video recognition performances are improved using only a limited amount of training data compared to the spatial-only data augmentation algorithms, including the 1st Visual Inductive Prior...",...,,,,,,,,,,
3,Hybrid Dynamic-static Context-aware Attention Network for Action Assessment in Long Videos,"{'type': 'text/plain', 'language': None, 'base': 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest', 'value': 'Hybrid Dynamic-static Context-aware Attention Network for Action Assessment in Long Videos'}","[{'rel': 'alternate', 'type': 'text/html', 'href': 'https://paperswithcode.com/paper/hybrid-dynamic-static-context-aware-attention'}]",https://paperswithcode.com/paper/hybrid-dynamic-static-context-aware-attention,"However, most existing works focus only on video dynamic information (i. e., motion information) but ignore the specific postures that an athlete is performing in a video, which is important for action assessment in long videos. <p>Code: <a href=...","{'type': 'text/html', 'language': None, 'base': 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest', 'value': 'However, most existing works focus only on video dynamic information (i. e., motion information) but ignore the specific postures...",https://paperswithcode.com/paper/hybrid-dynamic-static-context-aware-attention,False,"[{'term': 'Action quality assessment', 'scheme': None, 'label': None}]","However, most existing works focus only on video dynamic information (i. e., motion information) but ignore the specific postures that an athlete is performing in a video, which is important for action assessment in long videos. Code: https://git...",...,,,,,,,,,,
4,Weakly Supervised Generative Network for Multiple 3D Human Pose Hypotheses,"{'type': 'text/plain', 'language': None, 'base': 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest', 'value': 'Weakly Supervised Generative Network for Multiple 3D Human Pose Hypotheses'}","[{'rel': 'alternate', 'type': 'text/html', 'href': 'https://paperswithcode.com/paper/weakly-supervised-generative-network-for'}]",https://paperswithcode.com/paper/weakly-supervised-generative-network-for,"In this paper, we propose a weakly supervised deep generative network to address the inverse problem and circumvent the need for ground truth 2D-to-3D correspondences. <p>Code: <a href=""https://github.com/chaneyddtt/weakly-supervised-3d-pose-gene...","{'type': 'text/html', 'language': None, 'base': 'https://us-east1-ml-feeds.cloudfunctions.net/pwc/latest', 'value': 'In this paper, we propose a weakly supervised deep generative network to address the inverse problem and circumvent the need for ...",https://paperswithcode.com/paper/weakly-supervised-generative-network-for,False,"[{'term': '3d human pose estimation', 'scheme': None, 'label': None}]","In this paper, we propose a weakly supervised deep generative network to address the inverse problem and circumvent the need for ground truth 2D-to-3D correspondences. Code: https://github.com/chaneyddtt/weakly-supervised-3d-pose-generator",...,,,,,,,,,,
