In [1]:
import pandas as pd
from sqlalchemy import create_engine, text

In [2]:
engine = create_engine("postgresql://postgres@localhost:5432/arxivdb")

# View: Frequent Collaborators

$$
\begin{aligned}
\textbf{R}_1 &\leftarrow 
paperauthor\ (pa1) 
\ \bowtie_{pa1.arxivid = pa2.arxivid \;\land\ pa1.authorid < pa2.authorid}\ 
paperauthor\ (pa2)
\\[12pt]

\textbf{R}_2 &\leftarrow 
R_1 
\ \bowtie_{pa1.authorid = a1.authorid}\ author\ (a1)
\ \bowtie_{pa2.authorid = a2.authorid}\ author\ (a2)
\\[12pt]

Result &\leftarrow 
_{a1.authorid,\ a1.authorname,\ a2.authorid,\ a2.authorname}\;
\mathcal{F}_{\text{count}(arxivid)\ \rightarrow\ papers\_together}(R_2)
\\[12pt]
\end{aligned}
$$

In [3]:
query = """
CREATE MATERIALIZED VIEW mv_frequent_collaborators AS
SELECT
    a1.authorid AS author1_id,
    a1.authorname AS author1_name,
    a2.authorid AS author2_id,
    a2.authorname AS author2_name,
    COUNT(*) AS papers_together
FROM paperauthor pa1
JOIN paperauthor pa2
    ON pa1.arxivid = pa2.arxivid AND pa1.authorid < pa2.authorid
JOIN author a1 ON pa1.authorid = a1.authorid
JOIN author a2 ON pa2.authorid = a2.authorid
GROUP BY a1.authorid, a1.authorname, a2.authorid, a2.authorname;
"""

with engine.connect() as conn:
    conn.execute(text(query))
    conn.commit()

## All authors who frequently collaborated with `Yoshua Bengio`, sorted by the number of papers they co-authored together.

$$
\begin{aligned}
R_1 &\leftarrow mv\_frequent\_collaborators \\[10pt]

R_2 &\leftarrow 
\sigma_{\;author1\_name = :name\;} (R_1) 
\\[10pt]

R_3 &\leftarrow 
\Pi_{\;author1\_name,\; author2\_name,\; papers\_together\;} (R_2)
\\[10pt]

Result &\leftarrow 
_{DESC}
\mathcal{T}_{papers\_together}(R_3)
\end{aligned}
$$

In [4]:
query = """
SELECT author1_name, author2_name, papers_together
FROM mv_frequent_collaborators
WHERE author1_name = :name
ORDER BY papers_together DESC;
"""

df = pd.read_sql(text(query), engine, params={"name": "Yoshua Bengio"})
df.head(10)

Unnamed: 0,author1_name,author2_name,papers_together
0,Yoshua Bengio,Razvan Pascanu,16
1,Yoshua Bengio,Alex Lamb,7
2,Yoshua Bengio,Laurent Dinh,4
3,Yoshua Bengio,Heng Luo,2
4,Yoshua Bengio,Konstantinos Drossos,2
5,Yoshua Bengio,Leslie Pack Kaelbling,2
6,Yoshua Bengio,Guillaume Sicard,1
7,Yoshua Bengio,Yan-Ming Zhang,1
8,Yoshua Bengio,Daniel Jiwoong Im,1


# View: Paper and All Metadata Combined

$$
\begin{aligned}
\textbf{R}_1 &\leftarrow 
paper\ (p) 
\ \bowtie_{p.submitterid = a.authorid}\ author\ (a)
\\[12pt]

\textbf{R}_2 &\leftarrow 
R_1\ \ast\ papercategory\
\\[12pt]

\textbf{R}_3 &\leftarrow 
R_2\ \ast\ category\
\\[12pt]

\textbf{Result} &\leftarrow 
\Pi_{\,p.arxivid,\ p.title,\ p.abstract,\ p.updatedate,\ 
      p.submitterid,\ a.authorname,\ 
      categorycode,\ categoryname\,}(R_3)
\end{aligned}
$$

In [5]:
query = """
CREATE MATERIALIZED VIEW mv_paper_full AS
SELECT
    p.arxivid,
    p.title,
    p.abstract,
    p.updatedate,
    p.submitterid,
    a.authorname AS submitter_name,
    categorycode,
    categoryname
FROM paper p
JOIN author a ON p.submitterid = a.authorid
NATURAL JOIN papercategory
NATURAL JOIN category;
"""

with engine.connect() as conn:
    conn.execute(text(query))
    conn.commit()

## Authors Publishing the Most Deep Learning Papers

$$
\begin{aligned}
R_1 &\leftarrow 
\sigma_{\substack{
\left(
\text{LOWER(title)}\ \text{LIKE}\ \text{`\%deep\ learning\%'} \;\lor\;
\text{LOWER(abstract)}\ \text{LIKE}\ \text{`\%deep\ learning\%'}
\right)
\\[4pt]
\land\;
\text{categorycode} \in 
\{\text{`cs.AI'},\ \text{`cs.CL'},\ \text{`cs.CV'},\ \text{`cs.LG'},\ \text{`cs.IR'},\ \text{`cs.NE'}\}
}}(mv\_paper\_full)
\\[12pt]

R_2 &\leftarrow 
R_1 \; \ast \; paperauthor
\\[12pt]

R_3 &\leftarrow 
R_2 \; \ast \; author
\\[12pt]

R_4 &\leftarrow 
_{authorid,\; authorname}\;
\mathcal{F}_{\text{count}(\text{distinct}(arxivid)) \rightarrow deep\_learning\_papers}(R_3)
\\[12pt]

Result &\leftarrow 
_{DESC}
\mathcal{T}_{deep\_learning\_papers}(R_4)
\end{aligned}
$$


In [6]:
query = """
SELECT
    authorid,
    authorname,
    COUNT(DISTINCT arxivid) AS deep_learning_papers
FROM mv_paper_full
NATURAL JOIN paperauthor
NATURAL JOIN author
WHERE 
    (LOWER(title) LIKE '%deep learning%' 
    OR LOWER(abstract) LIKE '%deep learning%')
    AND categorycode IN ('cs.AI', 'cs.CL', 'cs.CV', 'cs.LG', 'cs.IR', 'cs.NE')
GROUP BY authorid, authorname
ORDER BY deep_learning_papers DESC;
"""

df = pd.read_sql(text(query), engine)
df.head(10)

Unnamed: 0,authorid,authorname,deep_learning_papers
0,749604,Yoshua Bengio,32
1,248744,Jong Chul Ye,18
2,684239,Xiaoou Tang,14
3,269244,Luc Van Gool,13
4,244966,Wei Li,11
5,367416,Ian Reid,11
6,61804,Xiaogang Wang,10
7,732446,Zhangyang Wang,10
8,552212,Nassir Navab,10
9,701650,Yann LeCun,10
