# PostgreSQL

In [4]:
import psycopg2
# Open a connection to the local "test" database as user "pi" (empty password).
conn = psycopg2.connect(
    database="test",
    user="pi",
    password="",
)

In [5]:
# Cursor that the cells below use to execute SQL on this connection.
cur = conn.cursor()

## Check Schema

In [6]:
# List every table in the public schema, alphabetically.
cur.execute("""SELECT table_name FROM information_schema.tables WHERE table_schema='public' ORDER BY table_name""")
# fetchall() already returns a list of row tuples, so no comprehension is needed.
cur.fetchall()

[('joineddate',), ('people',), ('posts',), ('user_comments',), ('users',)]

In [7]:
# Column metadata (name, type code, sizes) for the result set of the last executed query.
cur.description

(Column(name='table_name', type_code=1043, display_size=None, internal_size=-1, precision=None, scale=None, null_ok=None),)

## Create tables

In [None]:
# user_comments table: one row per comment (id, author username, timestamp, text).
# IF NOT EXISTS keeps this cell re-runnable: the schema listing earlier in the
# notebook shows the table may already exist, and a failed CREATE would abort
# the open transaction.
cur.execute("""
CREATE TABLE IF NOT EXISTS user_comments(
    id integer PRIMARY KEY,
    username text,
    posted_datetime timestamp,
    comments text
)
""")
conn.commit()

In [None]:
# posts table: one row per username with that user's post count.
# IF NOT EXISTS makes the cell safe to re-run when the table is already present.
cur.execute("""
CREATE TABLE IF NOT EXISTS posts(
    username VARCHAR (255) PRIMARY KEY,
    numposts integer
)
""")
conn.commit()

In [None]:
# joinedDate table: one row per username with a joined_date timestamp.
# The identifier is unquoted, so PostgreSQL stores it as "joineddate"
# (as seen in the schema listing earlier in the notebook).
# IF NOT EXISTS makes the cell safe to re-run when the table is already present.
cur.execute("""
CREATE TABLE IF NOT EXISTS joinedDate(
    username VARCHAR (255) PRIMARY KEY,
    joined_date timestamp
)
""")
conn.commit()

In [None]:
# users table: one row per username with post count and join timestamp.
# IF NOT EXISTS makes the cell safe to re-run when the table is already present.
cur.execute("""
CREATE TABLE IF NOT EXISTS users(
    username VARCHAR (255) PRIMARY KEY,
    numposts integer,
    joined timestamp
)
""")
conn.commit()

## Load data

In [None]:
# Bulk-load the cleaned comment export into user_comments, the table that every
# later query in this notebook reads from.
# NOTE(review): assumes the CSV columns are ordered (id, username,
# posted_datetime, comments) to match the table definition - confirm.
# NOTE(review): copy_from splits on every ',' and does not interpret CSV
# quoting, so comment text must not contain raw commas - confirm the cleaning step.
conn = psycopg2.connect(database="test", user="pi", password="")
cur = conn.cursor()
with open('comma_delim_clean.csv', 'r') as f:
    next(f)  # Skip the header row.
    cur.copy_from(f, 'user_comments', sep=',')

conn.commit()

In [None]:
# Reconnect and bulk-load users.csv (header row dropped) into the users table.
conn = psycopg2.connect(
    database="test",
    user="pi",
    password="",
)
cur = conn.cursor()
with open('users.csv', 'r') as users_csv:
    next(users_csv)  # discard the header line before COPY
    cur.copy_from(users_csv, 'users', sep=',')

conn.commit()

In [None]:
# Populate posts: one row per username with its count of distinct comment texts.
build_posts_sql = 'INSERT INTO posts(username, numposts) SELECT username, count(DISTINCT comments) AS num_posts FROM user_comments GROUP BY username'
cur.execute(build_posts_sql)
conn.commit()

In [None]:
# build joined table from unique usernames, post_counts, and first post date
# cur.execute("""INSERT INTO users SELECT p.username, p.numposts, j.joined_date FROM posts p INNER JOIN joinedDate j ON p.username = j.username ORDER BY p.numposts DESC""")
# conn.commit()

In [None]:
# Populate users by joining each username's post count (posts) with its join date (joinedDate).
build_users_sql = """INSERT INTO users SELECT p.username, p.numposts, j.joined_date FROM posts p INNER JOIN joinedDate j ON p.username = j.username ORDER BY p.numposts DESC"""
cur.execute(build_users_sql)
conn.commit()

In [15]:
# Extend users with a boolean VIP flag column.
add_vip_column_sql = """ALTER TABLE users ADD COLUMN is_vip boolean"""
cur.execute(add_vip_column_sql)
conn.commit()

In [None]:
# Default every user to non-VIP so later updates can flag just the top posters.
cur.execute("""UPDATE users SET is_vip = False""")
# Commit like the other write cells; an uncommitted UPDATE is discarded when
# the connection is closed or replaced.
conn.commit()

### Queries

---
**Question 3**

> You’ve cleaned and imported the file above into the database successfully. Great! The data is stored in a table called user_comments. Now you want to list the top-10 most prolific posters, by username. Write a query that produces this result.

---

In [58]:
# Top-10 posters, ranked by the number of distinct comment texts per username.
top_posters_sql = 'SELECT username, count(DISTINCT comments) AS num_posts FROM user_comments GROUP BY username ORDER BY num_posts DESC LIMIT 10'
cur.execute(top_posters_sql)

In [59]:
# Materialise the rows returned by the most recently executed query.
top_posters = cur.fetchall()

In [60]:
# Display the fetched (username, num_posts) rows.
top_posters

[('tptacek', 6486),
 ('jacquesm', 4400),
 ('eru', 4207),
 ('pjmlp', 3400),
 ('pg', 2803),
 ('wmf', 2773),
 ('jrockway', 2617),
 ('Tichy', 2588),
 ('gaius', 2569),
 ('icebraining', 2361)]

---

**Question 4**

> There’s another table in your database called users that has the following columns:
 - username
 - name
 - is_vip
 - joined_datetime
>
> Write a query that updates the users table so that only the top-10 posters have a value for is_vip.

---

In [72]:
# Ten users with the highest stored post count.
# No GROUP BY is needed: username is the primary key, so each row is already
# one user and there is nothing to aggregate.
cur.execute("""SELECT username, numposts, joined FROM users ORDER BY numposts DESC LIMIT 10""")

In [73]:
# Materialise the rows returned by the most recently executed query.
test1 = cur.fetchall()

In [74]:
# Display the fetched (username, numposts, joined) rows.
test1

[('tptacek', 6486, datetime.datetime(2007, 12, 10, 1, 4, 19)),
 ('jacquesm', 4400, datetime.datetime(2009, 1, 30, 22, 30, 52)),
 ('eru', 4207, datetime.datetime(2007, 8, 16, 16, 3, 6)),
 ('pjmlp', 3400, datetime.datetime(2011, 10, 9, 12, 33, 54)),
 ('pg', 2803, datetime.datetime(2006, 10, 9, 19, 52, 45)),
 ('wmf', 2773, datetime.datetime(2007, 8, 21, 20, 52, 39)),
 ('jrockway', 2617, datetime.datetime(2007, 5, 13, 7, 26, 39)),
 ('Tichy', 2588, datetime.datetime(2007, 3, 31, 22, 12, 20)),
 ('gaius', 2569, datetime.datetime(2008, 5, 29, 17, 18, 15)),
 ('icebraining', 2361, datetime.datetime(2011, 6, 20, 10, 26, 6))]

In [None]:
# Flag the ten users with the highest numposts as VIPs.
# WHERE needs a boolean predicate, so test membership with IN instead of using
# the multi-row subquery as the condition itself.
cur.execute("""
UPDATE users
SET is_vip = TRUE
WHERE username IN (
    SELECT username FROM users ORDER BY numposts DESC LIMIT 10
)
""")
conn.commit()

In [None]:
# Read back the rows flagged as VIP so the following fetchall() has a result set.
# Repeating the earlier INSERT INTO users here would collide with the users
# primary key and, being an INSERT, would return no rows to fetch.
cur.execute("""SELECT username, numposts, is_vip FROM users WHERE is_vip ORDER BY numposts DESC""")

In [None]:
# Materialise the rows returned by the most recently executed statement.
# NOTE(review): this only works if the preceding cell ran a row-returning
# statement; psycopg2 raises ProgrammingError on fetchall() after a plain INSERT.
test2 = cur.fetchall()

In [None]:
# Display the fetched rows.
test2

In [None]:
# Flag the ten most prolific commenters as VIPs.
# The statement must start with UPDATE <table>, the SET target is a bare column
# name, and an IN subquery may return only one column - so the count moves
# into ORDER BY.
cur.execute("""UPDATE users SET is_vip = TRUE WHERE username IN (
        SELECT username FROM user_comments GROUP BY username ORDER BY count(DISTINCT comments) DESC LIMIT 10
        )""")
conn.commit()

In [None]:
# Flag the ten most prolific commenters as VIPs.
# is_vip is a boolean column, so assign TRUE rather than the integer 1; the
# membership test needs the IN keyword and a single-column subquery.
cur.execute("""
UPDATE users
SET is_vip = TRUE
WHERE username IN (
        SELECT username FROM user_comments GROUP BY username ORDER BY count(DISTINCT comments) DESC LIMIT 10
        )
        """)
conn.commit()

In [None]:
# Flag the ten most prolific commenters as VIPs.
# PostgreSQL will not assign the integer 1 to a boolean column, and an IN
# subquery may return only one column, so the count is used only for ordering.
cur.execute('UPDATE users SET is_vip = TRUE WHERE username IN (SELECT username FROM user_comments GROUP BY username ORDER BY count(DISTINCT comments) DESC LIMIT 10)')
conn.commit()

In [None]:
# Distinct (username, comment timestamp) pairs for users that appear in both tables.
# The timestamp column in user_comments is posted_datetime (not "pasted_datetime").
cur.execute('SELECT u.username, c.posted_datetime FROM users u INNER JOIN user_comments c ON u.username = c.username GROUP BY u.username, c.posted_datetime')

---

- **Question 5**

> Using both the users and user_comments tables, write a query to calculate what percentage of comments were made in the first 30 days of the user's account.
---

In [None]:
# # SQL
# CREATE #temp_first_month_posts (
#     username VARCHAR(30)
#     ,num_posts INT
#     );

# INSERT INTO #temp_first_month_posts (
#     SELECT joined_date, posted_datetime FROM users, user_comments
#     ); 

#     # SQL
# CREATE #temp_total_posts (
#     username VARCHAR(30)
#     ,num_posts INT
#     );

# INSERT INTO #temp_total_posts (
#     SELECT username, count(DISTINCT comments) as num_posts FROM user_comments GROUP BY username ORDER BY num_posts 
#     ); 

In [63]:
# num_unique_users
# 179400