## Reading and creating new table in postgres

In [1]:
#!pip install sqlalchemy
from sqlalchemy import *
from sqlalchemy.engine.url import URL
import pandas as pd

### Adding the parameters to connect to database, these are similar to previous notebook.

In [2]:
DATABASE = {
    'drivername': 'postgres',
    'host': 'localhost',
    'port': '5432',
    'username': 'twitter',
    'password': 'twitter',
    'database': 'twitter'
}

#### Creating a database connection

In [3]:
def db_connect():
    """
    Returns sqlalchemy engine instance
    """
    return create_engine(URL(**DATABASE))
engine = db_connect()

#### Get the list of tables that are present in database

In [4]:
print(engine.table_names())

['users_with_tweets_more_than_20', 'twitter_users', 'tweets_coronavirus']


### Reading data from twitter_users to a dataframe.

In [5]:
twitter_users = pd.read_sql_query('select * from twitter_users', con=engine)

In [6]:
twitter_users.head(10)

Unnamed: 0,user_id,user_name
0,885516405843533825,bsbigsexy77
1,3690194362,BioTrendy
2,827854565605191681,TheresaBravo8
3,3932603189,tati_vict
4,1227119032928616448,RaghavN92483800
5,1089206520531877888,JoseReq13
6,62628195,politicaltragic
7,3315442865,Chedahkid
8,35698331,ShawnFnLucas
9,1125029594577371138,ChristianneQ


### Reading data from tweets_coronavirus to a dataframe.

In [7]:
tweets_coronavirus = pd.read_sql_query('select * from tweets_coronavirus', con=engine)

In [8]:
tweets_coronavirus.head(10)

Unnamed: 0,id,created_at,tweet,user_id,retweetcount
0,1,2020-03-31 20:29:48,RT @RealCandaceO: UPDATE on Italy’s #coronavir...,885516405843533825,0
1,2,2020-03-31 20:29:48,Funny videos 🔴 Coronavirus (Covid-19) 🔥 BioTre...,3690194362,0
2,3,2020-03-31 20:29:48,RT @SenWarren: The coronavirus bill didn’t do ...,827854565605191681,0
3,4,2020-03-31 20:29:48,RT @B52Malmet: Bless the nursing home workers....,3932603189,0
4,5,2020-03-31 20:29:48,RT @TVMohandasPai: Over 83% trust Modi govt wi...,1227119032928616448,0
5,6,2020-03-31 20:29:48,RT @davidsirota: It's almost as if it is a set...,1089206520531877888,0
6,7,2020-03-31 20:29:48,RT @chrismurphys: A dozen jockeys and five or ...,62628195,0
7,8,2020-03-31 20:29:48,RT @KammyTaughtYou: Y’all gotta stop telling p...,3315442865,0
8,9,2020-03-31 20:29:48,RT @PoliticsWolf: The U.S. Postal Service warn...,35698331,0
9,10,2020-03-31 20:29:48,@LtGovTX Hold on...last week it was okay for s...,1125029594577371138,0


### Reading data for all users where they have more than 20 tweets to a dataframe.

In [9]:
users_with_tweets_more_than_20 = pd.read_sql_query('select * from tweets_coronavirus a, twitter_users b \
where a.user_id = b.user_id and a.user_id in (select user_id from ( \
select user_id , count(*) counted from tweets_coronavirus \
group by user_id \
having count(*) > 20) as a)', con=engine)

In [10]:
users_with_tweets_more_than_20.head(10)

Unnamed: 0,id,created_at,tweet,user_id,retweetcount,user_id.1,user_name
0,135,2020-03-31 20:29:50,@GrimKim @thebafflermag Also need this#Protect...,1236581105748361216,0,1236581105748361216,dupont16141315
1,435,2020-03-31 20:29:57,RT @KatyTurNBC: He’s worked 18 shifts in the l...,1238467680773894146,0,1238467680773894146,CoronaUpdateBot
2,800,2020-03-31 20:30:04,Social distancing helps slow California corona...,1025186618108530689,0,1025186618108530689,voiceofthehwy
3,818,2020-03-31 20:30:04,Coronavirus in Georgia: Confirmed cases contin...,1025186618108530689,0,1025186618108530689,voiceofthehwy
4,855,2020-03-31 20:30:05,Ice-rink to be turned into emergency mortuary ...,1179387847310266370,0,1179387847310266370,newworldsurvive
5,911,2020-03-31 20:30:06,@chicagosmayor EVERY AMERICAN QUALIFIES REGARD...,490949027,0,490949027,JONIMITCHELL4
6,984,2020-03-31 20:30:08,"'Healthy' boy, 13, is youngest UK coronavirus ...",1179387847310266370,0,1179387847310266370,newworldsurvive
7,1094,2020-03-31 20:30:10,"RT @GOP: Jan. 15, 2020: On the day the CDC rep...",1238467680773894146,0,1238467680773894146,CoronaUpdateBot
8,1186,2020-03-31 20:30:12,RT @newworldsurvive: 'It doesn't matter what a...,1229091405152256001,0,1229091405152256001,BotRedian
9,1266,2020-03-31 20:30:13,"RT @glennkirschner2: I, for one, can’t wait to...",1078741623197048832,0,1078741623197048832,Leeblev50045868


### Creating a new table with this dataframe and write this data into a table.

In [11]:
table_name = 'users_with_tweets_more_than_20'
users_with_tweets_more_than_20.to_sql(table_name, con=engine,  if_exists='replace')

#### checking the tables to make sure the newly created table is present.

In [12]:
print(engine.table_names())

['users_with_tweets_more_than_20', 'twitter_users', 'tweets_coronavirus']


#### There are many attributes in a tweet which are not added, you can experiment with adding geolocation and many more interesting attributes to analyze data.

### Happy Coding