## Importing Libraries

In [1]:
import re
import time
import numpy as np
import pandas as pd
import requests
import sqlite3 
from sqlite3 import Error
from connect_and_create import connect_and_create_table
from extract_twitter_data import import_data, merge_tweet_data
from progressbar import progressbar

## Creating and Connecting to SQLite Database

In [2]:
# DDL for the `emotions` table: an integer primary-key `id` plus required
# `text` and `emotion` text columns. IF NOT EXISTS makes the statement a
# no-op when the table is already present.
sql_create_emotions_table = """ CREATE TABLE IF NOT EXISTS emotions (
                                        id integer PRIMARY KEY,
                                        text text NOT NULL,
                                        emotion text NOT NULL
                                    ); """


In [3]:
 # Location of the SQLite file, relative to the notebook's working directory
 # like the ./data/*.txt and ./data/*.csv inputs this notebook reads, so the
 # notebook does not depend on one machine's absolute user path.
 database = "./data/emotion.db"

In [4]:
# Make sure the emotions table exists in the SQLite file.
# connect_and_create_table is a project helper (connect_and_create.py);
# presumably it opens `database` and executes the DDL — confirm against
# that module.
connect_and_create_table(database,sql_create_emotions_table)

In [5]:
# Open the connection that the later load and query cells use, and create a
# cursor on it.
conn = sqlite3.connect(database)
c = conn.cursor()

## Importing Tweet IDs with Emotion Labels

In [6]:
# Label files (test, dev and train splits) handed to the project helper
# import_data, which returns them as a single frame; the previews of `df`
# in this notebook show an `id` and an `emotion` column.
filelist = [
    './data/test.txt',
    './data/dev.txt',
    './data/train_1.txt',
    './data/train_2_1.txt',
    './data/train_2_10.txt',
    './data/train_2_2.txt',
    './data/train_2_3.txt',
    './data/train_2_4.txt',
    './data/train_2_5.txt',
    './data/train_2_6.txt',
    './data/train_2_7.txt',
    './data/train_2_8.txt',
    './data/train_2_9.txt',
]
df = import_data(filelist)

In [7]:
# Preview the first five labelled tweet ids.
df.head(n=5)

Unnamed: 0,id,emotion
0,144514632252530688,anger
1,147987957679263745,fear
2,143869765302226944,joy
3,148904944022470657,joy
4,145344771152543744,fear


In [8]:
# Summarise the label frame: row count, column dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2488982 entries, 0 to 2488981
Data columns (total 2 columns):
id         int64
emotion    object
dtypes: int64(1), object(1)
memory usage: 38.0+ MB


In [9]:
# Class balance of the emotion labels, most frequent first.
df["emotion"].value_counts()

joy             706182
sadness         616471
anger           574170
love            301759
fear            135154
thankfulness    131340
surprise         23906
Name: emotion, dtype: int64

## Importing Text Data

In [13]:
# One CSV of tweet text per emotion; this rebinds `filelist` for the
# text-loading step.
filelist = [
    './data/anger.csv',
    './data/fear.csv',
    './data/joy.csv',
    './data/sadness.csv',
    './data/surprise.csv',
    './data/thankfulness.csv',
    './data/love.csv',
]

In [15]:
# Combine the per-emotion CSVs into one frame via the project helper
# merge_tweet_data; the recorded info() output for the result lists `id`
# and `text` columns.
twitter_data = merge_tweet_data(filelist)

In [16]:
# Summarise the tweet-text frame: row count, non-null counts and dtypes.
twitter_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1339761 entries, 0 to 1339760
Data columns (total 2 columns):
id      1339761 non-null int64
text    1339761 non-null object
dtypes: int64(1), object(1)
memory usage: 20.4+ MB


## Merging with Emotion Labels

In [13]:
# Attach each tweet's emotion label to its text by joining on the tweet id.
# The inner join keeps only ids present in both frames, and
# validate='one_to_one' makes pandas raise a MergeError if an id is repeated
# on either side.
# NOTE(review): the recorded twitter_data.info() output lists 1339761 rows
# while the recorded merged shape is 1339794; those outputs appear to come
# from different runs — confirm by re-running the notebook top to bottom.
data = pd.merge(
    left=twitter_data,
    right=df,
    how='inner',
    on='id',
    validate='one_to_one',
)

In [14]:
# Preview the first five merged rows (id, text, emotion).
data.head(n=5)

Unnamed: 0,id,text,emotion
0,146975687788601344,Do not like this new twitter app #irritating,anger
1,148156821444636672,@timlovejoy what's going on with us blues at t...,anger
2,148543314604462080,People complain too much. #annoying,anger
3,134687650601893889,The awkward moment when someone calls you out ...,anger
4,144248292254031872,Chris Paul leaving the Hornets #disgusted,anger


In [15]:
# (rows, columns) of the merged frame.
data.shape

(1339794, 3)

In [16]:
# Summarise the merged frame: row count, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1339794 entries, 0 to 1339793
Data columns (total 3 columns):
id         1339794 non-null int64
text       1339794 non-null object
emotion    1339794 non-null object
dtypes: int64(1), object(2)
memory usage: 40.9+ MB


In [17]:
# Class balance after merging with the tweet text, most frequent first.
data["emotion"].value_counts()

joy             386601
sadness         330432
anger           291480
love            167715
thankfulness     78228
fear             72094
surprise         13244
Name: emotion, dtype: int64

## Loading Emotion Data to SQLite Table

In [18]:
# Load the merged frame into the existing `emotions` table.
# if_exists="replace" would drop the table and let pandas recreate it from
# the frame's dtypes, discarding the PRIMARY KEY / NOT NULL constraints
# declared in sql_create_emotions_table. Instead, clear any rows left by a
# previous run and append, so the cell can be re-run and the schema is kept.
# The table must already exist (it is created earlier in this notebook).
# `with conn` commits on success and rolls back the DELETE if the insert fails.
with conn:
    conn.execute("DELETE FROM emotions;")
    data.to_sql("emotions", conn, if_exists="append", index=False)

In [6]:
# Sanity check: number of rows now stored in the emotions table.
pd.read_sql_query(sql="select count(*) from emotions;", con=conn)

Unnamed: 0,count(*)
0,1339794


In [7]:
# Preview five stored rows. LIMIT lets SQLite return only those rows instead
# of loading the whole table into pandas just to keep the first five.
pd.read_sql_query("SELECT * FROM emotions LIMIT 5;", conn)

Unnamed: 0,id,text,emotion
0,1865547808,"Be gentle with yourself for we all fall down, ...",love
1,1898216785,Weeding out negative thoughts allows inspirati...,love
2,1943079793,"There is a great divine plan to everything, ev...",love
3,7756449559,i was asked a question.: What am i more afraid...,love
4,14400628407,A #Libra doesn't play with #love,love


In [21]:
# Release the SQLite connection once the load has been checked.
conn.close()