In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from scipy.sparse import hstack

# save models to pickle
import pickle

In [2]:
# Relative path of the input file; it is passed to convert_to_df, which reads
# it with pd.read_csv (note the path carries no file extension).
filename = 'twitter_data/benghazi2016'

In [3]:
def convert_to_df(filename):
    """Read a delimited text file of tweets into a de-duplicated DataFrame.

    The file is decoded as ISO-8859-1 and its columns are labelled with the
    ten names listed below. Because the names are supplied explicitly and no
    header row is requested, the first line of the file is treated as data.

    Parameters
    ----------
    filename : str or path-like
        Location of the file, forwarded unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed rows with fully identical rows removed (first occurrence
        kept). The index is not reset, so it may contain gaps.
    """
    column_names = ['date', 'fullname', 'id', 'likes', 'replies',
                    'retweets', 'text', 'url', 'user', 'month']
    tweets = pd.read_csv(filename, names=column_names, encoding="ISO-8859-1")
    return tweets.drop_duplicates()

In [4]:
# Load the file named by `filename` into a DataFrame via convert_to_df
# (which also drops duplicate rows).
ben2016 = convert_to_df(filename)

In [5]:
# Number of rows left after duplicates were dropped.
ben2016.shape[0]

7053

In [6]:
# Print every distinct value of the `date` column, sorted ascending, one per
# line. Only the column itself is sorted rather than the whole frame.
sorted_dates = ben2016['date'].sort_values()
for date in sorted_dates.unique():
    print(date)

2016-01-01 23:53:29
2016-01-01 23:53:40
2016-01-01 23:53:51
2016-01-01 23:54:05
2016-01-01 23:54:07
2016-01-01 23:54:16
2016-01-01 23:54:20
2016-01-01 23:55:48
2016-01-01 23:56:08
2016-01-01 23:56:31
2016-01-01 23:57:33
2016-01-01 23:57:59
2016-01-01 23:58:32
2016-01-02 23:48:47
2016-01-02 23:49:10
2016-01-02 23:49:33
2016-01-02 23:49:35
2016-01-02 23:49:48
2016-01-02 23:51:08
2016-01-02 23:51:41
2016-01-02 23:53:22
2016-01-02 23:53:37
2016-01-02 23:54:00
2016-01-02 23:54:29
2016-01-02 23:54:45
2016-01-02 23:55:29
2016-01-02 23:56:18
2016-01-02 23:57:27
2016-01-02 23:57:32
2016-01-02 23:57:58
2016-01-02 23:58:44
2016-01-02 23:59:29
2016-01-02 23:59:48
2016-01-03 23:47:43
2016-01-03 23:47:44
2016-01-03 23:47:55
2016-01-03 23:48:05
2016-01-03 23:48:21
2016-01-03 23:48:58
2016-01-03 23:49:27
2016-01-03 23:49:42
2016-01-03 23:49:50
2016-01-03 23:50:20
2016-01-03 23:50:22
2016-01-03 23:50:35
2016-01-03 23:51:27
2016-01-03 23:52:21
2016-01-03 23:53:19
2016-01-03 23:53:29
2016-01-03 23:54:16


2016-02-03 23:49:13
2016-02-03 23:49:15
2016-02-03 23:49:35
2016-02-03 23:52:13
2016-02-03 23:52:17
2016-02-03 23:53:46
2016-02-03 23:55:35
2016-02-03 23:56:46
2016-02-03 23:57:56
2016-02-03 23:59:42
2016-02-04 23:48:52
2016-02-04 23:49:34
2016-02-04 23:50:33
2016-02-04 23:51:26
2016-02-04 23:51:51
2016-02-04 23:52:04
2016-02-04 23:52:07
2016-02-04 23:52:11
2016-02-04 23:52:17
2016-02-04 23:54:43
2016-02-04 23:55:02
2016-02-04 23:55:48
2016-02-04 23:56:25
2016-02-04 23:56:40
2016-02-04 23:58:11
2016-02-04 23:58:39
2016-02-04 23:58:51
2016-02-04 23:58:54
2016-02-04 23:59:36
2016-02-05 23:21:36
2016-02-05 23:54:28
2016-02-05 23:54:44
2016-02-05 23:54:55
2016-02-05 23:55:09
2016-02-05 23:55:12
2016-02-05 23:55:13
2016-02-05 23:55:25
2016-02-05 23:55:29
2016-02-05 23:55:46
2016-02-05 23:56:03
2016-02-05 23:57:07
2016-02-05 23:57:39
2016-02-05 23:58:22
2016-02-05 23:58:26
2016-02-05 23:58:39
2016-02-05 23:59:12
2016-02-05 23:59:31
2016-02-05 23:59:36
2016-02-05 23:59:48
2016-02-06 23:52:53


2016-04-23 23:50:00
2016-04-23 23:50:06
2016-04-23 23:51:37
2016-04-23 23:51:39
2016-04-23 23:53:48
2016-04-23 23:53:56
2016-04-23 23:55:23
2016-04-23 23:55:41
2016-04-23 23:55:53
2016-04-23 23:56:42
2016-04-23 23:57:17
2016-04-23 23:57:34
2016-04-23 23:58:27
2016-04-24 23:44:13
2016-04-24 23:44:28
2016-04-24 23:45:28
2016-04-24 23:46:35
2016-04-24 23:47:15
2016-04-24 23:47:37
2016-04-24 23:47:54
2016-04-24 23:48:55
2016-04-24 23:49:57
2016-04-24 23:50:27
2016-04-24 23:50:36
2016-04-24 23:51:27
2016-04-24 23:51:46
2016-04-24 23:52:46
2016-04-24 23:52:50
2016-04-24 23:54:16
2016-04-24 23:55:48
2016-04-24 23:56:27
2016-04-24 23:57:23
2016-04-24 23:57:24
2016-04-25 23:47:04
2016-04-25 23:47:10
2016-04-25 23:47:59
2016-04-25 23:48:23
2016-04-25 23:49:01
2016-04-25 23:50:43
2016-04-25 23:52:15
2016-04-25 23:52:48
2016-04-25 23:52:56
2016-04-25 23:52:59
2016-04-25 23:53:07
2016-04-25 23:55:16
2016-04-25 23:55:22
2016-04-25 23:56:06
2016-04-25 23:56:13
2016-04-25 23:56:49
2016-04-25 23:57:15


2016-07-13 23:56:44
2016-07-13 23:58:13
2016-07-13 23:58:52
2016-07-14 23:51:42
2016-07-14 23:51:56
2016-07-14 23:52:09
2016-07-14 23:52:13
2016-07-14 23:52:27
2016-07-14 23:52:35
2016-07-14 23:53:40
2016-07-14 23:53:52
2016-07-14 23:54:04
2016-07-14 23:54:30
2016-07-14 23:54:58
2016-07-14 23:56:08
2016-07-14 23:56:25
2016-07-14 23:56:31
2016-07-14 23:57:34
2016-07-14 23:57:39
2016-07-14 23:57:55
2016-07-14 23:59:00
2016-07-14 23:59:09
2016-07-14 23:59:34
2016-07-15 23:32:08
2016-07-15 23:52:57
2016-07-15 23:53:10
2016-07-15 23:53:11
2016-07-15 23:53:15
2016-07-15 23:53:53
2016-07-15 23:54:04
2016-07-15 23:54:17
2016-07-15 23:54:32
2016-07-15 23:54:49
2016-07-15 23:54:57
2016-07-15 23:56:27
2016-07-15 23:56:51
2016-07-15 23:56:54
2016-07-15 23:57:06
2016-07-15 23:57:38
2016-07-15 23:58:55
2016-07-15 23:59:03
2016-07-15 23:59:24
2016-07-15 23:59:48
2016-07-16 11:22:00
2016-07-16 23:20:02
2016-07-16 23:48:10
2016-07-16 23:49:01
2016-07-16 23:49:39
2016-07-16 23:49:40
2016-07-16 23:50:49


2016-09-23 23:54:05
2016-09-23 23:54:23
2016-09-23 23:55:42
2016-09-23 23:56:46
2016-09-23 23:56:56
2016-09-23 23:57:33
2016-09-23 23:57:53
2016-09-23 23:58:49
2016-09-23 23:58:51
2016-09-23 23:58:59
2016-09-23 23:59:13
2016-09-23 23:59:19
2016-09-23 23:59:45
2016-09-23 23:59:48
2016-09-24 20:14:49
2016-09-24 23:04:41
2016-09-24 23:57:45
2016-09-24 23:57:49
2016-09-24 23:58:15
2016-09-24 23:58:19
2016-09-24 23:58:23
2016-09-24 23:58:24
2016-09-24 23:58:44
2016-09-24 23:58:54
2016-09-24 23:59:00
2016-09-24 23:59:02
2016-09-24 23:59:06
2016-09-24 23:59:09
2016-09-24 23:59:11
2016-09-24 23:59:21
2016-09-24 23:59:32
2016-09-24 23:59:36
2016-09-24 23:59:47
2016-09-24 23:59:58
2016-09-25 23:56:09
2016-09-25 23:56:23
2016-09-25 23:56:45
2016-09-25 23:56:55
2016-09-25 23:57:20
2016-09-25 23:57:37
2016-09-25 23:57:52
2016-09-25 23:58:02
2016-09-25 23:58:10
2016-09-25 23:58:22
2016-09-25 23:58:28
2016-09-25 23:58:33
2016-09-25 23:58:54
2016-09-25 23:59:16
2016-09-25 23:59:21
2016-09-25 23:59:24


2016-12-09 23:57:47
2016-12-09 23:58:04
2016-12-09 23:58:11
2016-12-09 23:58:49
2016-12-09 23:59:36
2016-12-09 23:59:41
2016-12-10 15:34:41
2016-12-10 16:03:41
2016-12-10 16:41:10
2016-12-10 23:53:04
2016-12-10 23:53:21
2016-12-10 23:53:35
2016-12-10 23:53:51
2016-12-10 23:54:13
2016-12-10 23:54:16
2016-12-10 23:54:43
2016-12-10 23:54:53
2016-12-10 23:54:57
2016-12-10 23:55:47
2016-12-10 23:56:35
2016-12-10 23:57:41
2016-12-10 23:58:04
2016-12-10 23:58:51
2016-12-10 23:58:59
2016-12-10 23:59:29
2016-12-10 23:59:57
2016-12-11 01:22:14
2016-12-11 01:33:09
2016-12-11 17:56:56
2016-12-11 19:48:28
2016-12-11 21:08:43
2016-12-11 23:48:24
2016-12-11 23:52:42
2016-12-11 23:52:48
2016-12-11 23:52:56
2016-12-11 23:53:09
2016-12-11 23:55:04
2016-12-11 23:55:16
2016-12-11 23:57:22
2016-12-11 23:58:18
2016-12-11 23:59:10
2016-12-11 23:59:23
2016-12-11 23:59:48
2016-12-12 23:57:20
2016-12-12 23:57:27
2016-12-12 23:57:37
2016-12-12 23:57:52
2016-12-12 23:57:58
2016-12-12 23:58:00
2016-12-12 23:58:02
