-
Notifications
You must be signed in to change notification settings - Fork 3
/
EnronSQL.txt
206 lines (132 loc) · 5.8 KB
/
EnronSQL.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
##MySQL Musings and Functions
to do:
- replace nulls in analysis2 with N/A and re-run messageanalysis
- group recipientsmag3 by message id and average the tenure
- join recipientsmag3 + messageanalysis
- group (pool) by date and average the tenure
- export to r for metrics variable creation
——————————
changing reserved name
###ALTER TABLE analysis2
CHANGE `date` newdate varchar(75)
ALTER TABLE analysis2
CHANGE `status` hier varchar(75)
______________________________
obtain table characteristics including data type and primary keys
### describe messageanalysis
______________________________
add a primary key
UPDATE messageanalysis ADD PRIMARY KEY (mid)
UPDATE recipients ADD PRIMARY KEY (mid)
——————————
Stripping out time from date time
###ALTER TABLE analysis2 CHANGE newdate newdate DATE;
________________
Change column data type
###ALTER TABLE messageanalysis MODIFY direct varchar(15)
——————————
Adding title score columns
#### for recipients #### create table recipientsmag3 select mid, rtype, rvalue, title, legend.mag
from recipient stats
left join legend
on status = title
##### for messages ##### create table messageanalysis select mid, sender, newdate, subject, body, hier, classification, legend.mag
from analysis2
left join legend
on hier = title
—————————————
Joining the messages table with the recipient scores
##### UPDATE messageanalysis
INNER JOIN recipients
ON messageanalysis.mid = recipients.mid
SET messageanalysis.rec_mag = recipients.recip_mag
——————————————
Creating the “vector” or direction column determining where the message is going
UPDATE messageanalysis
SET direct = IF((sender_mag - rec_mag) > 0, 'Below', IF((sender_mag - rec_mag) < 0, 'Above', 'Peer')) WHERE (sender_mag - rec_mag) IS NOT NULL
_____________________________
Join for the two major tables
######CREATE TABLE analysis SELECT sender, newdate, subject, body, hier, classification, sender_mag, recip_mag
FROM messageanalysis
INNER JOIN recipients
on messageanalysis.mid = recipients.mid
WHERE newdate BETWEEN '1990-01-01' AND '2010-08-010 23:59:59'
ORDER BY newdate
——————————————————
Select for the two major tables
###SELECT messageanalysis.mid, sender, newdate, subject, body, hier, classification, sender_mag
FROM messageanalysis
INNER JOIN recipients
on messageanalysis.mid = recipients.mid
WHERE newdate BETWEEN '1990-01-01' AND '2010-08-010 23:59:59' AND length(hier)>1
limit 0,1000000
———————————
Select after *message analysis* was complete
###SELECT messageanalysis.mid, sender, newdate, concat(subject), concat(body), hier, classification, direct
FROM messageanalysis
INNER JOIN recipients
on messageanalysis.mid = recipients.mid
WHERE length(direct) >0 ## toggle for broader view including emails sent from the outside AND newdate BETWEEN '1990-01-01' AND '2010-08-010 23:59:59'
GROUP BY newdate, hier, direct
limit 0,1000000
_________________________________
CREATE TABLE analysis2 SELECT mid, sender, date, subject, body, status
from message
left join employeelist
on sender = Email_ID or sender = Email2 or sender = Email3 or sender = Email4
alter table analysis2 add classification varchar(50)
---
+++
@@ -2,0 +2,3 @@
+innodb_log_file_size = 7864320
+innodb_buffer_pool_size = 134217728
+innodb_log_buffer_size = 1572864
update analysis2
SET classification = "External"
Where sender not like "%@enron.com%"
update analysis2
SET classification = "Enron"
Where sender like "%@enron.com%"
update analysis2
set questioncount = (length(body) - length(replace(body,'?',''))) + (length(subject) - length(replace(subject,'?','')))
update analysis2
set questioncounts = (length(subject) - length(replace(subject,'?','')))
update analysis2
set newdate = date(analysis2.date)
update analysis2
set status = 'N/A' where status is null
CREATE TABLE AnalysisSummary2
SELECT mid, newdate, status, classification, questioncount, questioncounts, questioncountb
FROM analysis2
select status,newdate,sum(questioncount),sum(questioncounts),sum(questioncountb) from AnalysisSummary2 where year(newdate)>1990 group by newdate,status
CREATE TABLE SummaryStatusSimple
select status,newdate,sum(questioncount) from AnalysisSummary2 where year(newdate)>1990 group by newdate,status
CREATE TABLE SummaryStatusSimple2
select status,newdate,sum(questioncount) from AnalysisSummary2 where year(newdate)>1990 group by month(newdate),status
CREATE TABLE analysis SELECT mid, sender, date, subject, body, status
from message
left join employeelist
on sender = Email_ID or sender = Email2 or sender = Email3 or sender = Email4
alter table analysis add classification varchar(50)
update analysis
SET classification = "External"
Where sender not like "%@enron.com%"
update analysis
SET classification = "Enron"
Where sender like "%@enron.com%"
update analysis
set questioncount = (length(body) - length(replace(body,'?',''))) + (length(subject) - length(replace(subject,'?','')))
update analysis
set questioncounts = (length(subject) - length(replace(subject,'?','')))
update analysis
set newdate = date(analysis2.date)
update analysis
set status = 'N/A' where status is null
CREATE TABLE AnalysisSummary2
SELECT mid, newdate, status, classification, questioncount, questioncounts, questioncountb
FROM analysis2
select status,newdate,sum(questioncount),sum(questioncounts),sum(questioncountb) from AnalysisSummary2 where year(newdate)>1990 group by newdate,status
CREATE TABLE SummaryStatusSimple
select status,newdate,sum(questioncount) from AnalysisSummary2 where year(newdate)>1990 group by newdate,status
CREATE TABLE SummaryStatusSimple2
select status,newdate,sum(questioncount) from AnalysisSummary2 where year(newdate)>1990 group by month(newdate),status