-
Notifications
You must be signed in to change notification settings - Fork 0
/
LDAinitial.py
90 lines (59 loc) · 2.66 KB
/
LDAinitial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# -*- coding: utf-8 -*-
import sys
import os
import re
# Point Django at the project's settings module. This assignment sits before
# the Django model import below, so the order of these lines matters.
os.environ['DJANGO_SETTINGS_MODULE'] = 'mysite.settings'
import django
# NOTE(review): django.setup() is left disabled here. Stand-alone scripts on
# Django 1.7+ generally need it before any model import -- confirm against
# the Django version this project targets.
#django.setup()
import LDAmodel # import lda_parts, News, get_words
import gensim
from feedreader.models import Entry
def get_data():
    """Load every feed entry and return its key and tag-stripped text.

    Returns:
        A three-element list ``[PKs, Titles, Descriptions]`` of parallel
        lists: the primary key of each ``Entry``, its title, and its
        description. Every substring matching ``<...>`` is removed from
        the title and the description.
    """
    # Function-level import kept from the original; the module imports
    # Entry at top level as well.
    from feedreader.models import Entry

    tag_pattern = re.compile(r"<[^>]*?>")

    # Materialise the queryset once and walk the resulting list directly
    # instead of indexing into the queryset by position.
    entries = list(Entry.objects.all())

    PKs = [entry.pk for entry in entries]
    #Links = [entry.link for entry in entries]
    Titles = [tag_pattern.sub("", entry.title) for entry in entries]
    Descriptions = [tag_pattern.sub("", entry.description) for entry in entries]
    return [PKs, Titles, Descriptions]
# Attribute names on Entry that receive the related entries, in rank order.
# The spellings are reproduced exactly as this script assigns them.
_RELEVANT_FIELDS = ('FisrtRevelant', 'SecondRevelant', 'ThirdRevelant')


def _train_and_save(texts, name, num_topics, filters, show, no_below, no_above):
    """Run the dictionary/corpus step and the LDA step for one text list.

    ``name`` is the singular label ("title" or "description"). It appears in
    the progress messages and, with an "s" appended, forms the file stem of
    the ``save`` targets passed to LDAmodel: ./model/<name>s.dictionary and
    ./model/<name>s.model.
    """
    parts = LDAmodel.lda_parts(texts)
    parts.dictionary_corpus(filters=filters, show=show,
                            save="./model/%ss.dictionary" % name,
                            no_below=no_below, no_above=no_above)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("dictionary of %s made" % name)
    parts.LDA_model(num_topics=num_topics,
                    save="./model/%ss.model" % name,
                    show=show, set_matrix=False)
    print("LDA of %s made" % name)


def _store_relevant(news):
    """Assign each entry's related entries from ``news.RelevantList`` and save.

    ``news.RelevantList`` is indexed by entry primary key and yields a
    sequence of primary keys. Sequences of one to three keys are applied to
    the attributes named in ``_RELEVANT_FIELDS`` in order; any other length
    leaves the entry's attributes untouched. Every entry is saved.
    """
    for entry in Entry.objects.all():
        relevant_pks = news.RelevantList[entry.pk]
        count = len(relevant_pks)
        if 1 <= count <= len(_RELEVANT_FIELDS):
            # zip stops at the shorter sequence, so only the first `count`
            # attributes are assigned.
            for field, pk in zip(_RELEVANT_FIELDS, relevant_pks):
                setattr(entry, field, Entry.objects.get(pk=pk))
            print(count)
        # NOTE(review): the original indentation was lost; saving and the
        # "save ok" message are assumed to happen once per entry -- confirm.
        entry.save()
        print("save ok")


if __name__ == '__main__':
    # Usage: pass the single argument "initial" to build and save the
    # dictionaries and LDA models; any other invocation computes the related
    # entries and stores them on each Entry.
    PKs, Titles, Descriptions = get_data()

    filters = True
    show = False
    num_topics = 50
    no_below = 5
    no_above = 0.4

    if len(sys.argv) == 2 and sys.argv[1] == 'initial':
        _train_and_save(Titles, "title", num_topics,
                        filters, show, no_below, no_above)
        _train_and_save(Descriptions, "description", num_topics,
                        filters, show, no_below, no_above)
    else:
        NewsEntry = LDAmodel.News(PKs, Titles, Descriptions,
                                  filters=filters, show=show,
                                  no_below=no_below, no_above=no_above)
        NewsEntry.calculate_relevent()
        _store_relevant(NewsEntry)