In [1]:
import numpy as np
import tensorflow.contrib.keras as kr
import fasttext
import tensorflow as tf
from sklearn import metrics

Instructions for updating:
Use the retry module or similar alternatives.


In [22]:
 def loadWordEmbedding(model):
     """Load a pretrained fastText model and return its vocabulary and vectors.

     Args:
         model: path handed to ``fasttext.load_model``.

     Returns:
         ``(vocab, embd)``: two parallel lists. Index 0 is the placeholder
         word ``'unk'`` with an all-zero vector; the remaining entries follow
         the iteration order of ``model.words``.
     """
     model = fasttext.load_model(model)
     vocab = ['unk']
     embd = []
     for word in model.words:
         vocab.append(word)
         embd.append(model[word])
     # Size the zero 'unk' row from the loaded vectors instead of assuming 100,
     # so models trained with another dimension still give a rectangular matrix.
     # 100 is kept only as the fallback for an empty vocabulary.
     dim = len(embd[0]) if embd else 100
     embd.insert(0, [0] * dim)
     print("loaded word embedding")
     return vocab, embd

 **Load the trained fastText word embedding for use in TensorFlow**<br>
 Returns the vocab and the embedding.<br>
 The embedding_dim is 100.<br>
 The vocab has 226403 entries, for example:
 - 'unk',
 - '处会',
 - '倍觉',
 - '螳螂捕蝉',
 - '烟标',
 - '见证人',
 - '玩到',
 - '刑满释放',
 
the embedding:
- [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
     0.        ,  0.        ],
- [ 0.20675033,  0.13941619, -0.02178967, ...,  0.08394553,
    -0.57525939, -0.17493315],
- [ 0.02645796, -0.16807704, -0.33025488, ...,  0.18632847,
    0.20377228,  0.1036511 ],

note: 'unk' is added to stand for any word that is not in the trained word embedding; its vector is all zeros.



**train data format**
- label `\t` document (tab-separated) <br>
```
IT      互联网巨头雅虎公司26日宣布,公司正对高级管理层实施“大手术”,同时组建3个业务部门形成全新的公司组织构架。雅虎总裁苏珊·德克说,微软公司收购雅虎的谈判两周前结束后,雅虎终于可以专注于已耽搁数月的重组计划,为公司向赚钱机器转型迈出一大步。全新构架雅虎当天在声明中说,公司正在组建3个新业务部门,旨在把产品研发部门设为核心业务部门,同时加强产品研发部门与技术...
```

**train data cut format**
- words[0] space words[1]..... space words[end] <br>
```
来源 : 新华网 加拿大 多伦多市 一名 建筑师 推出 的 摩天 农场 概念 引起 世人 关注 。 有别于 传统 农场 , 摩天 农场 向 空中 延伸 , 形似 摩天大楼 。 如果 摩天 农场 落成 , 农作物 产出 每年 可 满足 约 3.5 万名 居民 的 需求 。 目前 , 世界 上 已 出现 数个 类似 的 ... 
```

- label number: 13
- detail of train_data:
```
 {'IT': 7021,
  '体育': 58025,
  '健康': 3486,
  '军事': 1906,
  '奥运': 18205,
  '女性': 11295,
  '娱乐': 22083,
  '房产': 37069,
  '教育': 6589,
  '文化': 2106,
  '旅游': 5945,
  '汽车': 4180,
  '财经': 40000}
```

note: **compute the average number of words per document (used for seq_length)**

**train data information**

On average, one news article contains 317 words.

The longest news article contains 1717 words.

In [2]:
class RnnConfig(object):
    """Plain container for the model and training hyper-parameters.

    Constructor arguments are stored under these attribute names:
    ``num_class`` -> ``num_classes``, ``dim`` -> ``embedding_dim``,
    ``num_lay`` -> ``num_layer``, ``drop_keep_prob`` -> ``dropout_keep_prob``;
    every other argument keeps its own name.
    """

    def __init__(self, num_class, dim, vocab_size, seq_length=600, num_lay=2, hidden_dim=128, rnn='lstm',
                 drop_keep_prob=0.5, learning_rate=1e-3, batch_size=128, num_epochs=50,
                 print_per_batch=10, save_per_batch=10, embedding='one hot'):
        # Input / output geometry.
        self.num_classes = num_class
        self.vocab_size = vocab_size
        self.seq_length = seq_length
        self.embedding_dim = dim
        # 'embedding' selects the pretrained-vector path in TextRnn; any other value does not.
        self.embedding = embedding

        # Recurrent stack: cell kind ('lstm', otherwise GRU in TextRnn), depth and width.
        self.rnn = rnn
        self.num_layer = num_lay
        self.hidden_dim = hidden_dim

        # Optimisation settings.
        self.learning_rate = learning_rate
        self.dropout_keep_prob = drop_keep_prob
        self.num_epochs = num_epochs
        self.batch_size = batch_size

        # Logging cadence, counted in batches.
        self.save_per_batch = save_per_batch
        self.print_per_batch = print_per_batch

In [3]:
class TextRnn(object):
    """Multi-layer RNN text classifier built as a TensorFlow 1.x graph.

    Constructing an instance creates the placeholders and then calls ``rnn()``,
    which adds the embedding lookup, recurrent stack, dense head, loss,
    optimizer and accuracy ops to the current default graph.

    Attributes created: ``input_x``, ``input_y``, ``keep_prob``, ``logits``,
    ``y_pred_cls``, ``loss``, ``optim``, ``acc`` and, only when
    ``config.embedding == 'embedding'``, ``embedding_placeholder`` and
    ``embedding_init``.
    """

    def __init__(self, config):
        """Store ``config``, create the input placeholders and build the graph."""
        self.config = config

        # int32 token ids, shape [batch, seq_length].
        self.input_x = tf.placeholder(tf.int32, [None, self.config.seq_length], name='input_x')
        # float32 label rows, shape [batch, num_classes].
        self.input_y = tf.placeholder(tf.float32, [None, self.config.num_classes], name='input_y')
        # Keep probability fed per run (used by both dropout sites below).
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        self.rnn()

    def rnn(self):
        """Add the embedding, RNN, scoring, loss/optimizer and accuracy ops."""
        def lstm_cell():
            """Return one BasicLSTMCell of width ``config.hidden_dim``."""
            return tf.contrib.rnn.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)

        def gru_cell():
            """Return one GRUCell of width ``config.hidden_dim``."""
            return tf.contrib.rnn.GRUCell(self.config.hidden_dim)

        def dropout():
            """Return an LSTM cell if ``config.rnn == 'lstm'``, else a GRU cell, wrapped with output dropout."""
            if self.config.rnn == 'lstm':
                cell = lstm_cell()
            else:
                cell = gru_cell()
            return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)

        with tf.device('/gpu:0'):
            if self.config.embedding == 'embedding':
                # Non-trainable table initialised to zeros; the real vectors are loaded
                # afterwards by running `embedding_init` with `embedding_placeholder` fed.
                W = tf.Variable(tf.constant(0.0, shape=[self.config.vocab_size, self.config.embedding_dim]), trainable=False, name='W')
                self.embedding_placeholder = tf.placeholder(tf.float32, [self.config.vocab_size, self.config.embedding_dim])
                self.embedding_init = W.assign(self.embedding_placeholder)
                embedding_inputs = tf.nn.embedding_lookup(W, self.input_x)
                print('load the pretrained word vector')
            else:
                # Despite the log message, this branch is also an embedding lookup,
                # into a variable created here with no explicit initializer.
                embedding = tf.get_variable('embedding', [self.config.vocab_size, self.config.embedding_dim])
                embedding_inputs = tf.nn.embedding_lookup(embedding, self.input_x)
                print('load the one hot')

        with tf.name_scope("rnn"):
            cells = [dropout() for _ in range(self.config.num_layer)]
            rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            _outputs, _ = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=embedding_inputs, dtype=tf.float32)
            # Output at the final time step. No sequence_length is passed to dynamic_rnn,
            # so this is position seq_length - 1 of the padded input.
            # NOTE(review): process_file pads with padding='post', so for short documents
            # this position is padding -- confirm that is intended.
            last = _outputs[:, -1, :]

        with tf.name_scope("score"):
            # Dense -> dropout -> ReLU, then a projection to one logit per class.
            fc = tf.layers.dense(last, self.config.hidden_dim, name='fc1')
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)

            self.logits = tf.layers.dense(fc, self.config.num_classes, name='fc2')
            # Predicted class index per row.
            self.y_pred_cls = tf.argmax(tf.nn.softmax(self.logits), 1)

        with tf.name_scope("optimize"):
            # Mean softmax cross-entropy over the batch, minimised with Adam.
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)
            self.optim = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate).minimize(self.loss)

        with tf.name_scope("accuracy"):
            # Fraction of rows whose argmax label equals the predicted class.
            correnct_pred = tf.equal(tf.argmax(self.input_y, 1), self.y_pred_cls)
            self.acc = tf.reduce_mean(tf.cast(correnct_pred, tf.float32))
            

In [10]:
def read_category(categories):
    """Return ``categories`` unchanged together with a name -> index mapping.

    The index of a category is its position in ``categories``.
    """
    cat_to_id = {name: position for position, name in enumerate(categories)}
    return categories, cat_to_id

In [21]:
def read_file(filename):
    """Yield ``(content, label)`` for each ``label<TAB>content`` line of a file.

    Each line is split on its first tab only, so the content keeps any further
    tabs and its trailing newline. Lines without a tab (for example blank
    lines) are skipped instead of raising ``IndexError``.

    Args:
        filename: path of a UTF-8 text file.

    Yields:
        ``(content, label)`` tuples in file order.
    """
    with open(filename, 'r', encoding='utf8') as f:
        for line in f:
            label, separator, content = line.partition('\t')
            if not separator:
                # No tab on this line: there is no label/content pair to emit.
                continue
            yield (content, label)

In [19]:
def process_file(content_dir, word_to_id, cat_to_id, max_length=1000, embedding='one hot'):
    """Turn a labelled text file into padded id sequences and one-hot labels.

    Args:
        content_dir: path passed to ``read_file``, which yields ``(text, label)``.
        word_to_id: token -> integer id mapping.
        cat_to_id: label -> class index mapping.
        max_length: length every sequence is padded or truncated to (both at the end).
        embedding: ``'one hot'`` iterates over the characters of each text and
            drops those missing from ``word_to_id``. Any other value splits
            each text on whitespace and maps missing tokens to ``word_to_id['unk']``.

    Returns:
        ``(x_pad, y_pad)`` as produced by ``pad_sequences`` and ``to_categorical``.
    """
    raw_data, labels = [], []
    for text, label in read_file(content_dir):
        raw_data.append(text)
        labels.append(label)

    if embedding == 'one hot':
        data_id = [[word_to_id[ch] for ch in doc if ch in word_to_id] for doc in raw_data]
        label_id = [cat_to_id[label] for label in labels]
        print('data processed!')
    else:
        # split() rather than split(' '): the line's trailing newline no longer sticks
        # to the last token, and runs of spaces no longer produce empty tokens.
        # Both used to be looked up as unknown words.
        data_id = [
            [word_to_id[token] if token in word_to_id else word_to_id['unk'] for token in doc.split()]
            for doc in raw_data
        ]
        print('wordvector successfully')
        label_id = [cat_to_id[label] for label in labels]
        print('generated label ont hot')

    x_pad = kr.preprocessing.sequence.pad_sequences(data_id, max_length, padding='post', truncating='post')
    y_pad = kr.utils.to_categorical(label_id, num_classes=len(cat_to_id))

    return x_pad, y_pad

In [4]:
from datetime import timedelta
def get_time_dif(start_time):
    """Return the time elapsed since ``start_time`` as a ``timedelta``.

    ``start_time`` is a ``time.time()`` timestamp; the result is rounded to
    whole seconds.
    """
    elapsed_seconds = time.time() - start_time
    return timedelta(seconds=int(round(elapsed_seconds)))

In [33]:
def batch_iter(x, y, batch_size=64):
    """Yield shuffled ``(x_batch, y_batch)`` slices covering the data once.

    ``x`` and ``y`` are indexed with the same random permutation, so they must
    support integer-array indexing. The last batch may be shorter than
    ``batch_size``.
    """
    total = len(x)
    n_batches = int((total - 1) / batch_size) + 1

    order = np.random.permutation(np.arange(total))
    shuffled_x, shuffled_y = x[order], y[order]

    for lo in range(0, n_batches * batch_size, batch_size):
        hi = min(lo + batch_size, total)
        yield shuffled_x[lo:hi], shuffled_y[lo:hi]

In [5]:
def feed_data(x_batch, y_batch, keep_prob):
    """Build the feed dict for one batch.

    Keys are the ``input_x``, ``input_y`` and ``keep_prob`` attributes of the
    module-level ``model``.
    """
    return {
        model.input_x: x_batch,
        model.input_y: y_batch,
        model.keep_prob: keep_prob,
    }

In [6]:
def evaluate(sess, x_, y_):
    """Return ``(mean_loss, mean_accuracy)`` of the module-level ``model`` over a dataset.

    The data is visited in batches of 128 with keep probability 1.0. Each
    batch's loss and accuracy are weighted by the batch length so that a
    shorter final batch does not skew the averages.
    """
    n_samples = len(x_)
    loss_sum, acc_sum = 0.0, 0.0
    for xb, yb in batch_iter(x_, y_, 128):
        n_batch = len(xb)
        batch_loss, batch_acc = sess.run([model.loss, model.acc], feed_dict=feed_data(xb, yb, 1.0))
        loss_sum += batch_loss * n_batch
        acc_sum += batch_acc * n_batch

    return loss_sum / n_samples, acc_sum / n_samples
    

In [7]:
import os
import time

# Filesystem locations shared by the training and evaluation cells.
save_dir = './cnews/checkpoint/textRnn'  # NOTE(review): not referenced by any code visible in this notebook
train_dir = './cnews/cnews.train.txt'  # training split loaded by train()
val_dir = './cnews/cnews.val.txt'  # validation split loaded by train()
save_path = './cnews//lstmModel/lstm.mod'  # checkpoint prefix passed to saver.save() in train()


def train(model, config):
    """Train ``model`` and checkpoint it whenever validation accuracy improves.

    Reads the module-level ``train_dir``, ``val_dir``, ``save_path``,
    ``word_to_id`` and ``cat_to_id``, plus ``embedding`` when
    ``config.embedding == 'embedding'``. ``feed_data`` and ``evaluate`` use the
    module-level ``model``, so ``model`` here should be that same object.

    Every ``config.save_per_batch`` batches a TensorBoard summary is written.
    Every ``config.print_per_batch`` batches train and validation metrics are
    printed, and the model is saved to ``save_path`` if validation accuracy is
    the best so far. Training always runs for ``config.num_epochs`` epochs;
    there is no early stopping.

    NOTE(review): ``process_file`` is called with its default
    ``embedding='one hot'`` even when ``config.embedding == 'embedding'``.
    Confirm whether the word-level branch was intended for that mode.

    Args:
        model: a built ``TextRnn``.
        config: the ``RnnConfig`` the model was built with.
    """
    print("Configuring TensorBoard and Saver...")
    tensorboard_dir = './cnews/TextRnn'

    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.acc)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)

    session = None
    try:
        saver = tf.train.Saver()
        # Saver.save refuses to write into a missing directory, so create it up front.
        checkpoint_parent = os.path.dirname(save_path)
        if checkpoint_parent:
            os.makedirs(checkpoint_parent, exist_ok=True)

        print('Loading training and validation data...')
        x_train, y_train = process_file(train_dir, word_to_id, cat_to_id, config.seq_length)
        x_val, y_val = process_file(val_dir, word_to_id, cat_to_id, config.seq_length)

        gpu_config = tf.ConfigProto(allow_soft_placement=True)
        session = tf.Session(config=gpu_config)
        session.run(tf.global_variables_initializer())
        writer.add_graph(session.graph)
        if config.embedding == 'embedding':
            # Copy the pretrained vectors into the model's frozen embedding table.
            session.run(model.embedding_init, feed_dict={model.embedding_placeholder: embedding})

        print('Training and evaluating...')
        start_time = time.time()
        total_batch = 0
        best_acc_val = 0.0
        msg = ('Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%}, '
               'Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}')

        for epoch in range(config.num_epochs):
            print('Epoch:', epoch + 1)
            batch_train = batch_iter(x_train, y_train, config.batch_size)
            for x_batch, y_batch in batch_train:
                feed_dict = feed_data(x_batch, y_batch, config.dropout_keep_prob)

                if total_batch % config.save_per_batch == 0:
                    s = session.run(merged_summary, feed_dict=feed_dict)
                    writer.add_summary(s, total_batch)

                if total_batch % config.print_per_batch == 0:
                    # Measure with dropout off on a copy of the feed dict. Mutating
                    # `feed_dict` itself would also switch dropout off for the
                    # optimizer step below.
                    eval_feed = dict(feed_dict)
                    eval_feed[model.keep_prob] = 1.0
                    loss_train, acc_train = session.run([model.loss, model.acc], feed_dict=eval_feed)
                    loss_val, acc_val = evaluate(session, x_val, y_val)

                    if acc_val > best_acc_val:
                        best_acc_val = acc_val
                        saver.save(sess=session, save_path=save_path)
                        improved_str = '*'
                    else:
                        improved_str = ''

                    time_dif = get_time_dif(start_time)
                    print(msg.format(total_batch, loss_train, acc_train, loss_val, acc_val, time_dif, improved_str))

                session.run(model.optim, feed_dict=feed_dict)
                total_batch += 1
    finally:
        # Release the session's resources and flush pending summaries, even on error.
        if session is not None:
            session.close()
        writer.close()
    

In [None]:
def to_train_test(data, file1, file2, size, categories):
    """Split ``data`` per category and write the train / test parts to files.

    Args:
        data: iterable of 2-item pairs whose first item is compared against
            the entries of ``categories``. It is consumed once, so a generator
            is fine.
        file1: path that receives the training pairs.
        file2: path that receives the test pairs.
        size: forwarded to ``train_test_split`` as ``test_size``.
        categories: category values to split, processed in this order.
    """
    # Local import: the notebook's import cell only pulls in `sklearn.metrics`.
    from sklearn.model_selection import train_test_split

    # Group the pairs by category in one pass instead of re-scanning `data`
    # for every category.
    by_label = {}
    for label, text in data:
        by_label.setdefault(label, []).append((label, text))

    train = []
    test = []
    for label in categories:
        train_part, test_part = train_test_split(by_label.get(label, []), test_size=size)
        train += train_part
        test += test_part

    write_to_file(file1, train)
    write_to_file(file2, test)

In [None]:
def write_to_file(file, data):
    """Write each ``(x, y)`` pair of ``data`` to ``file`` as ``x<TAB>y``.

    No newline is appended, so ``y`` must carry its own line terminator if one
    record per line is wanted. The file is written as UTF-8 and closed by the
    ``with`` block.
    """
    with open(file, 'w', encoding='utf8') as f:
        f.writelines(x + '\t' + y for x, y in data)

In [17]:
class fasttextConfig(object):
    """Settings that ``fasttext_model`` forwards to ``fasttext.skipgram``.

    The attributes ``dim``, ``ws`` and ``min_count`` are passed through under
    the same keyword names.
    """

    def __init__(self, dim=100, ws=5, min_count=3):
        self.dim, self.ws, self.min_count = dim, ws, min_count

In [52]:
def generate_vocab_dict(vocab, vocab_dir):
    """Map each word in ``vocab`` to its position and save the mapping.

    One ``word<TAB>id`` line per mapping entry is written to ``vocab_dir`` as
    UTF-8. If a word occurs more than once, its last position wins.

    Returns:
        The ``word -> id`` dictionary.
    """
    word_to_id = dict(zip(vocab, range(len(vocab))))
    with open(vocab_dir, 'w', encoding='utf8') as f:
        # Let the file object buffer the writes; flushing after every line is
        # unnecessary because the `with` block flushes on close.
        f.writelines('{}\t{}\n'.format(word, idx) for word, idx in word_to_id.items())
    return word_to_id

In [8]:
import jieba
def cut_sentence(doc, file):
    """Segment every document of ``doc`` with jieba and write the result to ``file``.

    ``doc`` is read through ``read_file``. Each output record is
    ``label<TAB>tokens``, with the tokens joined by single spaces. No newline
    is added: the line break comes from the newline that the text still
    carries as part of its last token.

    The output file is opened in a ``with`` block so it is closed when the
    function returns or raises.
    """
    with open(file, 'w', encoding='utf8') as f:
        for x, y in read_file(doc):
            f.write(y + '\t' + ' '.join(jieba.cut(x)))

In [13]:
def read_vocab(vocab_dir):
    """Read a vocabulary file with one entry per line.

    Returns:
        ``(words, word_to_id)``: the whitespace-stripped lines in file order,
        and a mapping from each entry to its line index.
    """
    with open(vocab_dir, 'r', encoding='utf8') as f:
        words = [line.strip() for line in f]
    word_to_id = {word: index for index, word in enumerate(words)}
    return words, word_to_id

In [20]:
def fasttext_model(doc, model, ft_config, corpus_path='./cnews/without.dat'):
    """Train fastText skip-gram vectors on the texts of a labelled file.

    The texts yielded by ``read_file(doc)`` are written, labels removed, to
    ``corpus_path``. ``fasttext.skipgram`` is then run on that file.

    Args:
        doc: labelled input file, read through ``read_file``.
        model: output argument passed straight to ``fasttext.skipgram``.
        ft_config: object with ``dim``, ``ws`` and ``min_count`` attributes.
        corpus_path: where the label-free corpus is written. The default is
            the location that used to be hard-coded.
    """
    with open(corpus_path, 'w', encoding='utf8') as f:
        f.writelines(text for text, _ in read_file(doc))
    fasttext.skipgram(corpus_path, model, dim=ft_config.dim, ws=ft_config.ws, min_count=ft_config.min_count)

In [69]:
def test(test_dir, config, model_dir):
    """Evaluate a saved checkpoint on a labelled file and print a report.

    Prints the test loss and accuracy, the per-class precision / recall / F1
    report, the confusion matrix and the elapsed time. Uses the module-level
    ``model``, ``word_to_id``, ``cat_to_id`` and ``categories``.

    NOTE(review): ``process_file`` is called with its default
    ``embedding='one hot'`` regardless of ``config.embedding``. Confirm that
    this matches how the checkpoint was trained.

    Args:
        test_dir: path of the labelled test file.
        config: ``RnnConfig`` the model was built with (``seq_length`` is read).
        model_dir: checkpoint prefix to restore.
    """
    print("Loading test data...")
    start_time = time.time()
    x_test, y_test = process_file(test_dir, word_to_id, cat_to_id, config.seq_length)

    gpu_config = tf.ConfigProto(allow_soft_placement=True)
    # The context manager closes the session when evaluation finishes or fails.
    with tf.Session(config=gpu_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess=sess, save_path=model_dir)

        print('Testing...')
        loss_test, acc_test = evaluate(sess, x_test, y_test)
        msg = 'Test Loss:{0:>6.2}, Test Acc:{1:>7.2%}'
        print(msg.format(loss_test, acc_test))

        # Predict in file order (no shuffling) so predictions line up with y_test.
        batch_size = 128
        data_len = len(x_test)
        num_batch = int((data_len - 1) / batch_size) + 1
        y_test_cls = np.argmax(y_test, 1)
        y_pred_cls = np.zeros(shape=len(x_test), dtype=np.int32)
        for i in range(num_batch):
            start_id = i * batch_size
            end_id = min((i + 1) * batch_size, data_len)
            feed_dict = {
                model.input_x: x_test[start_id:end_id],
                model.keep_prob: 1.0
            }

            y_pred_cls[start_id:end_id] = sess.run(model.y_pred_cls, feed_dict=feed_dict)

    print('Precision, Recall and F1-Socre...')
    print(metrics.classification_report(y_test_cls, y_pred_cls, target_names=categories))

    print('Confusion Matrix...')
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)
        

In [None]:
# Load the pretrained fastText vectors and derive the vocabulary size and vector width from them.
vocab, embd = loadWordEmbedding('model/fasttext.model.bin')
vocab_size = len(vocab)
embedding_dim = len(embd[0])
# The module-level `embedding` array is what train() feeds into model.embedding_placeholder.
embedding = np.asarray(embd)

In [None]:
# Map every vocabulary word to its position in `vocab`. loadWordEmbedding builds `embd` in the same order.
word_to_id = dict(zip(vocab, range(vocab_size)))

- **transform to id**

    **the y_label is like this:**
        ```
         {'IT': 0,
          '体育': 1,
          '健康': 2,
          '军事': 3,
          '奥运': 4,
          '女性': 5,
          '娱乐': 6,
          '房产': 7,
          '教育': 8,
          '文化': 9,
          '旅游': 10,
          '汽车': 11,
          '财经': 12})
        ```

    **the x_data is like this:**
        ```
        {'盖房子': 1,
         '而书': 3,
         '其他': 4,
         '平房': 5,
         '要放': 6,
         '标本兼治': 7,
         '失魂': 8,
         '姚晓明': 9,
         '银票': 10,
         '斑秃': 11,
         '份量': 12,
         '四证': 13,
         '小剂量': 15,
        ```
     
note: the vocab also contains the extra 'unk' entry

**convert the training data to indices and pad every sequence to the same length**
- if a document has fewer ids than the specified length, zeros are appended at the end (mode=post)
- otherwise the extra ids are cut off at the end (mode=post)

```
([  5607, 215414,  15425,  19772,  52617, 111590,  19585,  40881,
        86058, 121047, 147869, 204886, 216120, 154775, 207544, 218800,
        81033, 130909, 167262,  30992, 139440,  19772,  80670, 193948,
       135063,  31337, 213472, 215414, 198527,  40727, 178484,  38815,
       187325, 177281,  83589, 178484,  38815,  39448, 210307, 180697,
       177281, 216383, 116715,  42935,  85169, 207544, 218800,  44149,
       197243, 177281, 130909, 207544, 218800, 154775, 160307, 111590,
        81033,  43340,  70777,  12807,  15110, 177281, 123144,  63772,
        81033, 178484, 174865,  70777, 137332, 160307, 111590, 213472,
        71244, 177281,  19772,  52617, 100735,  81033,  56902, 211348,
        ...,
          0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0], dtype=int32
            
```

**map each category to its index and convert the index to a one-hot vector**

```
array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])
```

In [12]:
# Category names; a category's position in this list is its class id.
# NOTE(review): the markdown above describes 13 labels with different names (for example 'IT', '健康') -- confirm which label set applies.
categories = ['体育' ,'财经', '房产', '家居', '教育', '科技', '时尚', '时政', '游戏', '娱乐']
_, cat_to_id  = read_category(categories)

In [9]:
# Segment the training split with jieba and write the space-separated result to cnews_train_cut.dat.
cut_sentence('./cnews/cnews.train.txt', './cnews/cnews_train_cut.dat')

Building prefix dict from the default dictionary ...
DEBUG:jieba:Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
DEBUG:jieba:Loading model from cache /tmp/jieba.cache
Loading model cost 0.482 seconds.
DEBUG:jieba:Loading model cost 0.482 seconds.
Prefix dict has been built succesfully.
DEBUG:jieba:Prefix dict has been built succesfully.


In [54]:
# Segment the validation split the same way.
cut_sentence('./cnews/cnews.val.txt', './cnews/cnews_val_cut.dat')

In [21]:
# Train fastText skip-gram vectors on the segmented training split with the default settings.
ft_config = fasttextConfig()
fasttext_model('./cnews/cnews_train_cut.dat', './cnews/model/fasttext.mod', ft_config)

In [23]:
# Load the vectors trained above.
# NOTE(review): the recorded output "loaded word2vec" differs from the message loadWordEmbedding prints, so this output looks stale.
vocab, embd = loadWordEmbedding('./cnews/model/fasttext.mod.bin')

loaded word2vec


In [30]:
# Convert the list of vectors to an array; train() reads this module-level `embedding`.
embedding = np.asarray(embd)

In [14]:
# Load the vocabulary file; len(words) is later used as the model's vocab_size.
# NOTE(review): another cell also assigns `word_to_id` via generate_vocab_dict; whichever runs last wins -- confirm the intended one.
words, word_to_id = read_vocab('./cnews/cnews.vocab.txt')

In [53]:
# Build word -> id from the fastText vocabulary and save it to disk.
# NOTE(review): this rebinds the `word_to_id` that read_vocab also sets, while `words` is left unchanged -- confirm this is intended.
word_to_id = generate_vocab_dict(vocab, './cnews/fasttext.vocab')

In [15]:
# GRU model with embedding size 64, vocabulary size len(words) and dropout keep probability 0.8.
rnn_config = RnnConfig(len(categories), 64, len(words), rnn='gru',drop_keep_prob=0.8)

In [16]:
# Start from an empty default graph so re-running this cell does not duplicate variables.
tf.reset_default_graph()
model = TextRnn(rnn_config)
# Training is disabled here; uncomment to train and write checkpoints.
# train(model, rnn_config)

load the one hot
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



In [70]:
# Rebuild the graph from scratch, then evaluate the saved checkpoint on the cnews test split.
tf.reset_default_graph()
rnn_config = RnnConfig(len(categories), 64, len(words), rnn='gru',drop_keep_prob=0.8)
model = TextRnn(rnn_config)
test('./cnews/cnews.test.txt', rnn_config, './cnews/lstmModel/lstm.mod')

load the one hot
Loading test data...
data processed!
INFO:tensorflow:Restoring parameters from ./cnews/lstmModel/lstm.mod


INFO:tensorflow:Restoring parameters from ./cnews/lstmModel/lstm.mod


Testing...
Test Loss:  0.22, Test Acc: 94.36%
Precision, Recall and F1-Socre...
             precision    recall  f1-score   support

         体育       0.99      1.00      0.99      1000
         财经       0.94      0.98      0.96      1000
         房产       0.92      0.86      0.89      1000
         家居       0.93      0.85      0.89      1000
         教育       0.94      0.94      0.94      1000
         科技       0.95      0.95      0.95      1000
         时尚       0.93      0.97      0.95      1000
         时政       0.90      0.96      0.93      1000
         游戏       0.97      0.93      0.95      1000
         娱乐       0.96      0.98      0.97      1000

avg / total       0.94      0.94      0.94     10000

Confusion Matrix...
[[996   0   1   1   1   0   0   0   0   1]
 [  0 982   9   1   1   0   0   7   0   0]
 [  4  34 858  15  10   5   5  66   0   3]
 [  4   7  54 854  13  14  26  19   3   6]
 [  3   9   1   5 942  13   3  12  11   1]
 [  0   1   0  10   7 954  14   3  10   1]
 [ 

In [112]:
# Evaluate the same checkpoint on the Sougou test file.
# NOTE(review): the recorded report shows zero support for several categories, so the two datasets' label sets only partly overlap.
tf.reset_default_graph()
rnn_config = RnnConfig(len(categories), 64, len(words), rnn='gru',drop_keep_prob=0.8)
model = TextRnn(rnn_config)
test('./cnews/Sougou_test.dat', rnn_config, './cnews/lstmModel/lstm.mod')

load the one hot
Loading test data...
data processed!
INFO:tensorflow:Restoring parameters from ./cnews/lstmModel/lstm.mod


INFO:tensorflow:Restoring parameters from ./cnews/lstmModel/lstm.mod


Testing...
Test Loss:   2.9, Test Acc: 47.60%
Precision, Recall and F1-Socre...
             precision    recall  f1-score   support

         体育       0.98      0.64      0.78     24869
         财经       0.80      0.36      0.49     17144
         房产       0.52      0.34      0.41     15887
         家居       0.00      0.00      0.00         0
         教育       0.39      0.27      0.32      2825
         科技       0.00      0.00      0.00         0
         时尚       0.00      0.00      0.00         0
         时政       0.00      0.00      0.00         0
         游戏       0.00      0.00      0.00         0
         娱乐       0.86      0.55      0.67      9465

avg / total       0.79      0.48      0.59     70190

Confusion Matrix...
[[15911   472   556  4742   719   139   967   431   401   531]
 [   28  6105  3931  2511   160   317    26  3521   431   114]
 [   12   476  5444  8962   165    35   123   533    36   101]
 [    0     0     0     0     0     0     0     0     0     0]
 [   20  

  'recall', 'true', average, warn_for)


In [85]:
def test_one(documemt, model_dir, config):
    """Classify a single raw document and print the predicted category name.

    The document is encoded character by character with the module-level
    ``word_to_id``; characters missing from it are dropped. The ids are padded
    or truncated to ``config.seq_length`` and fed to the module-level
    ``model``, restored from ``model_dir``.

    Args:
        documemt: the raw text to classify.
        model_dir: checkpoint prefix to restore.
        config: ``RnnConfig`` the model was built with (``seq_length`` is read).
    """
    data_id = [[word_to_id[x] for x in documemt if x in word_to_id]]
    x_pad = kr.preprocessing.sequence.pad_sequences(data_id, config.seq_length, padding='post', truncating='post')

    gpu_config = tf.ConfigProto(allow_soft_placement=True)
    # The context manager closes the session once the prediction is made.
    with tf.Session(config=gpu_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess=sess, save_path=model_dir)

        feed_dict = {
            model.input_x: x_pad,
            model.keep_prob: 1.0
        }
        # The batch holds one document, so take the single predicted class id.
        predicted_id = int(sess.run(model.y_pred_cls, feed_dict=feed_dict)[0])

    for category in cat_to_id.keys():
        if cat_to_id[category] == predicted_id:
            print(category)

In [92]:
# Rebuild the graph from scratch before classifying the single sample document below.
tf.reset_default_graph()
rnn_config = RnnConfig(len(categories), 64, len(words), rnn='gru',drop_keep_prob=0.8)
model = TextRnn(rnn_config)
test_one('宏观财经要闻?央行:2008年前5个月金融市场总体运行平稳?国资委:严格控制职工持有国有大型企业股权?上海产权市场两大新“热点”市场动态?指数短期还会有反复周三沪深两市大盘小幅低开,先抑后扬。早盘,两市指数再创近期新低,沪综指和深成指分别下探至2729.71点、9114.19点,随后在可能会出利好的传闻推动下,两市展开强劲反弹行情,双双以中阳线报收,同时成交量较上一交易日放大约五成。终盘,沪综指报收2941.11点,大涨146.36点或5.24%,成交711.9亿;深成指报收9903.14点,大涨473.64点或5.02%,成交347.5亿。盘面上,今日两市个股呈现普涨态势,涨幅靠前主要是超跌股与低价股。热点方面,煤炭,有色金属,化工,能源,奥运,创投等概念均有较大反弹。其中以江山股份、浏阳花炮为首的化工股,以国阳新能为首的煤炭股,以锡业股份为首的有色金属股均涨幅居前,是主要做多动力,只有ST板块微跌。消息面上,值得关注的有:人民币加速升值。人民币对美元中间价17日升破6.9至6.8919,较前日上涨109基点,再创汇改以来新高。2008年以来,人民币对美元升值幅度接近6%;第四次中美战略经济对话17日在美国马里兰州安纳波利斯开幕。王岐山指出,深入探讨美国次贷危机及其影响,加强双方宏观经济和金融政策的协调,有利于维护两国以及世界经济金融稳定;能源和环境领域是中美经济合作新的增长点,双方合作空间广阔,应当努力取得更多成果。6月16日,中美企业界代表在美国首都华盛顿和密苏里州的圣路易斯市签署71项合同或协议,涉及大豆、节能机电产品、通信化工产品、飞机发动机、机械设备、通信及网络设备、半导体及电子器件等11大类产品,总金额约136亿美元。对于后市走势,我们认为如果没有实质性的利好出台,两市指数短期还会有反复,指数在恢复性上涨后可能会进一步下探,在经历一个震荡筑底阶段之后才会开始一波反弹行情。?新股定价行业公司?小商品城公告点评:新会展中心未来盈利或高于之前我们的预期,业绩的确定性、治理结构改善等提升投资价值,调高投资评级至“买入”?昆明机床子公司西安交大智能电器公司可能解散?中国航空工业集团公司筹备组已成立,中航一、二集团合并重组将正式拉开帷幕?香溢融通发布两则有关其典当业务风险的公告?步步高上市及一季度财务数据点评?中国平安5月份保费收入点评债券和衍生品?宝钢发规模100亿元可分离债,20日申购近期重点研究报告?金融工程:宝钢可分离债_债券报告?行业公司:中国人寿_跟踪报告?行业公司:银行业_深度报告?宏观研究:中国经济_宏观快报?宏观研究:美国经济_宏观快报新近推荐买入个股一览?S三九、天威视讯、煤气化、丽珠集团、凌钢股份、沈阳化工、雨润食品、天地科技、丹化科技、S*ST天颐搜狐证券声明:本频道资讯内容系转引自合作媒体及合作机构,不代表搜狐证券自身观点与立场,建议投资者对此资讯谨慎判断,据此入市,风险自担。', './cnews/lstmModel/lstm.mod', rnn_config)

load the one hot
INFO:tensorflow:Restoring parameters from ./cnews/lstmModel/lstm.mod


INFO:tensorflow:Restoring parameters from ./cnews/lstmModel/lstm.mod


房产
