In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import re
import urllib.request
import mecab
from tqdm import tqdm
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dense, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import warnings
warnings.filterwarnings(action='ignore')

In [2]:
# Load the 2010-2018 news table from a CSV in the working directory and preview it.
df= pd.read_csv('./df10-18.csv')
df.head(2)

Unnamed: 0,date,category,section,publisher,author,title,content_url,attachment,industry.label,industry.score,industry.name,polarity.label,polarity.score,polarity.name,corp
0,2018-12-27,news,economy,전자신문,,[ET투자뉴스]CMG제약_기관의 힘? 대량순매수 이후.. 현재 +3.05%,,,C21,1.0,의료용 물질 및 의약품 제조업,1.0,0.643,긍정,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...
1,2018-12-24,news,economy,한국경제,,[한경로보뉴스] '와이지엔터테인먼트' 52주 신고가 경신,,,C26,0.59,"전자 부품, 컴퓨터, 영상, 음향 및 통신장비 제조업",1.0,0.667,긍정,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...


In [3]:
# Split the rows by whether the source already supplied a polarity label:
#   df_X - unlabelled rows (to be labelled by the model below)
#   df_O - rows that already carry a label
# .copy() gives each half its own data, so the column assignments made later
# (tokenized, del_stopword, polarity.label) write to a real frame instead of a
# slice of `df` (which triggers SettingWithCopyWarning).
has_label = df['polarity.label'].notna()
df_X = df[~has_label].copy()
df_O = df[has_label].copy()
len(df_X)

88485

In [4]:
# Instantiate the MeCab tagger.
# NOTE(review): this rebinds the name `mecab` from the imported module to the
# instance, so re-running this cell without re-importing is likely to fail.
mecab = mecab.MeCab()

In [5]:
# Morpheme-tokenize every title; each cell of `tokenized` holds a list of tokens.
df_X['tokenized'] = [mecab.morphs(title) for title in df_X['title']]

In [6]:
# Load the stop-word table. Note that this is a DataFrame, not a list or set of words.
stop_word = pd.read_csv('./stop_word_2.csv')

In [7]:
# Remove stop words from every tokenized title.
# `word in stop_word` on a DataFrame tests the column labels, not the cell values,
# so the previous filter removed (almost) nothing. Build an explicit lookup set
# from the cell values instead; the column labels stay in the set so anything the
# old test filtered is still filtered.
# NOTE(review): confirm the model was trained on titles filtered the same way,
# otherwise this changes the inputs relative to training.
stop_word_set = set(stop_word.columns) | set(stop_word.to_numpy().ravel())
del_stopword = [
    [word for word in row if word not in stop_word_set]
    for row in df_X['tokenized']
]

In [8]:
# Attach the filtered token lists as a new column (one list per row, same order as df_X).
df_X['del_stopword'] = del_stopword

In [9]:
# Preview the unlabelled rows with their token columns.
df_X.head(2)

Unnamed: 0,date,category,section,publisher,author,title,content_url,attachment,industry.label,industry.score,industry.name,polarity.label,polarity.score,polarity.name,corp,tokenized,del_stopword
4,2018-12-05,news,tech,조선일보,최인준,간암·뇌전증·치매… 한국 신약 10여종 美 상륙한다,,,,,,,,,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,"[간암, ·, 뇌전증, ·, 치매, …, 한국, 신약, 10, 여종, 美, 상륙, 한다]","[간암, ·, 뇌전증, ·, 치매, …, 한국, 신약, 10, 여종, 美, 상륙, 한다]"
5,2018-11-28,news,economy,이데일리,박일경,[재송]27일 장 마감 후 주요 종목뉴스,,,C20,0.875,화학 물질 및 화학제품 제조업; 의약품 제외,,,,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,"[[, 재송, ], 27, 일, 장, 마감, 후, 주요, 종목, 뉴스]","[[, 재송, ], 27, 일, 장, 마감, 후, 주요, 종목, 뉴스]"


# 학습된 모델 불러오기 및 라벨 예측

In [10]:
from tensorflow.keras.models import load_model

In [11]:
# Restore the saved Keras classifier from its HDF5 checkpoint.
model = load_model('./NLP/best_model_1018_labelbal.h5')

In [12]:
# Model input: the filtered token lists of the unlabelled rows.
x=df_X['del_stopword']
# NOTE(review): df_X holds only rows whose polarity.label is missing, so `y` is all-NaN
# here, and no later cell shown reads it before it is overwritten.
y=df_X['polarity.label']

In [13]:
# Load the saved training inputs; the result is a DataFrame.
X_train = pd.read_csv('./final_X_train.csv')

In [14]:
# Build the word index used to encode titles.
# NOTE(review): X_train is a DataFrame (read_csv above); iterating a DataFrame yields
# its column labels, so fit_on_texts presumably only sees the header names rather
# than the training texts. Pass the text column instead, or better, reload the
# tokenizer that was fitted when the model was trained. Confirm before trusting `pred`.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)

In [15]:
# Convert each token list into a sequence of integer ids using the fitted tokenizer.
X_encoded = tokenizer.texts_to_sequences(x)

In [16]:
# Fixed sequence length passed to pad_sequences below.
# NOTE(review): presumably the input length the model was trained with; confirm.
max_len=77

In [17]:
# Pad/truncate every id sequence to max_len so the batch has a uniform length.
X_encoded = pad_sequences(X_encoded, maxlen=max_len)

In [18]:
# Run the loaded model on the padded sequences and preview the first five output rows.
pred=model.predict(X_encoded)
pred[:5]



array([[0.41763908, 0.14792828, 0.43443257],
       [0.41763908, 0.14792828, 0.43443257],
       [0.46546778, 0.13079725, 0.4037349 ],
       [0.41763908, 0.14792828, 0.43443257],
       [0.41763908, 0.14792828, 0.43443257]], dtype=float32)

In [19]:
# Sanity check: one prediction row per unlabelled row.
len(df_X),len(pred)

(88485, 88485)

In [20]:
# Index of the highest-scoring output column for each row.
label = pred.argmax(axis=1)

In [21]:
# Number of predicted labels.
len(label)

88485

In [22]:
# Fill the missing labels with the model's argmax indices.
# NOTE(review): these are output-column indices; a later cell maps the source's
# -1 label to 2, so index 2 is presumably the negative class. Confirm against training.
df_X['polarity.label'] = label

In [23]:
# Preview the rows after the predicted labels were written.
df_X.head(2)

Unnamed: 0,date,category,section,publisher,author,title,content_url,attachment,industry.label,industry.score,industry.name,polarity.label,polarity.score,polarity.name,corp,tokenized,del_stopword
4,2018-12-05,news,tech,조선일보,최인준,간암·뇌전증·치매… 한국 신약 10여종 美 상륙한다,,,,,,2,,,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,"[간암, ·, 뇌전증, ·, 치매, …, 한국, 신약, 10, 여종, 美, 상륙, 한다]","[간암, ·, 뇌전증, ·, 치매, …, 한국, 신약, 10, 여종, 美, 상륙, 한다]"
5,2018-11-28,news,economy,이데일리,박일경,[재송]27일 장 마감 후 주요 종목뉴스,,,C20,0.875,화학 물질 및 화학제품 제조업; 의약품 제외,2,,,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,"[[, 재송, ], 27, 일, 장, 마감, 후, 주요, 종목, 뉴스]","[[, 재송, ], 27, 일, 장, 마감, 후, 주요, 종목, 뉴스]"


In [24]:
# Preview the rows that already had a label from the source.
df_O.head(2)

Unnamed: 0,date,category,section,publisher,author,title,content_url,attachment,industry.label,industry.score,industry.name,polarity.label,polarity.score,polarity.name,corp
0,2018-12-27,news,economy,전자신문,,[ET투자뉴스]CMG제약_기관의 힘? 대량순매수 이후.. 현재 +3.05%,,,C21,1.0,의료용 물질 및 의약품 제조업,1.0,0.643,긍정,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...
1,2018-12-24,news,economy,한국경제,,[한경로보뉴스] '와이지엔터테인먼트' 52주 신고가 경신,,,C26,0.59,"전자 부품, 컴퓨터, 영상, 음향 및 통신장비 제조업",1.0,0.667,긍정,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...


In [129]:
# Stack the model-labelled rows on top of the source-labelled rows (row-wise).
df = pd.concat([df_X, df_O])
len(df)

143830

In [130]:
# Persist the combined frame (the index is written too, since index=False is not passed).
# NOTE(review): this runs before the -1 -> 2 label remap a few cells below, so the file
# presumably mixes model class indices with the source's original labels. Confirm intent.
df.to_csv('./final_1018.csv')

# 모델용 df

In [131]:
# Column overview of the combined frame (dtypes and non-null counts).
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 143830 entries, 4 to 143827
Data columns (total 17 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   date            143830 non-null  object 
 1   category        143830 non-null  object 
 2   section         143830 non-null  object 
 3   publisher       143830 non-null  object 
 4   author          54147 non-null   object 
 5   title           143830 non-null  object 
 6   content_url     0 non-null       float64
 7   attachment      0 non-null       float64
 8   industry.label  137471 non-null  object 
 9   industry.score  137471 non-null  float64
 10  industry.name   137471 non-null  object 
 11  polarity.label  143830 non-null  float64
 12  polarity.score  55345 non-null   float64
 13  polarity.name   55345 non-null   object 
 14  corp            143830 non-null  object 
 15  tokenized       88485 non-null   object 
 16  del_stopword    88485 non-null   object 
dtypes: float64

In [132]:
# Re-code the label column: -1 becomes 2, while 0 and 1 keep their values.
df['polarity.label'] = df['polarity.label'].replace({-1: 2})

In [133]:
# Preview after the label re-coding.
df.head(2)

Unnamed: 0,date,category,section,publisher,author,title,content_url,attachment,industry.label,industry.score,industry.name,polarity.label,polarity.score,polarity.name,corp,tokenized,del_stopword
4,2018-12-05,news,tech,조선일보,최인준,간암·뇌전증·치매… 한국 신약 10여종 美 상륙한다,,,,,,2.0,,,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,"[간암, ·, 뇌전증, ·, 치매, …, 한국, 신약, 10, 여종, 美, 상륙, 한다]","[간암, ·, 뇌전증, ·, 치매, …, 한국, 신약, 10, 여종, 美, 상륙, 한다]"
5,2018-11-28,news,economy,이데일리,박일경,[재송]27일 장 마감 후 주요 종목뉴스,,,C20,0.875,화학 물질 및 화학제품 제조업; 의약품 제외,2.0,,,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,"[[, 재송, ], 27, 일, 장, 마감, 후, 주요, 종목, 뉴스]","[[, 재송, ], 27, 일, 장, 마감, 후, 주요, 종목, 뉴스]"


In [134]:
# Keep only the columns the aggregation needs.
# Selecting by name (these are positions 0, 5, 11 and 14 in the info listing above)
# does not depend on column order and lets the cell be re-run; .copy() detaches the
# result so the column assignments below write to an independent frame.
df = df[['date', 'title', 'polarity.label', 'corp']].copy()

In [135]:
# Preview the reduced frame.
df.head(2)

Unnamed: 0,date,title,polarity.label,corp
4,2018-12-05,간암·뇌전증·치매… 한국 신약 10여종 美 상륙한다,2.0,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...
5,2018-11-28,[재송]27일 장 마감 후 주요 종목뉴스,2.0,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...


In [136]:
# Parse the date strings into datetime64 values so year/month can be extracted later.
df["date"] = pd.to_datetime(df["date"])

In [137]:
# Preview after date parsing.
df.head(2)

Unnamed: 0,date,title,polarity.label,corp
4,2018-12-05,간암·뇌전증·치매… 한국 신약 10여종 美 상륙한다,2.0,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...
5,2018-11-28,[재송]27일 장 마감 후 주요 종목뉴스,2.0,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...


In [138]:
# Replace the non-contiguous index left by the concat with 0..n-1; the old index
# becomes a column named 'index', which the next cell deletes.
df = df.reset_index()

In [139]:
# Drop the 'index' column created by reset_index() in the previous cell.
del df['index']

In [140]:
# Row count check after re-indexing.
len(df)

143830

In [141]:
# Company name = second '-'-separated field of each `corp` string.
corp = [txt.split(sep='-')[1] for txt in df['corp']]

In [142]:
# Store the extracted company names as the `name` column.
df['name'] = corp

In [143]:
# Add the two sentiment flag columns, initialised to zero for every row.
df = df.assign(pos=0, neg=0)

In [144]:
# First five rows whose label is not 0.
df.loc[df['polarity.label'] != 0].head(5)

Unnamed: 0,date,title,polarity.label,corp,name,pos,neg
0,2018-12-05,간암·뇌전증·치매… 한국 신약 10여종 美 상륙한다,2.0,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,CMG제약,0,0
1,2018-11-28,[재송]27일 장 마감 후 주요 종목뉴스,2.0,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,CMG제약,0,0
3,2018-10-01,"차병원·바이오그룹, 대졸 신입사원 공개채용",2.0,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,CMG제약,0,0
4,2018-09-18,"[한경로보뉴스] 전일, 코스닥 외국인 순매도상위에 제약 업종 3종목",2.0,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,CMG제약,0,0
5,2018-09-05,[코스피·코스닥 전 거래일(4일) 주요 공시],2.0,DeepSearch-CMG제약-news-2018-01-01-2018-12-31-20...,CMG제약,0,0


In [145]:
# Turn the label into two signed flag columns:
#   pos = 1 where the label is 1, otherwise 0
#   neg = -1 where the label is 2, otherwise 0
# Vectorised comparison replaces the row-by-row .iloc writes (one Python-level write
# per row) and addresses the columns by name instead of by position 5/6.
df['pos'] = (df['polarity.label'] == 1).astype('int64')
df['neg'] = -(df['polarity.label'] == 2).astype('int64')

In [146]:
# Column overview after adding name/pos/neg.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 143830 entries, 0 to 143829
Data columns (total 7 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   date            143830 non-null  datetime64[ns]
 1   title           143830 non-null  object        
 2   polarity.label  143830 non-null  float64       
 3   corp            143830 non-null  object        
 4   name            143830 non-null  object        
 5   pos             143830 non-null  int64         
 6   neg             143830 non-null  int64         
dtypes: datetime64[ns](1), float64(1), int64(2), object(3)
memory usage: 7.7+ MB


# 연도별로/월별로

In [147]:
import datetime as dt

In [148]:
# Calendar year of every row, taken with the vectorised .dt accessor.
# The previous loop used `dt` and `y` as scratch names, which overwrote the
# `datetime` alias imported in the cell above and the `y` defined earlier.
year = df['date'].dt.year.tolist()

In [149]:
# Calendar month (1-12) of every row, taken with the vectorised .dt accessor.
# The previous loop used `dt` as its loop variable, which overwrote the
# `datetime` alias imported a few cells above.
month = df['date'].dt.month.tolist()

In [150]:
# Attach the extracted year and month as columns.
df['year'] = year
df['month'] = month

In [151]:
# Column overview after adding year/month.
# NOTE(review): the positional 2 lands on info()'s first parameter (verbose); it is
# probably a leftover and a plain df.info() looks intended. Confirm.
df.info(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 143830 entries, 0 to 143829
Data columns (total 9 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   date            143830 non-null  datetime64[ns]
 1   title           143830 non-null  object        
 2   polarity.label  143830 non-null  float64       
 3   corp            143830 non-null  object        
 4   name            143830 non-null  object        
 5   pos             143830 non-null  int64         
 6   neg             143830 non-null  int64         
 7   year            143830 non-null  int64         
 8   month           143830 non-null  int64         
dtypes: datetime64[ns](1), float64(1), int64(4), object(3)
memory usage: 9.9+ MB


In [152]:
# One frame per calendar year, 2010 through 2018, selected on the `year` column.
(df_2010, df_2011, df_2012, df_2013, df_2014,
 df_2015, df_2016, df_2017, df_2018) = (
    df[df['year'] == yr] for yr in range(2010, 2019)
)

In [153]:
# df_2010

## 연간

In [154]:
# Yearly pos/neg totals per company for 2010.
# Columns are selected by name instead of positions 4/5/6, so the cell no longer
# depends on column order and can be re-run (the positional version raised
# IndexError on a second run, because the frame has only four columns by then).
df_2010 = (
    df_2010[['name', 'pos', 'neg']]
    .groupby('name', as_index=False)
    .sum()
    .assign(date='2010')
)
df_2010.head(2)

Unnamed: 0,name,pos,neg,date
0,HLB,0,-1,2010
1,경남제약,9,-48,2010


In [155]:
# Yearly pos/neg totals per company for 2011.
# Name-based column selection replaces positions 4/5/6, which made the cell
# order-dependent and impossible to re-run.
df_2011 = (
    df_2011[['name', 'pos', 'neg']]
    .groupby('name', as_index=False)
    .sum()
    .assign(date='2011')
)

# df_2010.head(2)

In [156]:
# Yearly pos/neg totals per company for 2012.
# Name-based column selection replaces positions 4/5/6, which made the cell
# order-dependent and impossible to re-run.
df_2012 = (
    df_2012[['name', 'pos', 'neg']]
    .groupby('name', as_index=False)
    .sum()
    .assign(date='2012')
)

# df_2010.head(2)

In [157]:
# Yearly pos/neg totals per company for 2013.
# Name-based column selection replaces positions 4/5/6, which made the cell
# order-dependent and impossible to re-run.
df_2013 = (
    df_2013[['name', 'pos', 'neg']]
    .groupby('name', as_index=False)
    .sum()
    .assign(date='2013')
)

# df_2010.head(2)

In [158]:
# Yearly pos/neg totals per company for 2014.
# Name-based column selection replaces positions 4/5/6, which made the cell
# order-dependent and impossible to re-run.
df_2014 = (
    df_2014[['name', 'pos', 'neg']]
    .groupby('name', as_index=False)
    .sum()
    .assign(date='2014')
)

# df_2010.head(2)

In [159]:
# Yearly pos/neg totals per company for 2015.
# Name-based column selection replaces positions 4/5/6, which made the cell
# order-dependent and impossible to re-run.
df_2015 = (
    df_2015[['name', 'pos', 'neg']]
    .groupby('name', as_index=False)
    .sum()
    .assign(date='2015')
)

# df_2010.head(2)

In [160]:
# Yearly pos/neg totals per company for 2016.
# Name-based column selection replaces positions 4/5/6, which made the cell
# order-dependent and impossible to re-run.
df_2016 = (
    df_2016[['name', 'pos', 'neg']]
    .groupby('name', as_index=False)
    .sum()
    .assign(date='2016')
)

# df_2010.head(2)

In [161]:
# Yearly pos/neg totals per company for 2017.
# Name-based column selection replaces positions 4/5/6, which made the cell
# order-dependent and impossible to re-run.
df_2017 = (
    df_2017[['name', 'pos', 'neg']]
    .groupby('name', as_index=False)
    .sum()
    .assign(date='2017')
)

# df_2010.head(2)

In [162]:
# Yearly pos/neg totals per company for 2018.
# Name-based column selection replaces positions 4/5/6, which made the cell
# order-dependent and impossible to re-run.
df_2018 = (
    df_2018[['name', 'pos', 'neg']]
    .groupby('name', as_index=False)
    .sum()
    .assign(date='2018')
)

# df_2010.head(2)

In [163]:
# Put every yearly table into the same column order: name, date, pos, neg.
annual_cols = ['name', 'date', 'pos', 'neg']
(df_2010, df_2011, df_2012, df_2013, df_2014,
 df_2015, df_2016, df_2017, df_2018) = (
    frame[annual_cols]
    for frame in (df_2010, df_2011, df_2012, df_2013, df_2014,
                  df_2015, df_2016, df_2017, df_2018)
)


In [164]:
# df_2010 = df_2010.groupby('name').sum()
# df_2011 = df_2011.groupby('name').sum()
# df_2012 = df_2012.groupby('name').sum()
# df_2013 = df_2013.groupby('name').sum()
# df_2014 = df_2014.groupby('name').sum()
# df_2015 = df_2015.groupby('name').sum()
# df_2016 = df_2016.groupby('name').sum()
# df_2017 = df_2017.groupby('name').sum()
# df_2018 = df_2018.groupby('name').sum()


In [165]:
# Stack the nine yearly tables into one long table.
annual_frames = [
    df_2010, df_2011, df_2012, df_2013, df_2014,
    df_2015, df_2016, df_2017, df_2018,
]
df_year = pd.concat(annual_frames)

In [166]:
# Check the end of the stacked yearly table (row labels restart per year; the index is not reset).
df_year.tail()

Unnamed: 0,name,date,pos,neg
113,휴마시스,2018,18,-21
114,휴메딕스,2018,28,-38
115,휴비츠,2018,20,-26
116,휴온스,2018,153,-189
117,휴온스글로벌,2018,86,-46


In [167]:
# Write the yearly per-company totals to CSV without the index column.
df_year.to_csv('./news_연간(10-18).csv',index=False)

## 월간

In [168]:
# Rebuild the row-level per-year frames from `df`; the df_20XX names were reused
# for the yearly totals above, so they are re-derived here for the monthly pass.
(df_2010, df_2011, df_2012, df_2013, df_2014,
 df_2015, df_2016, df_2017, df_2018) = (
    df[df['year'] == yr] for yr in range(2010, 2019)
)

In [169]:
# Slice the 2010 rows into twelve frames, one per month (list index 0 = January).
dflist_10 = [df_2010[df_2010['month'] == month_no] for month_no in range(1, 13)]

In [170]:
# dflist_10[0]

In [171]:
# Per-company sums for each month of 2010.
# The loop variable is no longer called `df`, so the main frame is not overwritten
# by the last monthly slice. numeric_only=True restricts the sum to numeric columns
# on every pandas version (pandas 2.x would otherwise try to sum the datetime and
# text columns); the resulting columns are unchanged.
dflist_10_1 = [
    month_df.groupby('name').sum(numeric_only=True) for month_df in dflist_10
]

In [172]:
# Preview the February 2010 sums (list index 1).
dflist_10_1[1].head()

Unnamed: 0_level_0,polarity.label,pos,neg,year,month
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
경남제약,22.0,2,-10,32160,32
경동제약,26.0,8,-9,38190,38
광동제약,30.0,4,-13,46230,46
국제약품,4.0,0,-2,4020,4
녹십자,72.0,10,-31,104520,104


In [173]:
# Slice the 2011 rows into twelve frames, one per month (list index 0 = January).
dflist_11 = [df_2011[df_2011['month'] == month_no] for month_no in range(1, 13)]

In [174]:
# dflist_11[1]

In [175]:
# Per-company sums for each month of 2011.
# Loop variable renamed so the global `df` is not overwritten; numeric_only=True
# keeps the sum to numeric columns on every pandas version.
dflist_11_1 = [
    month_df.groupby('name').sum(numeric_only=True) for month_df in dflist_11
]

In [176]:
# Slice the 2012 rows into twelve frames, one per month (list index 0 = January).
dflist_12 = [df_2012[df_2012['month'] == month_no] for month_no in range(1, 13)]

In [177]:
# Per-company sums for each month of 2012.
# Loop variable renamed so the global `df` is not overwritten; numeric_only=True
# keeps the sum to numeric columns on every pandas version.
dflist_12_1 = [
    month_df.groupby('name').sum(numeric_only=True) for month_df in dflist_12
]

In [178]:
# Slice the 2013 rows into twelve frames, one per month (list index 0 = January).
dflist_13 = [df_2013[df_2013['month'] == month_no] for month_no in range(1, 13)]

In [179]:
# Per-company sums for each month of 2013.
# Loop variable renamed so the global `df` is not overwritten; numeric_only=True
# keeps the sum to numeric columns on every pandas version.
dflist_13_1 = [
    month_df.groupby('name').sum(numeric_only=True) for month_df in dflist_13
]

In [180]:
# Slice the 2014 rows into twelve frames, one per month (list index 0 = January).
dflist_14 = [df_2014[df_2014['month'] == month_no] for month_no in range(1, 13)]

In [181]:
# Per-company sums for each month of 2014.
# Loop variable renamed so the global `df` is not overwritten; numeric_only=True
# keeps the sum to numeric columns on every pandas version.
dflist_14_1 = [
    month_df.groupby('name').sum(numeric_only=True) for month_df in dflist_14
]

In [182]:
# Slice the 2015 rows into twelve frames, one per month (list index 0 = January).
dflist_15 = [df_2015[df_2015['month'] == month_no] for month_no in range(1, 13)]

In [183]:
# Per-company sums for each month of 2015.
# Loop variable renamed so the global `df` is not overwritten; numeric_only=True
# keeps the sum to numeric columns on every pandas version.
dflist_15_1 = [
    month_df.groupby('name').sum(numeric_only=True) for month_df in dflist_15
]

In [184]:
# Slice the 2016 rows into twelve frames, one per month (list index 0 = January).
dflist_16 = [df_2016[df_2016['month'] == month_no] for month_no in range(1, 13)]

In [185]:
# Per-company sums for each month of 2016.
# Loop variable renamed so the global `df` is not overwritten; numeric_only=True
# keeps the sum to numeric columns on every pandas version.
dflist_16_1 = [
    month_df.groupby('name').sum(numeric_only=True) for month_df in dflist_16
]

In [186]:
# Slice the 2017 rows into twelve frames, one per month (list index 0 = January).
dflist_17 = [df_2017[df_2017['month'] == month_no] for month_no in range(1, 13)]

In [187]:
# Per-company sums for each month of 2017.
# Loop variable renamed so the global `df` is not overwritten; numeric_only=True
# keeps the sum to numeric columns on every pandas version.
dflist_17_1 = [
    month_df.groupby('name').sum(numeric_only=True) for month_df in dflist_17
]

In [188]:
# Slice the 2018 rows into twelve frames, one per month (list index 0 = January).
dflist_18 = [df_2018[df_2018['month'] == month_no] for month_no in range(1, 13)]

In [189]:
# Per-company sums for each month of 2018.
# Loop variable renamed so the global `df` is not overwritten; numeric_only=True
# keeps the sum to numeric columns on every pandas version.
dflist_18_1 = [
    month_df.groupby('name').sum(numeric_only=True) for month_df in dflist_18
]

In [190]:
# Preview the February 2018 sums (list index 1).
dflist_18_1[1].head()

Unnamed: 0_level_0,polarity.label,pos,neg,year,month
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CMG제약,6.0,2,-2,12108,12
JW생명과학,18.0,4,-7,38342,38
JW신약,5.0,1,-2,6054,6
JW중외제약,17.0,5,-6,32288,32
강스템바이오텍,12.0,2,-5,18162,18


# 예쁘게

In [191]:
# Tidy each 2010 monthly sum into the columns name, date, pos, neg, where date is
# the label 'YYYY-M' (month not zero-padded).
# pos/neg are picked by name rather than positions 1/2, and the loop variable is no
# longer `df`, so the main frame is not overwritten.
dflist_10_2 = [
    month_sum[['pos', 'neg']]
    .reset_index()
    .assign(date=f'2010-{month_no}')[['name', 'date', 'pos', 'neg']]
    for month_no, month_sum in enumerate(dflist_10_1, start=1)
]

In [192]:
# Preview the tidied February 2010 table.
dflist_10_2[1].head(2)

Unnamed: 0,name,date,pos,neg
0,경남제약,2010-2,2,-10
1,경동제약,2010-2,8,-9


In [193]:
# Tidy each 2011 monthly sum into the columns name, date ('YYYY-M'), pos, neg.
# pos/neg are picked by name rather than positions 1/2, and the loop variable is no
# longer `df`.
dflist_11_2 = [
    month_sum[['pos', 'neg']]
    .reset_index()
    .assign(date=f'2011-{month_no}')[['name', 'date', 'pos', 'neg']]
    for month_no, month_sum in enumerate(dflist_11_1, start=1)
]

In [194]:
# Tidy each 2012 monthly sum into the columns name, date ('YYYY-M'), pos, neg.
# pos/neg are picked by name rather than positions 1/2, and the loop variable is no
# longer `df`.
dflist_12_2 = [
    month_sum[['pos', 'neg']]
    .reset_index()
    .assign(date=f'2012-{month_no}')[['name', 'date', 'pos', 'neg']]
    for month_no, month_sum in enumerate(dflist_12_1, start=1)
]

In [195]:
# Tidy each 2013 monthly sum into the columns name, date ('YYYY-M'), pos, neg.
# pos/neg are picked by name rather than positions 1/2, and the loop variable is no
# longer `df`.
dflist_13_2 = [
    month_sum[['pos', 'neg']]
    .reset_index()
    .assign(date=f'2013-{month_no}')[['name', 'date', 'pos', 'neg']]
    for month_no, month_sum in enumerate(dflist_13_1, start=1)
]

In [196]:
# Tidy each 2014 monthly sum into the columns name, date ('YYYY-M'), pos, neg.
# pos/neg are picked by name rather than positions 1/2, and the loop variable is no
# longer `df`.
dflist_14_2 = [
    month_sum[['pos', 'neg']]
    .reset_index()
    .assign(date=f'2014-{month_no}')[['name', 'date', 'pos', 'neg']]
    for month_no, month_sum in enumerate(dflist_14_1, start=1)
]

In [197]:
# Tidy each 2015 monthly sum into the columns name, date ('YYYY-M'), pos, neg.
# pos/neg are picked by name rather than positions 1/2, and the loop variable is no
# longer `df`.
dflist_15_2 = [
    month_sum[['pos', 'neg']]
    .reset_index()
    .assign(date=f'2015-{month_no}')[['name', 'date', 'pos', 'neg']]
    for month_no, month_sum in enumerate(dflist_15_1, start=1)
]

In [198]:
# Tidy each 2016 monthly sum into the columns name, date ('YYYY-M'), pos, neg.
# pos/neg are picked by name rather than positions 1/2, and the loop variable is no
# longer `df`.
dflist_16_2 = [
    month_sum[['pos', 'neg']]
    .reset_index()
    .assign(date=f'2016-{month_no}')[['name', 'date', 'pos', 'neg']]
    for month_no, month_sum in enumerate(dflist_16_1, start=1)
]

In [199]:
# Tidy each 2017 monthly sum into the columns name, date ('YYYY-M'), pos, neg.
# pos/neg are picked by name rather than positions 1/2, and the loop variable is no
# longer `df`.
dflist_17_2 = [
    month_sum[['pos', 'neg']]
    .reset_index()
    .assign(date=f'2017-{month_no}')[['name', 'date', 'pos', 'neg']]
    for month_no, month_sum in enumerate(dflist_17_1, start=1)
]

In [200]:
# Tidy each 2018 monthly sum into the columns name, date ('YYYY-M'), pos, neg.
# pos/neg are picked by name rather than positions 1/2, and the loop variable is no
# longer `df`.
dflist_18_2 = [
    month_sum[['pos', 'neg']]
    .reset_index()
    .assign(date=f'2018-{month_no}')[['name', 'date', 'pos', 'neg']]
    for month_no, month_sum in enumerate(dflist_18_1, start=1)
]

In [201]:
# dflist_18_2[5]

In [202]:
# dflist_18_2[0]

# 합치기

In [203]:
# Company-name -> ticker lookup table; rows with any missing value are dropped.
ticker = pd.read_csv(
    '../데이터/최종기업ticker.csv',
    names=['name', 'ticker'],
).dropna()

In [204]:
# Preview the ticker lookup table.
ticker.head(2)

Unnamed: 0,name,ticker
1,CMG제약,58820
2,HLB,28300


In [205]:
# Expect twelve monthly tables for 2010.
len(dflist_10_2)

12

In [206]:
# Left-join each 2010 monthly table onto the full ticker list so every company has
# a row for every month (pos/neg stay NaN when the company has no row that month).
# The index is deliberately still called `i`: later cells read `i` after this loop.
dflist_10_3 = []
for i, monthly in enumerate(dflist_10_2):
    joined = pd.merge(ticker, monthly, how='left', on='name')
    joined = joined.drop(columns='ticker')
    # Overwrite date so companies without a match also carry the month label.
    joined['date'] = f'2010-{i+1}'
    dflist_10_3.append(joined)

In [207]:
# Check the last rows of the December 2010 table (list index 11).
dflist_10_3[11].tail(2)

Unnamed: 0,name,date,pos,neg
124,휴온스,2010-12,1.0,-5.0
125,휴온스글로벌,2010-12,,


In [208]:
# Collapse the twelve monthly frames into a single DataFrame.
# NOTE(review): the name is rebound from a list to a DataFrame, so this cell cannot
# be re-run on its own without rebuilding the list first.
dflist_10_3 = pd.concat(dflist_10_3)

In [209]:
# Preview the stacked 2010 monthly table.
dflist_10_3.head(2)

Unnamed: 0,name,date,pos,neg
0,CMG제약,2010-1,,
1,HLB,2010-1,,


In [210]:
# Left-join each 2011 monthly table onto the full ticker list so every company has
# a row for every month (pos/neg stay NaN when the company has no row that month).
# The month label now comes from enumerate(); the previous loop had no index of its
# own and read a stale `i` left by an earlier cell, which labelled every month of
# the year identically ('2011-12' in a top-to-bottom run).
dflist_11_3 = []
for month_no, monthly in enumerate(dflist_11_2, start=1):
    merge_11 = pd.merge(ticker, monthly, how='left', on='name').drop('ticker', axis=1)
    merge_11['date'] = f'2011-{month_no}'
    dflist_11_3.append(merge_11)

In [211]:
# Collapse the monthly frames into a single DataFrame.
# NOTE(review): rebinding the list name to a DataFrame makes this cell non-re-runnable.
dflist_11_3 = pd.concat(dflist_11_3)

In [212]:
# Left-join each 2012 monthly table onto the full ticker list (one row per company
# per month; pos/neg stay NaN without a match).
# The month label now comes from enumerate() instead of a stale `i` from an earlier
# cell, which labelled every month of the year identically.
dflist_12_3 = []
for month_no, monthly in enumerate(dflist_12_2, start=1):
    merge_12 = pd.merge(ticker, monthly, how='left', on='name').drop('ticker', axis=1)
    merge_12['date'] = f'2012-{month_no}'
    dflist_12_3.append(merge_12)

In [213]:
# Collapse the monthly frames into a single DataFrame.
# NOTE(review): rebinding the list name to a DataFrame makes this cell non-re-runnable.
dflist_12_3 = pd.concat(dflist_12_3)

In [214]:
# Left-join each 2013 monthly table onto the full ticker list (one row per company
# per month; pos/neg stay NaN without a match).
# The month label now comes from enumerate() instead of a stale `i` from an earlier
# cell, which labelled every month of the year identically.
dflist_13_3 = []
for month_no, monthly in enumerate(dflist_13_2, start=1):
    merge_13 = pd.merge(ticker, monthly, how='left', on='name').drop('ticker', axis=1)
    merge_13['date'] = f'2013-{month_no}'
    dflist_13_3.append(merge_13)

In [215]:
# Collapse the monthly frames into a single DataFrame.
# NOTE(review): rebinding the list name to a DataFrame makes this cell non-re-runnable.
dflist_13_3 = pd.concat(dflist_13_3)

In [216]:
# Left-join each 2014 monthly table onto the full ticker list (one row per company
# per month; pos/neg stay NaN without a match).
# The month label now comes from enumerate() instead of a stale `i` from an earlier
# cell, which labelled every month of the year identically.
dflist_14_3 = []
for month_no, monthly in enumerate(dflist_14_2, start=1):
    merge_14 = pd.merge(ticker, monthly, how='left', on='name').drop('ticker', axis=1)
    merge_14['date'] = f'2014-{month_no}'
    dflist_14_3.append(merge_14)

In [217]:
# Collapse the monthly frames into a single DataFrame.
# NOTE(review): rebinding the list name to a DataFrame makes this cell non-re-runnable.
dflist_14_3 = pd.concat(dflist_14_3)

In [218]:
# Left-join each 2015 monthly table onto the full ticker list (one row per company
# per month; pos/neg stay NaN without a match).
# The month label now comes from enumerate() instead of a stale `i` from an earlier
# cell, which labelled every month of the year identically.
dflist_15_3 = []
for month_no, monthly in enumerate(dflist_15_2, start=1):
    merge_15 = pd.merge(ticker, monthly, how='left', on='name').drop('ticker', axis=1)
    merge_15['date'] = f'2015-{month_no}'
    dflist_15_3.append(merge_15)

In [219]:
# Collapse the monthly frames into a single DataFrame.
# NOTE(review): rebinding the list name to a DataFrame makes this cell non-re-runnable.
dflist_15_3 = pd.concat(dflist_15_3)

In [220]:
# Left-join each 2016 monthly table onto the full ticker list (one row per company
# per month; pos/neg stay NaN without a match).
# The month label now comes from enumerate() instead of a stale `i` from an earlier
# cell, which labelled every month of the year identically.
dflist_16_3 = []
for month_no, monthly in enumerate(dflist_16_2, start=1):
    merge_16 = pd.merge(ticker, monthly, how='left', on='name').drop('ticker', axis=1)
    merge_16['date'] = f'2016-{month_no}'
    dflist_16_3.append(merge_16)

In [221]:
# Collapse the monthly frames into a single DataFrame.
# NOTE(review): rebinding the list name to a DataFrame makes this cell non-re-runnable.
dflist_16_3 = pd.concat(dflist_16_3)

In [222]:
# Left-join each 2017 monthly table onto the full ticker list (one row per company
# per month; pos/neg stay NaN without a match).
# The month label now comes from enumerate() instead of a stale `i` from an earlier
# cell, which labelled every month of the year identically.
dflist_17_3 = []
for month_no, monthly in enumerate(dflist_17_2, start=1):
    merge_17 = pd.merge(ticker, monthly, how='left', on='name').drop('ticker', axis=1)
    merge_17['date'] = f'2017-{month_no}'
    dflist_17_3.append(merge_17)

In [223]:
# Collapse the monthly frames into a single DataFrame.
# NOTE(review): rebinding the list name to a DataFrame makes this cell non-re-runnable.
dflist_17_3 = pd.concat(dflist_17_3)

In [224]:
# Left-join each 2018 monthly table onto the full ticker list (one row per company
# per month; pos/neg stay NaN without a match).
# The month label now comes from enumerate() instead of a stale `i` from an earlier
# cell, which labelled every month of the year identically.
dflist_18_3 = []
for month_no, monthly in enumerate(dflist_18_2, start=1):
    merge_18 = pd.merge(ticker, monthly, how='left', on='name').drop('ticker', axis=1)
    merge_18['date'] = f'2018-{month_no}'
    dflist_18_3.append(merge_18)

In [225]:
# Collapse the monthly frames into a single DataFrame.
# NOTE(review): rebinding the list name to a DataFrame makes this cell non-re-runnable.
dflist_18_3 = pd.concat(dflist_18_3)

In [226]:
# Start from an empty frame; the per-year monthly tables are added in the next cell.
news =pd.DataFrame()

In [227]:
# Stack the nine per-year monthly tables (2010-2018) into one long frame.
# pd.concat replaces the chained DataFrame.append calls: append was deprecated in
# pandas 1.4 and removed in 2.0, and every call copied the accumulated frame.
news = pd.concat([
    dflist_10_3, dflist_11_3, dflist_12_3,
    dflist_13_3, dflist_14_3, dflist_15_3,
    dflist_16_3, dflist_17_3, dflist_18_3,
])

In [228]:
# Spot-check the rows labelled January 2010.
news[news['date']=='2010-1']


Unnamed: 0,name,date,pos,neg
0,CMG제약,2010-1,,
1,HLB,2010-1,,
2,HLB생명과학,2010-1,,
3,JW생명과학,2010-1,,
4,JW신약,2010-1,,
...,...,...,...,...
121,휴마시스,2010-1,,
122,휴메딕스,2010-1,,
123,휴비츠,2010-1,1.0,-1.0
124,휴온스,2010-1,6.0,0.0


In [229]:
# Write the monthly per-company table to CSV without the index column.
news.to_csv('./news_월간(10-18).csv',index=False)