In [2]:
import os
import pandas as pd
import numpy as np
import re
from tqdm import tqdm
import urllib.request
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, \
                            roc_auc_score, confusion_matrix, classification_report, \
                            matthews_corrcoef, cohen_kappa_score, log_loss

 The versions of TensorFlow you are currently using is 2.6.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons
  from .autonotebook import tqdm as notebook_tqdm


In [5]:
! pip freeze > requirements_2.txt 

In [3]:
MODEL_NAME = "klue/bert-base"
model = TFBertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3, from_pt=True)
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB



2022-08-12 23:41:13.450434: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-08-12 23:41:13.451393: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForSequenceClassification: ['bert.embeddings.position_ids']
- This IS expected if you are initializing TFBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from a PyTorch mo

In [4]:
# 최고 성능의 모델 불러오기
sentiment_model_best = tf.keras.models.load_model('/Users/seop/Documents/GitHub/Prediction-of-IPO-stock-price-using-chatbot/jiseop_test/best_model.h5',
                                                  custom_objects={'TFBertForSequenceClassification': TFBertForSequenceClassification})

In [4]:
data1.rename(columns=data1.iloc[0],inplace = True)
data1 = data1.drop(data1.index[0])

In [5]:
data1.reset_index(inplace = True)
data1.drop('index',axis=1,inplace=True)

In [6]:
data1.rename(columns = {'name':'기업명',
                       'type': '시장 종류',
                       'day': '상장일',
                       'now_p': '현재가',
                       'gongmo_p': '공모가',
                       'sicho_p': '시초가',
                       'first_p': '종가',
                       'corp_cd': '종목코드'}, inplace = True)

In [7]:
# data1.to_csv('data_1.csv', header = None)

In [8]:
#data200 = pd.read_csv('data200.csv',encoding = 'euc-kr')

In [9]:
data1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1283 entries, 0 to 1282
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   기업명     1283 non-null   object
 1   시장 종류   1283 non-null   object
 2   상장일     1283 non-null   object
 3   현재가     1283 non-null   object
 4   공모가     1283 non-null   object
 5   시초가     1283 non-null   object
 6   종가      1283 non-null   object
 7   종목코드    1283 non-null   object
dtypes: object(8)
memory usage: 80.3+ KB


In [10]:
data1.isnull().sum()

기업명      0
시장 종류    0
상장일      0
현재가      0
공모가      0
시초가      0
종가       0
종목코드     0
dtype: int64

In [11]:
#기업명에 스팩이 들어간 경우 제거
data1 = data1[~data1['기업명'].str.contains('스팩')]

In [12]:
data = pd.read_csv('38com_benefit.csv')

In [13]:
data.drop(['Unnamed: 0'], axis = 1,inplace = True)

In [14]:
data = data.dropna()
data

Unnamed: 0,기업명,경쟁률,의무보유확약,시초/공모%(수익률)
1,루닛,7.10,1.65%,%
3,에이치피에스피,1511.36:1,42.54%,100%
4,영창케미칼,"1,616.27:1",5.92%,-1.08%
6,넥스트칩,1623.41:1,11.58%,31.92%
8,위니아에이드,955:1,1.39%,-9.88%
...,...,...,...,...
712,덕신하우징,650.72:1,59.86%,66.54%
713,파버나인,264.04:1,0.00%,-1.6%
714,창해에탄올,578.20:1,59.50%,100%
716,윈하이텍,444.1:1,55.60%,60.24%


In [15]:
index1 = data[data['의무보유확약'] == '0.00%'].index
#의무보유확약이 0 인 것도 제거
data = data.drop(index1)

In [16]:
#변수들은 int형으로 바꿀 것이기 때문에 특수문자 제거 
data.replace('(:1|%|,|:)','',regex = True, inplace= True)

In [17]:
data.reset_index(inplace = True)
data.drop('index',axis=1,inplace=True)
data = data[1:]

In [18]:
data = data.astype({'경쟁률':'float',
                    '의무보유확약': 'float',
                    '시초/공모%(수익률)':'float'})
data.dtypes 

기업명             object
경쟁률            float64
의무보유확약         float64
시초/공모%(수익률)    float64
dtype: object

In [19]:
#기업명이 스펙인 것 제거
data = data[~data['기업명'].str.contains('스팩')]

In [20]:
#변수 추가로 크롤링 해온 데이터 
data_added = pd.read_csv('38_add_variable.csv', encoding = 'euc-kr')
data_added

Unnamed: 0.1,Unnamed: 0,기업명,매출액,순이익,구주매출,희망공모가액,청약경쟁률,확정공모가
0,0,수산인더스트리,"294,111 (백만원)","53,318 (백만원)","신주모집 : 4,286,000 주 (75%)\n\t/ 구주매출 : 1,429,0...","35,000 ~ 43,100 원",3.39:1 (비례 7:1),"35,000원"
1,1,에이프릴바이오,- (백만원),"-10,220 (백만원)","신주모집 : 1,620,000 주 (100%)\n\t","20,000 ~ 23,000 원",4.76:1 (비례 10:1),"16,000원"
2,2,신한스팩10호,- (백만원),- (백만원),"신주모집 : 3,250,000 주 (100%)\n\t","2,000 ~ 2,000 원",132.38:1 (비례 265:1),"2,000원"
3,3,아이씨에이치,"24,167 (백만원)","2,428 (백만원)","신주모집 : 1,035,000 주 (87.71%)\n\t/ 구주매출 : 145,...","34,000 ~ 44,000 원",2.51:1 (비례 5:1),"34,000원"
4,4,성일하이텍,"65,939 (백만원)","-2,998 (백만원)","신주모집 : 2,670,000 주 (100%)\n\t","40,700 ~ 47,500 원",1207.1:1 (비례 2414:1),"50,000원"
...,...,...,...,...,...,...,...,...
1388,1388,글로벌에스엠테크리미티드,550 (백만원),63 (백만원),"신주모집 : 5,794,020 주 (100%)\n\t","2,800 ~ 3,200 원",522:1,"3,200원"
1389,1389,케이탑리츠,- (백만원),- (백만원),"신주모집 : 3,000,000 주 (100%)\n\t",- ~ - 원,,"5,500원"
1390,1390,모두투어리츠,"3,023 (백만원)",22 (백만원),"신주모집 : 2,350,000 주 (100%)\n\t","6,000 ~ 6,000 원",0.98:1,"6,000원"
1391,1391,신한알파리츠,- (백만원),- (백만원),"신주모집 : 22,800,000 주 (100%)\n\t","5,000 ~ 5,000 원",4.32:1,"5,000원"


In [21]:
data_added.drop(['Unnamed: 0'], axis = 1,inplace = True)

In [22]:
import re
regex = "\(.*\)|\s-\s.*" 
for i in range(len(data_added)):
    data_added['매출액'][i] = re.sub(regex,'',data_added['매출액'][i])
for i in range(len(data_added)):
    data_added['순이익'][i] = re.sub(regex,'',data_added['순이익'][i])


In [23]:
data_added.replace('',np.nan,inplace = True)

In [24]:
data_added.dropna(subset=['매출액'],inplace = True)
data_added.dropna(subset=['순이익'],inplace = True)

In [25]:
data_added = data_added[data_added['희망공모가액'] != '- ~ - 원']

In [26]:
#기존 데이터와 새로운 데이터 합치기 (변수추가)
df_inner_join = pd.merge(data_added,data, left_on = '기업명',right_on ='기업명',how='inner')
df_inner_join

Unnamed: 0,기업명,매출액,순이익,구주매출,희망공모가액,청약경쟁률,확정공모가,경쟁률,의무보유확약,시초/공모%(수익률)
0,에이치피에스피,61174,17658,"신주모집 : 3,000,000 주 (100%)\n\t","23,000 ~ 25,000 원",1159.05:1 (비례 2318:1),"25,000원",1511.36,42.54,100.00
1,영창케미칼,60760,2506,"신주모집 : 2,000,000 주 (83.33%)\n\t/ 구주매출 : 400,...","15,000 ~ 18,600 원",682.13:1 (비례 1364:1),"18,600원",1616.27,5.92,-1.08
2,넥스트칩,10383,-13496,"신주모집 : 2,600,000 주 (100%)\n\t","9,900 ~ 11,600 원",1727.38:1 (비례 3455:1),"13,000원",1623.41,11.58,31.92
3,위니아에이드,418527,24895,"신주모집 : 5,366,087 주 (100%)\n\t","14,200 ~ 16,200 원",111.26:1 (비례 223:1),"16,200원",955.00,1.39,-9.88
4,레이저쎌,3879,-7665,"신주모집 : 1,600,000 주 (100%)\n\t","12,000 ~ 14,000 원",1845.11:1 (비례 3690:1),"16,000원",1442.95,12.43,28.75
...,...,...,...,...,...,...,...,...,...,...
391,데브시스터즈,61303,22326,"신주모집 : 2,700,000 주 (100%)\n\t","43,000 ~ 50,000 원",285.28:1,"53,000원",651.66,29.98,33.96
392,신화콘텍,51059,6446,"신주모집 : 1,600,000 주 (100%)\n\t","8,100 ~ 9,100 원",99.3:1,"9,100원",441.99,10.37,-10.00
393,덕신하우징,104434,11718,"신주모집 : 2,000,000 주 (100%)\n\t","9,600 ~ 11,000 원",899.07:1,"13,000원",650.72,59.86,66.54
394,창해에탄올,69032,14887,"신주모집 : 1,900,871 주 (100%)\n\t","6,000 ~ 6,900 원",675.79:1,"8,300원",578.20,59.50,100.00


In [27]:
data_inner_join = pd.merge(df_inner_join,data1, left_on = '기업명',right_on ='기업명',how='inner')
data_inner_join

Unnamed: 0,기업명,매출액,순이익,구주매출,희망공모가액,청약경쟁률,확정공모가,경쟁률,의무보유확약,시초/공모%(수익률),시장 종류,상장일,현재가,공모가,시초가,종가,종목코드
0,에이치피에스피,61174,17658,"신주모집 : 3,000,000 주 (100%)\n\t","23,000 ~ 25,000 원",1159.05:1 (비례 2318:1),"25,000원",1511.36,42.54,100.00,kosdaq,20220715,64000,25000,50000,43250,403870
1,영창케미칼,60760,2506,"신주모집 : 2,000,000 주 (83.33%)\n\t/ 구주매출 : 400,...","15,000 ~ 18,600 원",682.13:1 (비례 1364:1),"18,600원",1616.27,5.92,-1.08,kosdaq,20220714,17300,18600,18400,16450,112290
2,넥스트칩,10383,-13496,"신주모집 : 2,600,000 주 (100%)\n\t","9,900 ~ 11,600 원",1727.38:1 (비례 3455:1),"13,000원",1623.41,11.58,31.92,kosdaq,20220701,13350,13000,17150,14300,396270
3,위니아에이드,418527,24895,"신주모집 : 5,366,087 주 (100%)\n\t","14,200 ~ 16,200 원",111.26:1 (비례 223:1),"16,200원",955.00,1.39,-9.88,kosdaq,20220623,9960,16200,14600,11000,377460
4,레이저쎌,3879,-7665,"신주모집 : 1,600,000 주 (100%)\n\t","12,000 ~ 14,000 원",1845.11:1 (비례 3690:1),"16,000원",1442.95,12.43,28.75,kosdaq,20220624,13250,16000,20600,17350,412350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,데브시스터즈,61303,22326,"신주모집 : 2,700,000 주 (100%)\n\t","43,000 ~ 50,000 원",285.28:1,"53,000원",651.66,29.98,33.96,kosdaq,20141006,-,53000,71000,61000,194480
392,신화콘텍,51059,6446,"신주모집 : 1,600,000 주 (100%)\n\t","8,100 ~ 9,100 원",99.3:1,"9,100원",441.99,10.37,-10.00,kosdaq,20140808,-,9100,8190,7100,187270
393,덕신하우징,104434,11718,"신주모집 : 2,000,000 주 (100%)\n\t","9,600 ~ 11,000 원",899.07:1,"13,000원",650.72,59.86,66.54,kosdaq,20140801,-,13000,21650,20950,090410
394,창해에탄올,69032,14887,"신주모집 : 1,900,871 주 (100%)\n\t","6,000 ~ 6,900 원",675.79:1,"8,300원",578.20,59.50,100.00,kosdaq,20140730,-,8300,16600,14600,004650


In [28]:
final_data = data_inner_join
final_data.drop('확정공모가',axis = 1)
final_data

Unnamed: 0,기업명,매출액,순이익,구주매출,희망공모가액,청약경쟁률,확정공모가,경쟁률,의무보유확약,시초/공모%(수익률),시장 종류,상장일,현재가,공모가,시초가,종가,종목코드
0,에이치피에스피,61174,17658,"신주모집 : 3,000,000 주 (100%)\n\t","23,000 ~ 25,000 원",1159.05:1 (비례 2318:1),"25,000원",1511.36,42.54,100.00,kosdaq,20220715,64000,25000,50000,43250,403870
1,영창케미칼,60760,2506,"신주모집 : 2,000,000 주 (83.33%)\n\t/ 구주매출 : 400,...","15,000 ~ 18,600 원",682.13:1 (비례 1364:1),"18,600원",1616.27,5.92,-1.08,kosdaq,20220714,17300,18600,18400,16450,112290
2,넥스트칩,10383,-13496,"신주모집 : 2,600,000 주 (100%)\n\t","9,900 ~ 11,600 원",1727.38:1 (비례 3455:1),"13,000원",1623.41,11.58,31.92,kosdaq,20220701,13350,13000,17150,14300,396270
3,위니아에이드,418527,24895,"신주모집 : 5,366,087 주 (100%)\n\t","14,200 ~ 16,200 원",111.26:1 (비례 223:1),"16,200원",955.00,1.39,-9.88,kosdaq,20220623,9960,16200,14600,11000,377460
4,레이저쎌,3879,-7665,"신주모집 : 1,600,000 주 (100%)\n\t","12,000 ~ 14,000 원",1845.11:1 (비례 3690:1),"16,000원",1442.95,12.43,28.75,kosdaq,20220624,13250,16000,20600,17350,412350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,데브시스터즈,61303,22326,"신주모집 : 2,700,000 주 (100%)\n\t","43,000 ~ 50,000 원",285.28:1,"53,000원",651.66,29.98,33.96,kosdaq,20141006,-,53000,71000,61000,194480
392,신화콘텍,51059,6446,"신주모집 : 1,600,000 주 (100%)\n\t","8,100 ~ 9,100 원",99.3:1,"9,100원",441.99,10.37,-10.00,kosdaq,20140808,-,9100,8190,7100,187270
393,덕신하우징,104434,11718,"신주모집 : 2,000,000 주 (100%)\n\t","9,600 ~ 11,000 원",899.07:1,"13,000원",650.72,59.86,66.54,kosdaq,20140801,-,13000,21650,20950,090410
394,창해에탄올,69032,14887,"신주모집 : 1,900,871 주 (100%)\n\t","6,000 ~ 6,900 원",675.79:1,"8,300원",578.20,59.50,100.00,kosdaq,20140730,-,8300,16600,14600,004650


In [29]:
final_data.replace('(:1|:|원)','',regex = True, inplace= True)
final_data

Unnamed: 0,기업명,매출액,순이익,구주매출,희망공모가액,청약경쟁률,확정공모가,경쟁률,의무보유확약,시초/공모%(수익률),시장 종류,상장일,현재가,공모가,시초가,종가,종목코드
0,에이치피에스피,61174,17658,"신주모집 3,000,000 주 (100%)\n\t","23,000 ~ 25,000",1159.05 (비례 2318),25000,1511.36,42.54,100.00,kosdaq,20220715,64000,25000,50000,43250,403870
1,영창케미칼,60760,2506,"신주모집 2,000,000 주 (83.33%)\n\t/ 구주매출 400,00...","15,000 ~ 18,600",682.13 (비례 1364),18600,1616.27,5.92,-1.08,kosdaq,20220714,17300,18600,18400,16450,112290
2,넥스트칩,10383,-13496,"신주모집 2,600,000 주 (100%)\n\t","9,900 ~ 11,600",1727.38 (비례 3455),13000,1623.41,11.58,31.92,kosdaq,20220701,13350,13000,17150,14300,396270
3,위니아에이드,418527,24895,"신주모집 5,366,087 주 (100%)\n\t","14,200 ~ 16,200",111.26 (비례 223),16200,955.00,1.39,-9.88,kosdaq,20220623,9960,16200,14600,11000,377460
4,레이저쎌,3879,-7665,"신주모집 1,600,000 주 (100%)\n\t","12,000 ~ 14,000",1845.11 (비례 3690),16000,1442.95,12.43,28.75,kosdaq,20220624,13250,16000,20600,17350,412350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,데브시스터즈,61303,22326,"신주모집 2,700,000 주 (100%)\n\t","43,000 ~ 50,000",285.28,53000,651.66,29.98,33.96,kosdaq,20141006,-,53000,71000,61000,194480
392,신화콘텍,51059,6446,"신주모집 1,600,000 주 (100%)\n\t","8,100 ~ 9,100",99.3,9100,441.99,10.37,-10.00,kosdaq,20140808,-,9100,8190,7100,187270
393,덕신하우징,104434,11718,"신주모집 2,000,000 주 (100%)\n\t","9,600 ~ 11,000",899.07,13000,650.72,59.86,66.54,kosdaq,20140801,-,13000,21650,20950,090410
394,창해에탄올,69032,14887,"신주모집 1,900,871 주 (100%)\n\t","6,000 ~ 6,900",675.79,8300,578.20,59.50,100.00,kosdaq,20140730,-,8300,16600,14600,004650


In [30]:
final_data.loc[final_data['구주매출'].str.contains('100%') == True,'구주매출'] = 1

In [31]:
#final_data.to_csv('final.csv')

In [32]:
final = pd.read_csv('final_data.csv',encoding='euc-kr')
final

Unnamed: 0,기업명,매출액(백만원),순이익(백만원),구주매출,희망공모가(최저),희망공모가(최고),청약경쟁률(:1),확정공모가(원),경쟁률(:1),의무보유확약(:1),시초/공모%(수익률),공모가(원),시초가(원),상장일
0,에이치피에스피,61174,17658,1.00,"23,000 ~ 25,000","23,000 ~ 25,000",1159.05,25000,1511.36,42.54,100.00,25000,50000,20220715
1,영창케미칼,60760,2506,0.83,"15,000 ~ 18,600","15,000 ~ 18,600",682.13,18600,1616.27,5.92,-1.08,18600,18400,20220714
2,넥스트칩,10383,-13496,1.00,"9,900 ~ 11,600","9,900 ~ 11,600",1727.38,13000,1623.41,11.58,31.92,13000,17150,20220701
3,위니아에이드,418527,24895,1.00,"14,200 ~ 16,200","14,200 ~ 16,200",111.26,16200,955.00,1.39,-9.88,16200,14600,20220623
4,레이저쎌,3879,-7665,1.00,"12,000 ~ 14,000","12,000 ~ 14,000",1845.11,16000,1442.95,12.43,28.75,16000,20600,20220624
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,데브시스터즈,61303,22326,1.00,"43,000 ~ 50,000","43,000 ~ 50,000",285.28,53000,651.66,29.98,33.96,53000,71000,20141006
392,신화콘텍,51059,6446,1.00,"8,100 ~ 9,100","8,100 ~ 9,100",99.3,9100,441.99,10.37,-10.00,9100,8190,20140808
393,덕신하우징,104434,11718,1.00,"9,600 ~ 11,000","9,600 ~ 11,000",899.07,13000,650.72,59.86,66.54,13000,21650,20140801
394,창해에탄올,69032,14887,1.00,"6,000 ~ 6,900","6,000 ~ 6,900",675.79,8300,578.20,59.50,100.00,8300,16600,20140730


In [33]:
final['희망공모가(최저)'] = final['희망공모가(최저)'].str[:10]
final['희망공모가(최고)'] = final['희망공모가(최고)'].str[9:]
final

Unnamed: 0,기업명,매출액(백만원),순이익(백만원),구주매출,희망공모가(최저),희망공모가(최고),청약경쟁률(:1),확정공모가(원),경쟁률(:1),의무보유확약(:1),시초/공모%(수익률),공모가(원),시초가(원),상장일
0,에이치피에스피,61174,17658,1.00,"23,000 ~","~ 25,000",1159.05,25000,1511.36,42.54,100.00,25000,50000,20220715
1,영창케미칼,60760,2506,0.83,"15,000 ~","~ 18,600",682.13,18600,1616.27,5.92,-1.08,18600,18400,20220714
2,넥스트칩,10383,-13496,1.00,"9,900 ~",11600,1727.38,13000,1623.41,11.58,31.92,13000,17150,20220701
3,위니아에이드,418527,24895,1.00,"14,200 ~","~ 16,200",111.26,16200,955.00,1.39,-9.88,16200,14600,20220623
4,레이저쎌,3879,-7665,1.00,"12,000 ~","~ 14,000",1845.11,16000,1442.95,12.43,28.75,16000,20600,20220624
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,데브시스터즈,61303,22326,1.00,"43,000 ~","~ 50,000",285.28,53000,651.66,29.98,33.96,53000,71000,20141006
392,신화콘텍,51059,6446,1.00,"8,100 ~",9100,99.3,9100,441.99,10.37,-10.00,9100,8190,20140808
393,덕신하우징,104434,11718,1.00,"9,600 ~",11000,899.07,13000,650.72,59.86,66.54,13000,21650,20140801
394,창해에탄올,69032,14887,1.00,"6,000 ~",6900,675.79,8300,578.20,59.50,100.00,8300,16600,20140730


In [34]:
final.replace('(,|~)','',regex = True, inplace= True)
final

Unnamed: 0,기업명,매출액(백만원),순이익(백만원),구주매출,희망공모가(최저),희망공모가(최고),청약경쟁률(:1),확정공모가(원),경쟁률(:1),의무보유확약(:1),시초/공모%(수익률),공모가(원),시초가(원),상장일
0,에이치피에스피,61174,17658,1.00,23000,25000,1159.05,25000,1511.36,42.54,100.00,25000,50000,20220715
1,영창케미칼,60760,2506,0.83,15000,18600,682.13,18600,1616.27,5.92,-1.08,18600,18400,20220714
2,넥스트칩,10383,-13496,1.00,9900,11600,1727.38,13000,1623.41,11.58,31.92,13000,17150,20220701
3,위니아에이드,418527,24895,1.00,14200,16200,111.26,16200,955.00,1.39,-9.88,16200,14600,20220623
4,레이저쎌,3879,-7665,1.00,12000,14000,1845.11,16000,1442.95,12.43,28.75,16000,20600,20220624
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,데브시스터즈,61303,22326,1.00,43000,50000,285.28,53000,651.66,29.98,33.96,53000,71000,20141006
392,신화콘텍,51059,6446,1.00,8100,9100,99.3,9100,441.99,10.37,-10.00,9100,8190,20140808
393,덕신하우징,104434,11718,1.00,9600,11000,899.07,13000,650.72,59.86,66.54,13000,21650,20140801
394,창해에탄올,69032,14887,1.00,6000,6900,675.79,8300,578.20,59.50,100.00,8300,16600,20140730


In [35]:
final.dtypes

기업명             object
매출액(백만원)        object
순이익(백만원)        object
구주매출           float64
희망공모가(최저)       object
희망공모가(최고)       object
청약경쟁률(:1)       object
확정공모가(원)        object
경쟁률(:1)        float64
의무보유확약(:1)     float64
시초/공모%(수익률)    float64
공모가(원)           int64
시초가(원)           int64
상장일              int64
dtype: object

In [36]:
final = final.astype({'매출액(백만원)':'float',
                    '순이익(백만원)': 'float',
                    '희망공모가(최저)':'float',
                     '희망공모가(최고)':'float',
                     '청약경쟁률(:1)': 'float',
                     })
final.dtypes 

기업명             object
매출액(백만원)       float64
순이익(백만원)       float64
구주매출           float64
희망공모가(최저)      float64
희망공모가(최고)      float64
청약경쟁률(:1)      float64
확정공모가(원)        object
경쟁률(:1)        float64
의무보유확약(:1)     float64
시초/공모%(수익률)    float64
공모가(원)           int64
시초가(원)           int64
상장일              int64
dtype: object

In [37]:
final.drop('확정공모가(원)',axis = 1)

Unnamed: 0,기업명,매출액(백만원),순이익(백만원),구주매출,희망공모가(최저),희망공모가(최고),청약경쟁률(:1),경쟁률(:1),의무보유확약(:1),시초/공모%(수익률),공모가(원),시초가(원),상장일
0,에이치피에스피,61174.0,17658.0,1.00,23000.0,25000.0,1159.05,1511.36,42.54,100.00,25000,50000,20220715
1,영창케미칼,60760.0,2506.0,0.83,15000.0,18600.0,682.13,1616.27,5.92,-1.08,18600,18400,20220714
2,넥스트칩,10383.0,-13496.0,1.00,9900.0,11600.0,1727.38,1623.41,11.58,31.92,13000,17150,20220701
3,위니아에이드,418527.0,24895.0,1.00,14200.0,16200.0,111.26,955.00,1.39,-9.88,16200,14600,20220623
4,레이저쎌,3879.0,-7665.0,1.00,12000.0,14000.0,1845.11,1442.95,12.43,28.75,16000,20600,20220624
...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,데브시스터즈,61303.0,22326.0,1.00,43000.0,50000.0,285.28,651.66,29.98,33.96,53000,71000,20141006
392,신화콘텍,51059.0,6446.0,1.00,8100.0,9100.0,99.30,441.99,10.37,-10.00,9100,8190,20140808
393,덕신하우징,104434.0,11718.0,1.00,9600.0,11000.0,899.07,650.72,59.86,66.54,13000,21650,20140801
394,창해에탄올,69032.0,14887.0,1.00,6000.0,6900.0,675.79,578.20,59.50,100.00,8300,16600,20140730


In [38]:
final.to_csv('refined_data.csv')

In [40]:
'''qwert.dropna(subset=['매출액(백만원)'],inplace = True)
qwert.dropna(subset=['순이익(백만원)'],inplace = True)
qwert.dropna(subset=['유통가능물량'],inplace = True)
qwert.dropna(subset=['총 발행주식'],inplace = True)
qwert.dropna(subset=['유통%'],inplace = True)
qwert.drop(['Unnamed: 0','최종유통','최종유통%','기업명.1','Unnamed: 15'], axis = 1,inplace = True)
qwert'''

"qwert.dropna(subset=['매출액(백만원)'],inplace = True)\nqwert.dropna(subset=['순이익(백만원)'],inplace = True)\nqwert.dropna(subset=['유통가능물량'],inplace = True)\nqwert.dropna(subset=['총 발행주식'],inplace = True)\nqwert.dropna(subset=['유통%'],inplace = True)\nqwert.drop(['Unnamed: 0','최종유통','최종유통%','기업명.1','Unnamed: 15'], axis = 1,inplace = True)\nqwert"

In [3]:
import pandas as pd
df=pd.read_csv('data.csv')
df.drop('Unnamed: 0',axis = 1 ,inplace=True)
df

Unnamed: 0,name,type,day,now_p,gongmo_p,sicho_p,first_p
0,쏘카(유가),kospi,20220818,53500,-,-,예정
1,에이치와이티씨,kosdaq,20220809,-,15000,-,예정
2,유안타스팩9호,kosdaq,20220804,-,2000,-,예정
3,새빗켐,kosdaq,20220804,-,35000,-,예정
4,수산인더스트리(유가),kospi,20220801,32500,35000,33000,32500
...,...,...,...,...,...,...,...
404,키움스팩6호,kosdaq,20220407,2225,2000,2180,2215
405,지투파워,kosdaq,20220401,52300,16400,32800,35250
406,유진스팩8호,kosdaq,20220331,2120,2000,2115,2110
407,코람코더원리츠(유가),kospi,20220328,5370,5000,5260,5410
