In [2]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
from pykrx import stock
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind
from sklearn.linear_model import LassoCV, RidgeCV
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
ticker_list = stock.get_market_ticker_list(market="KOSPI")

# precision_svm 값과 종목코드를 저장할 빈 리스트
precision_list = []

# 각 종목에 대해 반복
for ticker in tqdm(ticker_list, desc="Processing"):
    try:
        ticker_name = stock.get_market_ticker_name(ticker)
        start_date = "2021-03-20"
        end_date = "2024-03-20"
        # 주어진 기간 동안의 일별 거래량 정보 가져오기
        df = stock.get_market_ohlcv_by_date(fromdate=start_date, todate=end_date, ticker=ticker)


        def calculate_macd(df, short_window=12, long_window=26, signal_window=9):
            """MACD 및 MACD 신호 계산"""
            df['EMA_short'] = df['종가'].ewm(span=short_window, adjust=False).mean()
            df['EMA_long'] = df['종가'].ewm(span=long_window, adjust=False).mean()
            df['MACD'] = df['EMA_short'] - df['EMA_long']
            df['MACD_Signal'] = df['MACD'].ewm(span=signal_window, adjust=False).mean()
            return df

        def calculate_rsi(df, window=14, signal_window=9):
            """RSI 및 RSI 신호 계산"""
            delta = df['종가'].diff()
            gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
            loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()

            rs = gain / loss
            df['RSI'] = 100 - (100 / (1 + rs))
            
            # RSI 신호선 추가
            df['RSI_Signal'] = df['RSI'].ewm(span=signal_window, adjust=False).mean()
            return df

        # MACD 및 MACD 신호 계산
        df = calculate_macd(df)

        df = calculate_rsi(df)

        # '종가' 컬럼이 포함된 DataFrame을 가정합니다. 예를 들어, df라고 합시다.
        # df = pd.read_csv('path_to_your_data.csv') # 데이터 파일을 로드하는 예시

        # 지정된 기간에 대한 SMA 계산
        periods = [5, 20, 60, 120, 240]
        for period in periods:
            df[f'SMA_{period}'] = df['종가'].rolling(window=period).mean()

        # 지정된 기간에 대한 EMA 계산
        for period in periods:
            df[f'EMA_{period}'] = df['종가'].ewm(span=period, adjust=False).mean()

        for i in range(len(periods)):
            for j in range(i + 1, len(periods)):
                smaller_period = periods[i]
                larger_period = periods[j]
                df[f'SMA_{smaller_period}_minus_SMA_{larger_period}'] = df[f'SMA_{smaller_period}'] - df[f'SMA_{larger_period}']

        # EMA 간의 차이 계산
        for i in range(len(periods)):
            for j in range(i + 1, len(periods)):
                smaller_period = periods[i]
                larger_period = periods[j]
                df[f'EMA_{smaller_period}_minus_EMA_{larger_period}'] = df[f'EMA_{smaller_period}'] - df[f'EMA_{larger_period}']

        for column in ['시가', '고가', '저가','거래량','MACD','MACD_Signal','RSI','RSI_Signal']:
            df[f'{column}_등락률'] = df[column].pct_change() * 100

        # SMA 및 EMA의 전 거래일 대비 등락률 계산 및 DataFrame에 추가
        periods = [5, 20, 60, 120, 240]

        # SMA 등락률 계산 및 추가
        for period in periods:
            df[f'SMA_{period}_등락률'] = df[f'SMA_{period}'].pct_change() * 100

        # EMA 등락률 계산 및 추가
        for period in periods:
            df[f'EMA_{period}_등락률'] = df[f'EMA_{period}'].pct_change() * 100

        # 'MACD'가 0 이상일 때 1, 아니면 0을 할당
        df['MACD_Binary'] = (df['MACD'] >= 0).astype(int)

        # 'MACD - MACD_Signal'이 0 이상일 때 1, 아니면 0을 할당
        df['MACD_minus_Signal_Binary'] = ((df['MACD'] - df['MACD_Signal']) >= 0).astype(int)

        # 'RSI - RSI_Signal'이 0 이상일 때 1, 아니면 0을 할당
        df['RSI_minus_Signal_Binary'] = ((df['RSI'] - df['RSI_Signal']) >= 0).astype(int)

        # 시가 대비 종가 등락률 컬럼 추가
        df['시가_대비_종가_등락률'] = ((df['종가'] - df['시가']) / df['시가']) * 100
        df['시가_대비_저가_등락률'] = ((df['저가'] - df['시가']) / df['시가']) * 100
        df['시가_대비_고가_등락률'] = ((df['고가'] - df['시가']) / df['시가']) * 100
        df['저가_대비_종가_등락률'] = ((df['종가'] - df['저가']) / df['저가']) * 100
        df['저가_대비_고가_등락률'] = ((df['고가'] - df['저가']) / df['저가']) * 100
        df['고가_대비_종가_등락률'] = ((df['종가'] - df['고가']) / df['고가']) * 100
        # 종가 - SMA [5, 20, 60, 120, 240] 값의 차이 컬럼 추가
        for period in [5, 20, 60, 120, 240]:
            df[f'종가_minus_SMA_{period}'] = df['종가'] - df[f'SMA_{period}']

        # 종가 - EMA [5, 20, 60, 120, 240] 값의 차이 컬럼 추가
        for period in [5, 20, 60, 120, 240]:
            df[f'종가_minus_EMA_{period}'] = df['종가'] - df[f'EMA_{period}']


        #csv_file_path = "C:/apps/h1/이스트소프트_data.csv"  # 저장할 파일 경로 및 이름 설정
        #df.to_csv(csv_file_path, encoding='utf-8-sig')

        #print(f"Data saved to {csv_file_path}")

        # 등락률을 기준으로 다음 날 등락률 계산 후 target 생성
        df['next_day_return'] = df['등락률'].shift(-1)
        df['target'] = df['next_day_return'].apply(lambda x: 1 if x > 0.25 else -1)
        df.dropna(inplace=True)  # 마지막 행 삭제

        first_column_name = df.columns[0]

        df1= df.iloc[:-50, :]
        X = df1.drop(['next_day_return','target'], axis=1)
        y = df1['target']

        # Calculating correlations for Forward Selection
        correlations = X.corrwith(y).abs().sort_values(ascending=False).reset_index()
        correlations.columns = ['Feature', 'Correlation']

        top_8_features = correlations.sort_values(by='Correlation', ascending=False).head(8)['Feature']

        # X와 y 정의 (df2와 이전 코드에서의 처리 과정을 바탕으로)
        X_top8 = X[top_8_features]
        y = df1['target'].map({-1: 0, 1: 1})  # y 값을 0과 1로 조정

        # 데이터를 학습 세트와 테스트 세트로 분할
        X_train, X_test, y_train, y_test = train_test_split(X_top8, y, test_size=0.25, random_state=42)

        svm_model = SVC()
        svm_model.fit(X_train, y_train)
        y_pred_svm = svm_model.predict(X_test)

        # Precision score 계산
        precision_svm = precision_score(y_test, y_pred_svm, average='macro')
        precision_list.append({'Ticker': ticker, 'Name': ticker_name, 'Precision_SVM': precision_svm})
        pass
    except Exception as e:
        print(f"Error processing {ticker}: {e}")
        continue

# 결과를 데이터프레임으로 변환하고 precision_svm 값에 따라 내림차순 정렬
precision_df = pd.DataFrame(precision_list).sort_values(by='Precision_SVM', ascending=False)

# 데이터프레임 출력
precision_df


Processing:   4%|▎         | 34/953 [00:10<03:53,  3.93it/s]

Error processing 017860: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  14%|█▎        | 129/953 [00:49<03:12,  4.28it/s]

Error processing 456040: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  18%|█▊        | 172/953 [01:03<05:05,  2.56it/s]

Error processing 465770: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  25%|██▌       | 240/953 [01:23<03:00,  3.95it/s]

Error processing 092790: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  33%|███▎      | 312/953 [01:54<02:15,  4.73it/s]

Error processing 460850: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.
Error processing 460860: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  36%|███▌      | 339/953 [02:02<02:19,  4.41it/s]

Error processing 111380: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  36%|███▋      | 346/953 [02:04<02:46,  3.65it/s]

Error processing 454910: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  44%|████▍     | 424/953 [02:32<02:39,  3.32it/s]

Error processing 030790: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  45%|████▌     | 432/953 [02:33<01:43,  5.06it/s]

Error processing 448730: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  51%|█████▏    | 490/953 [02:47<01:28,  5.23it/s]

Error processing 007610: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  62%|██████▏   | 589/953 [03:10<01:10,  5.18it/s]

Error processing 278470: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.
Error processing 450080: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  68%|██████▊   | 647/953 [03:28<01:42,  2.99it/s]

Error processing 457190: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  73%|███████▎  | 695/953 [03:47<01:14,  3.45it/s]

Error processing 462520: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  74%|███████▎  | 702/953 [03:49<00:56,  4.41it/s]

Error processing 109070: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  75%|███████▌  | 719/953 [03:53<00:53,  4.38it/s]

Error processing 012600: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  78%|███████▊  | 745/953 [04:01<00:54,  3.84it/s]

Error processing 450140: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.
Error processing 45014K: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  92%|█████████▏| 881/953 [04:35<00:23,  3.12it/s]

Error processing 452260: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  93%|█████████▎| 882/953 [04:35<00:20,  3.40it/s]

Error processing 45226K: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.
Error processing 451800: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing:  94%|█████████▍| 900/953 [04:40<00:14,  3.68it/s]

Error processing 453340: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.


Processing: 100%|██████████| 953/953 [04:56<00:00,  3.21it/s]


Unnamed: 0,Ticker,Name,Precision_SVM
113,383800,LX홀딩스,0.875000
499,067830,세이브존I&C,0.870536
375,017180,명문제약,0.857143
283,000300,대유플러스,0.845000
530,002870,신풍,0.843750
...,...,...,...
398,003610,방림,0.238532
710,381970,케이카,0.230769
540,403550,쏘카,0.200000
609,446070,유니드비티플러스,0.187500


In [4]:
df1

Unnamed: 0_level_0,시가,고가,저가,종가,거래량,등락률,EMA_short,EMA_long,MACD,MACD_Signal,...,종가_minus_SMA_60,종가_minus_SMA_120,종가_minus_SMA_240,종가_minus_EMA_5,종가_minus_EMA_20,종가_minus_EMA_60,종가_minus_EMA_120,종가_minus_EMA_240,next_day_return,target
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-03-11,3135,3275,3070,3180,2351671,0.632911,3170.637584,2926.211749,244.425835,238.522244,...,547.500000,222.666667,923.833333,-63.116951,167.087493,418.526170,531.041596,789.025161,0.628931,1
2022-03-14,3180,3240,3070,3200,1919028,0.628931,3175.154879,2946.492360,228.662518,236.550299,...,562.166667,228.958333,936.979167,-28.744634,169.269637,424.148263,541.933471,802.311259,-8.437500,-1
2022-03-15,3165,3170,2920,2930,1869555,-8.437500,3137.438743,2945.270704,192.168040,227.673847,...,290.500000,-52.500000,661.250000,-199.163089,-91.136995,149.094221,267.438703,527.893738,0.341297,1
2022-03-16,2955,3030,2900,2940,835034,0.341297,3107.063552,2944.880281,162.183271,214.575732,...,297.000000,-54.041667,665.479167,-126.108726,-73.409662,153.878017,272.852939,533.429890,0.510204,1
2022-03-17,2965,3000,2910,2955,787556,0.510204,3083.669159,2945.629890,138.039269,199.268439,...,308.000000,-46.833333,674.645833,-74.072484,-52.846837,163.341033,283.095039,543.878604,0.169205,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,2395,2495,2350,2450,3466116,0.204499,2384.218191,2246.707920,137.510272,125.907124,...,483.033333,465.933333,724.700000,-13.783991,149.952693,378.056923,502.979640,586.232265,2.040816,1
2023-12-28,2420,2505,2365,2500,2270612,2.040816,2402.030777,2265.470296,136.560481,128.037795,...,518.283333,514.225000,770.387500,24.144006,180.909579,414.022269,543.839481,630.952329,-0.600000,-1
2024-01-02,2600,2625,2480,2485,2803849,-0.600000,2414.795273,2281.731756,133.063517,129.042940,...,487.700000,496.725000,751.158333,6.096004,150.108667,385.939572,520.098332,610.840692,0.804829,1
2024-01-03,2620,2675,2470,2505,5012099,0.804829,2428.672923,2298.270144,130.402779,129.314908,...,491.016667,514.266667,766.762500,17.397336,153.907841,392.630078,531.171087,625.605499,29.940120,1


In [6]:
csv_file_path = "C:/apps/h1/svmprecision_kospi_data_220311_240104.csv"  # 저장할 파일 경로 및 이름 설정
precision_df.to_csv(csv_file_path, encoding='utf-8-sig')