In [1]:
import MySQLdb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [2]:
# Plot configuration: use a Korean-capable font so Hangul labels render.
# NOTE: 'Malgun Gothic' must be installed on the machine running the notebook.
plt.rcParams['font.family'] = 'Malgun Gothic'
# Configure seaborn in ONE call. Every sns.set(...) call re-applies the whole
# theme and its `font` argument defaults to 'sans-serif', so a second call that
# passes only `rc` would silently undo the Korean font chosen here.
sns.set(font="Malgun Gothic",
        rc={"axes.unicode_minus": False, 'figure.figsize': (5, 15)},
        style='darkgrid')

In [3]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoost
import warnings
warnings.filterwarnings(action='ignore')

In [4]:
# Dataset starting in 2015 (per the author's label).
# The path is relative to the working directory; the first CSV column becomes the index.
df = pd.read_csv('테슬라 총합.csv', index_col=0)

FileNotFoundError: [Errno 2] No such file or directory: '테슬라 총합.csv'

In [None]:
# Show the column labels of the loaded frame.
df.columns

In [None]:
# 3-row shift: each row's `tesla` becomes the value 3 rows later, then rows
# containing NaN are dropped and `date` is parsed to datetime.
# NOTE: this mutates `df` in place, so re-running the cell alone shifts again.
df['tesla'] = df['tesla'].shift(periods=-3)
df.dropna(inplace=True)
df['date'] = pd.to_datetime(df['date'])

In [None]:
# Target is the shifted `tesla` column; features are everything except the
# date, the target and two excluded columns.
non_features = ['date', 'tesla', 'elec_fee', 'tesla_volatility']
y = df['tesla']
x = df.drop(columns=non_features)

In [None]:
# Positional (unshuffled) split at row 1350, labelled 75:25 by the author.
split_at = 1350
x_train, x_test = x.iloc[:split_at], x.iloc[split_at:]
y_train, y_test = y.iloc[:split_at], y.iloc[split_at:]

In [None]:
# Standardise features with statistics learned from the training rows only,
# then apply the same transform to both splits.
scaled = StandardScaler().fit(x_train)
x_train_scaled, x_test_scaled = (scaled.transform(part) for part in (x_train, x_test))

In [None]:
# Baseline comparison: fit each regressor with default hyperparameters and
# print its train/test R^2 (the value returned by `.score`).
baselines = [
    ('lr', LinearRegression()),
    ('rid', Ridge()),
    ('las', Lasso()),
    ('xgb', XGBRegressor()),
    ('lgbm', LGBMRegressor()),
    ('ela', ElasticNet()),
]
baseline_preds = {}
for pos, (tag, estimator) in enumerate(baselines):
    estimator.fit(x_train_scaled, y_train)
    baseline_preds[tag] = estimator.predict(x_test_scaled)
    if pos > 0:
        print('=' * 50)
    print(tag + '_train :', estimator.score(x_train_scaled, y_train))
    print(tag + '_test :', estimator.score(x_test_scaled, y_test))

# Keep the names that later cells read.
lr, rid, las, xb, lgbm, ela = (est for _tag, est in baselines)
preds_lr, preds_rid, preds_las, preds_xb, preds_lgbm, preds_ela = (
    baseline_preds[tag] for tag, _est in baselines
)

In [None]:
# CatBoost baseline with default parameters; only the test R^2 is reported.
cat = CatBoost()
cat.fit(x_train_scaled, y_train)
preds_cat = cat.predict(x_test_scaled)
cat_test_r2 = r2_score(y_test, preds_cat)
print('=' * 50)
print('cat_test :', cat_test_r2)

In [None]:
# Coarse Ridge sweep: print train/test R^2 for a handful of alphas.
ridge_alphas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 100]
for alpha in ridge_alphas:
    rid = Ridge(alpha=alpha).fit(x_train_scaled, y_train)
    preds_rid = rid.predict(x_test_scaled)
    r2_train = rid.score(x_train_scaled, y_train)
    r2_test = rid.score(x_test_scaled, y_test)
    print('=' * 50)
    print('alpha =', alpha)
    print('rid_train :', r2_train)
    print('rid_test :', r2_test)

In [None]:
# Coarse Lasso sweep: print train/test R^2 for a handful of alphas.
lasso_alphas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500, 1000]
for alpha in lasso_alphas:
    las = Lasso(alpha=alpha).fit(x_train_scaled, y_train)
    preds_las = las.predict(x_test_scaled)
    r2_train = las.score(x_train_scaled, y_train)
    r2_test = las.score(x_test_scaled, y_test)
    print('=' * 50)
    print('alpha =', alpha)
    print('las_train :', r2_train)
    print('las_test :', r2_test)

In [None]:
# ElasticNet grid search over l1_ratio x alpha; keeps the pair with the best test R^2.
# NOTE(review): the pair is selected on the test split, so the reported test
# score is optimistically biased; a separate validation split would avoid that.
max_score = -np.inf  # was -99: a finite sentinel can hide a grid that scores below it
max_alpha = 0
train_score = 0
max_ratio = 0
test_li = []
train_li = []
for rat in np.arange(0.1, 1, 0.1):
    for alp in np.arange(0.01, 10, 0.01):
        ela = ElasticNet(l1_ratio=rat, alpha=alp)
        ela.fit(x_train_scaled, y_train)
        preds_ela = ela.predict(x_test_scaled)
        # Score each split once and reuse the values below.
        test_r2 = ela.score(x_test_scaled, y_test)
        train_r2 = ela.score(x_train_scaled, y_train)
        test_li.append(test_r2)
        train_li.append(train_r2)
        if max_score < test_r2:
            max_score = test_r2
            max_alpha = alp
            max_ratio = rat
            train_score = train_r2
print('alpha =', max_alpha)
print('ratio =', max_ratio)
print('ela_train :', train_score)
print('ela_test :', max_score)

In [None]:
# 2020년 이전의 데이터는 주가가 의미있는 변화가 없기 때문에 머신러닝 모델이 망가짐

In [None]:
# Interactive line chart of the (shifted) target over time.
# No plt.figure() here: plotly renders on its own, so a matplotlib figure would
# only add an empty plot to the output.
fig = px.line(df, x='date', y='tesla',
              hover_data={'date': '|%Y-%m-%d'})  # the '|' prefix marks a date format
fig.update_xaxes(dtick='M5')  # one x tick every 5 months

In [None]:
# Dataset starting in 2019 (per the author's label), with financial-statement columns.
df = pd.read_csv('테슬라 총합 재무제표.csv', index_col=0)
# Fill NaNs by linear interpolation, restricted to numeric columns: `date` has
# not been parsed yet (presumably still text), and interpolating object-dtype
# columns is deprecated in recent pandas.
num_cols = df.select_dtypes(include='number').columns
df[num_cols] = df[num_cols].interpolate()
# 5-row shift: each row's `tesla` becomes the value 5 rows later; rows that
# still contain NaN are dropped.
df['tesla'] = df['tesla'].shift(-5)
df.dropna(inplace=True)
df['date'] = pd.to_datetime(df['date'])

In [None]:
# Interactive line chart of the (shifted) target over time.
# No plt.figure() here: plotly renders on its own, so a matplotlib figure would
# only add an empty plot to the output.
fig = px.line(df, x='date', y='tesla',
              hover_data={'date': '|%Y-%m-%d'})  # the '|' prefix marks a date format
fig.update_xaxes(dtick='M5')  # one x tick every 5 months

In [None]:
# Features whose correlation with the target is above 0.9.
# Only positive correlation is tested, and `tesla` itself (correlation 1) is included.
# The correlation matrix is computed once and reused for both the mask and the rows.
corr_matrix = df.drop('date', axis=1).corr()
df_corr = corr_matrix[corr_matrix.tesla > 0.9]
pd.DataFrame(df_corr.tesla)

In [None]:
# Target / feature selection followed by a positional (unshuffled) split at
# row 600, labelled 75:25 by the author.
non_features = ['date', 'tesla', 'elec_fee', 'tesla_volatility']
y = df['tesla']
x = df.drop(columns=non_features)
split_at = 600
x_train, x_test = x.iloc[:split_at], x.iloc[split_at:]
y_train, y_test = y.iloc[:split_at], y.iloc[split_at:]

In [None]:
# Scaling: standardise with training-row statistics only, then transform both splits.
scaled = StandardScaler().fit(x_train)
x_train_scaled, x_test_scaled = (scaled.transform(part) for part in (x_train, x_test))

In [None]:
# Baseline comparison: fit each regressor with default hyperparameters and
# print its train/test R^2 (the value returned by `.score`).
baselines = [
    ('lr', LinearRegression()),
    ('rid', Ridge()),
    ('las', Lasso()),
    ('xgb', XGBRegressor()),
    ('lgbm', LGBMRegressor()),
    ('ela', ElasticNet()),
]
baseline_preds = {}
for pos, (tag, estimator) in enumerate(baselines):
    estimator.fit(x_train_scaled, y_train)
    baseline_preds[tag] = estimator.predict(x_test_scaled)
    if pos > 0:
        print('=' * 50)
    print(tag + '_train :', estimator.score(x_train_scaled, y_train))
    print(tag + '_test :', estimator.score(x_test_scaled, y_test))

# Keep the names that later cells read.
lr, rid, las, xb, lgbm, ela = (est for _tag, est in baselines)
preds_lr, preds_rid, preds_las, preds_xb, preds_lgbm, preds_ela = (
    baseline_preds[tag] for tag, _est in baselines
)

In [None]:
# CatBoost baseline with default parameters; only the test R^2 is reported.
cat = CatBoost()
cat.fit(x_train_scaled, y_train)
preds_cat = cat.predict(x_test_scaled)
cat_test_r2 = r2_score(y_test, preds_cat)
print('=' * 50)
print('cat_test :', cat_test_r2)

In [None]:
# Coarse Ridge sweep: print train/test R^2 for a handful of alphas.
ridge_alphas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 100]
for alpha in ridge_alphas:
    rid = Ridge(alpha=alpha).fit(x_train_scaled, y_train)
    preds_rid = rid.predict(x_test_scaled)
    r2_train = rid.score(x_train_scaled, y_train)
    r2_test = rid.score(x_test_scaled, y_test)
    print('=' * 50)
    print('alpha =', alpha)
    print('rid_train :', r2_train)
    print('rid_test :', r2_test)

In [None]:
# Ridge alpha sweep: refit per alpha and remember the one with the best test R^2.
# NOTE(review): alpha is selected on the test split, so the reported test score
# is optimistically biased; a separate validation split would avoid that.
max_score = -np.inf  # was -20: a sweep scoring below the sentinel reported alpha=0
max_alpha = 0
train_score = 0
test_li = []
train_li = []
for i in np.arange(0.05, 100, 1):
    rid = Ridge(alpha=i)
    rid.fit(x_train_scaled, y_train)
    preds_rid = rid.predict(x_test_scaled)
    # Score each split once and reuse the values below.
    test_r2 = rid.score(x_test_scaled, y_test)
    train_r2 = rid.score(x_train_scaled, y_train)
    test_li.append(test_r2)
    train_li.append(train_r2)
    if max_score < test_r2:
        max_score = test_r2
        max_alpha = i
        train_score = train_r2

print('alpha =', max_alpha)
print('rid_train :', train_score)
print('rid_test :', max_score)

In [None]:
# Train vs. test R^2 across the sweep, in search order. A legend and axis labels
# are added so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit Ridge with the chosen alpha, print train/test R^2 and show the coefficients.
# NOTE(review): 18.5 is not a value produced by the sweep grid
# np.arange(0.05, 100, 1) used above; confirm where it came from.
rid = Ridge(alpha=18.5).fit(x_train_scaled, y_train)
preds_rid = rid.predict(x_test_scaled)
for features, target in ((x_train_scaled, y_train), (x_test_scaled, y_test)):
    print(rid.score(features, target))
rid.coef_

In [None]:
# Author's note: there is a day on which the ETF trading volume drops abruptly.
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_rid))
ax.plot(steps, preds_rid, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
# Coefficient bar chart for the Ridge model fitted just above.
# This previously read `ela.coef_`, but in run order `ela` is still the
# default-parameter ElasticNet from the baseline cell, not a tuned model.
coef_df = pd.DataFrame(rid.coef_, index=x_train.columns)
coef_df = coef_df.sort_values(0, ascending=False)
# Size this figure directly: calling sns.set(rc=...) again would re-apply the
# seaborn theme with its default font and drop the Korean font setting.
fig, ax = plt.subplots(figsize=(5, 15))
sns.barplot(x=coef_df[0], y=coef_df.index, ax=ax)

In [None]:
# Coarse Lasso sweep: print train/test R^2 for a handful of alphas.
lasso_alphas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500, 1000]
for alpha in lasso_alphas:
    las = Lasso(alpha=alpha).fit(x_train_scaled, y_train)
    preds_las = las.predict(x_test_scaled)
    r2_train = las.score(x_train_scaled, y_train)
    r2_test = las.score(x_test_scaled, y_test)
    print('=' * 50)
    print('alpha =', alpha)
    print('las_train :', r2_train)
    print('las_test :', r2_test)

In [None]:
# Lasso alpha sweep: refit per alpha and remember the one with the best test R^2.
# NOTE(review): alpha is selected on the test split, so the reported test score
# is optimistically biased; a separate validation split would avoid that.
max_score = -np.inf  # was -99: a finite sentinel can hide a sweep that scores below it
max_alpha = 0
train_score = 0
test_li = []
train_li = []
for i in np.arange(0.05, 10, 0.01):
    las = Lasso(alpha=i)
    las.fit(x_train_scaled, y_train)
    preds_las = las.predict(x_test_scaled)
    # Score each split once and reuse the values below.
    test_r2 = las.score(x_test_scaled, y_test)
    train_r2 = las.score(x_train_scaled, y_train)
    test_li.append(test_r2)
    train_li.append(train_r2)
    if max_score < test_r2:
        max_score = test_r2
        max_alpha = i
        train_score = train_r2

print('alpha =', max_alpha)
print('las_train :', train_score)
print('las_test :', max_score)

In [None]:
# Train vs. test R^2 across the sweep, in search order. A legend and axis labels
# are added so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit Lasso with the chosen alpha and show its coefficients.
las = Lasso(alpha=0.35).fit(x_train_scaled, y_train)
preds_las = las.predict(x_test_scaled)
las.coef_

In [None]:
# Author's note: there is a day on which the ETF trading volume drops abruptly.
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_las))
ax.plot(steps, preds_las, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
# ElasticNet grid search over l1_ratio x alpha; keeps the pair with the best test R^2.
# NOTE(review): the pair is selected on the test split, so the reported test
# score is optimistically biased; a separate validation split would avoid that.
# Start from -inf: with the previous start of 0, a grid whose test R^2 is always
# negative never updated the best pair and printed alpha = 0 / ratio = 0.
max_score = -np.inf
max_alpha = 0
train_score = 0
max_ratio = 0
test_li = []
train_li = []
for rat in np.arange(0.1, 1, 0.1):
    for alp in np.arange(0.01, 10, 0.01):
        ela = ElasticNet(l1_ratio=rat, alpha=alp)
        ela.fit(x_train_scaled, y_train)
        preds_ela = ela.predict(x_test_scaled)
        # Score each split once and reuse the values below.
        test_r2 = ela.score(x_test_scaled, y_test)
        train_r2 = ela.score(x_train_scaled, y_train)
        test_li.append(test_r2)
        train_li.append(train_r2)
        if max_score < test_r2:
            max_score = test_r2
            max_alpha = alp
            max_ratio = rat
            train_score = train_r2
print('alpha =', max_alpha)
print('ratio =', max_ratio)
print('ela_train :', train_score)
print('ela_test :', max_score)

In [None]:
# Train vs. test R^2 across the grid, in search order (all alphas for the first
# l1_ratio, then the next ratio, and so on). A legend and axis labels are added
# so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit ElasticNet with the chosen pair, print train/test R^2 and show the coefficients.
ela = ElasticNet(alpha=9.44, l1_ratio=0.5).fit(x_train_scaled, y_train)
preds_ela = ela.predict(x_test_scaled)
ela_train_r2 = ela.score(x_train_scaled, y_train)
ela_test_r2 = ela.score(x_test_scaled, y_test)
print('=' * 50)
print('ela_train :', ela_train_r2)
print('ela_test :', ela_test_r2)
ela.coef_

In [None]:
# Coefficient bar chart for the ElasticNet fitted just above, largest first.
coef_df = pd.DataFrame(ela.coef_, index=x_train.columns)
coef_df = coef_df.sort_values(0, ascending=False)
# Size this figure directly: calling sns.set(rc=...) again would re-apply the
# seaborn theme with its default font and drop the Korean font setting.
fig, ax = plt.subplots(figsize=(5, 15))
# coef_df is already sorted, so no second sort is needed.
sns.barplot(x=coef_df[0], y=coef_df.index, ax=ax)

In [None]:
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_ela))
ax.plot(steps, preds_ela, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
## Test-set error (RMSE) of the three models
for tag, model_preds in (('rid', preds_rid), ('las', preds_las), ('ela', preds_ela)):
    print(tag + ' RMSE :', mean_squared_error(y_test, model_preds)**0.5)

In [None]:
# train, test 비율 바꿔보기

In [None]:
# Reload the 2019+ dataset for the experiment with a different train/test ratio.
df = pd.read_csv('테슬라 총합 재무제표.csv', index_col=0)
# Fill NaNs by linear interpolation, restricted to numeric columns:
# interpolating object-dtype columns (presumably the unparsed `date` text
# column) is deprecated in recent pandas.
num_cols = df.select_dtypes(include='number').columns
df[num_cols] = df[num_cols].interpolate()
# 5-row shift: each row's `tesla` becomes the value 5 rows later; rows that
# still contain NaN are dropped.
df['tesla'] = df['tesla'].shift(-5)
df.dropna(inplace=True)

In [None]:
# Target is the shifted `tesla` column; features are everything except the
# date, the target and two excluded columns.
non_features = ['date', 'tesla', 'elec_fee', 'tesla_volatility']
y = df['tesla']
x = df.drop(columns=non_features)

In [None]:
# Positional (unshuffled) split at row 560, labelled 70:30 by the author.
split_at = 560
x_train, x_test = x.iloc[:split_at], x.iloc[split_at:]
y_train, y_test = y.iloc[:split_at], y.iloc[split_at:]

In [None]:
# Standardise features with statistics learned from the training rows only,
# then apply the same transform to both splits.
scaled = StandardScaler().fit(x_train)
x_train_scaled, x_test_scaled = (scaled.transform(part) for part in (x_train, x_test))

In [None]:
# Ridge alpha sweep: refit per alpha and remember the one with the best test R^2.
# NOTE(review): alpha is selected on the test split, so the reported test score
# is optimistically biased; a separate validation split would avoid that.
max_score = -np.inf  # was -20: a sweep scoring below the sentinel reported alpha=0
max_alpha = 0
train_score = 0
test_li = []
train_li = []
for i in np.arange(0.05, 100, 0.01):
    rid = Ridge(alpha=i)
    rid.fit(x_train_scaled, y_train)
    preds_rid = rid.predict(x_test_scaled)
    # Score each split once and reuse the values below.
    test_r2 = rid.score(x_test_scaled, y_test)
    train_r2 = rid.score(x_train_scaled, y_train)
    test_li.append(test_r2)
    train_li.append(train_r2)
    if max_score < test_r2:
        max_score = test_r2
        max_alpha = i
        train_score = train_r2

print('alpha =', max_alpha)
print('rid_train :', train_score)
print('rid_test :', max_score)

In [None]:
# Train vs. test R^2 across the sweep, in search order. A legend and axis labels
# are added so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit Ridge with the chosen alpha and report train/test R^2.
rid = Ridge(alpha=3.01)
rid.fit(x_train_scaled, y_train)
preds_rid = rid.predict(x_test_scaled)
print(rid.score(x_train_scaled, y_train))
print(rid.score(x_test_scaled, y_test))
# Moved to the end of the cell: as a bare expression in the middle it was
# evaluated and discarded; only the last expression of a cell is displayed.
rid.coef_

In [None]:
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_rid))
ax.plot(steps, preds_rid, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
# Lasso alpha sweep: refit per alpha and remember the one with the best test R^2.
# NOTE(review): alpha is selected on the test split, so the reported test score
# is optimistically biased; a separate validation split would avoid that.
max_score = -np.inf  # was -99: a finite sentinel can hide a sweep that scores below it
max_alpha = 0
train_score = 0
test_li = []
train_li = []
for i in np.arange(0.05, 1, 0.01):
    las = Lasso(alpha=i)
    las.fit(x_train_scaled, y_train)
    preds_las = las.predict(x_test_scaled)
    # Score each split once and reuse the values below.
    test_r2 = las.score(x_test_scaled, y_test)
    train_r2 = las.score(x_train_scaled, y_train)
    test_li.append(test_r2)
    train_li.append(train_r2)
    if max_score < test_r2:
        max_score = test_r2
        max_alpha = i
        train_score = train_r2

print('alpha =', max_alpha)
print('las_train :', train_score)
print('las_test :', max_score)

In [None]:
# Train vs. test R^2 across the sweep, in search order. A legend and axis labels
# are added so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit Lasso with the chosen alpha and keep its test predictions.
las = Lasso(alpha=0.18).fit(x_train_scaled, y_train)
preds_las = las.predict(x_test_scaled)

In [None]:
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_las))
ax.plot(steps, preds_las, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
# ElasticNet grid search over l1_ratio x alpha; keeps the pair with the best test R^2.
# NOTE(review): the pair is selected on the test split, so the reported test
# score is optimistically biased; a separate validation split would avoid that.
# Start from -inf: with the previous start of 0, a grid whose test R^2 is always
# negative never updated the best pair and printed alpha = 0 / ratio = 0.
max_score = -np.inf
max_alpha = 0
train_score = 0
max_ratio = 0
test_li = []
train_li = []
for rat in np.arange(0.1, 1, 0.1):
    for alp in np.arange(0.01, 10, 0.01):
        ela = ElasticNet(l1_ratio=rat, alpha=alp)
        ela.fit(x_train_scaled, y_train)
        preds_ela = ela.predict(x_test_scaled)
        # Score each split once and reuse the values below.
        test_r2 = ela.score(x_test_scaled, y_test)
        train_r2 = ela.score(x_train_scaled, y_train)
        test_li.append(test_r2)
        train_li.append(train_r2)
        if max_score < test_r2:
            max_score = test_r2
            max_alpha = alp
            max_ratio = rat
            train_score = train_r2
print('alpha =', max_alpha)
print('ratio =', max_ratio)
print('ela_train :', train_score)
print('ela_test :', max_score)

In [None]:
# Train vs. test R^2 across the grid, in search order (all alphas for the first
# l1_ratio, then the next ratio, and so on). A legend and axis labels are added
# so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit ElasticNet with the chosen pair and print train/test R^2.
ela = ElasticNet(alpha=9.35, l1_ratio=0.2).fit(x_train_scaled, y_train)
preds_ela = ela.predict(x_test_scaled)
ela_train_r2 = ela.score(x_train_scaled, y_train)
ela_test_r2 = ela.score(x_test_scaled, y_test)
print('=' * 50)
print('ela_train :', ela_train_r2)
print('ela_test :', ela_test_r2)

In [None]:
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_ela))
ax.plot(steps, preds_ela, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
## Test-set error (RMSE) of the three models
for tag, model_preds in (('rid', preds_rid), ('las', preds_las), ('ela', preds_ela)):
    print(tag + ' RMSE :', mean_squared_error(y_test, model_preds)**0.5)

In [None]:
# Feature adjustment: reload the 2019+ dataset before trying a reduced feature set.
# The path is relative to the working directory; the first CSV column becomes the index.
df = pd.read_csv('테슬라 총합 재무제표.csv', index_col=0)

In [None]:
# Fill NaNs by linear interpolation, restricted to numeric columns:
# interpolating object-dtype columns (presumably the unparsed `date` text
# column) is deprecated in recent pandas.
num_cols = df.select_dtypes(include='number').columns
df[num_cols] = df[num_cols].interpolate()

In [None]:
# 5-row shift: each row's `tesla` becomes the value 5 rows later, then rows
# containing NaN are dropped.
# NOTE: this mutates `df` in place, so re-running the cell alone shifts again.
df['tesla'] = df['tesla'].shift(periods=-5)
df.dropna(inplace=True)

In [None]:
# Drop the financial-statement columns (along with the date, the target and the
# other excluded columns) from the feature set.
excluded = [
    'date', 'tesla', 'elec_fee', 'tesla_volatility', 'snp_500', 'snp_500_volatility',
    '매출액', '매출원가', '매출총이익', '영업이익', '순이익', '자산총계', '매출총이익률', '영업이익률',
    '주당순이익EPS(달러)', '주가수익배수PER(배)', '총자산이익률ROA(%)',
]
y = df['tesla']
x = df.drop(columns=excluded)

In [None]:
# Positional (unshuffled) split at row 600, labelled 75:25 by the author.
split_at = 600
x_train, x_test = x.iloc[:split_at], x.iloc[split_at:]
y_train, y_test = y.iloc[:split_at], y.iloc[split_at:]

In [None]:
# Standardise features with statistics learned from the training rows only,
# then apply the same transform to both splits.
scaled = StandardScaler().fit(x_train)
x_train_scaled, x_test_scaled = (scaled.transform(part) for part in (x_train, x_test))

In [None]:
# Baseline comparison: fit each regressor with default hyperparameters and
# print its train/test R^2 (the value returned by `.score`).
baselines = [
    ('lr', LinearRegression()),
    ('rid', Ridge()),
    ('las', Lasso()),
    ('xgb', XGBRegressor()),
    ('lgbm', LGBMRegressor()),
    ('ela', ElasticNet()),
]
baseline_preds = {}
for pos, (tag, estimator) in enumerate(baselines):
    estimator.fit(x_train_scaled, y_train)
    baseline_preds[tag] = estimator.predict(x_test_scaled)
    if pos > 0:
        print('=' * 50)
    print(tag + '_train :', estimator.score(x_train_scaled, y_train))
    print(tag + '_test :', estimator.score(x_test_scaled, y_test))

# Keep the names that later cells read.
lr, rid, las, xb, lgbm, ela = (est for _tag, est in baselines)
preds_lr, preds_rid, preds_las, preds_xb, preds_lgbm, preds_ela = (
    baseline_preds[tag] for tag, _est in baselines
)

In [None]:
# CatBoost baseline with default parameters; only the test R^2 is reported.
cat = CatBoost()
cat.fit(x_train_scaled, y_train)
preds_cat = cat.predict(x_test_scaled)
cat_test_r2 = r2_score(y_test, preds_cat)
print('=' * 50)
print('cat_test :', cat_test_r2)

In [None]:
# Coarse Ridge sweep: print train/test R^2 for a handful of alphas.
ridge_alphas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 100]
for alpha in ridge_alphas:
    rid = Ridge(alpha=alpha).fit(x_train_scaled, y_train)
    preds_rid = rid.predict(x_test_scaled)
    r2_train = rid.score(x_train_scaled, y_train)
    r2_test = rid.score(x_test_scaled, y_test)
    print('=' * 50)
    print('alpha =', alpha)
    print('rid_train :', r2_train)
    print('rid_test :', r2_test)

In [None]:
# Ridge alpha sweep: refit per alpha and remember the one with the best test R^2.
# NOTE(review): alpha is selected on the test split, so the reported test score
# is optimistically biased; a separate validation split would avoid that.
max_score = -np.inf  # was -20: a sweep scoring below the sentinel reported alpha=0
max_alpha = 0
train_score = 0
test_li = []
train_li = []
for i in np.arange(0.05, 10, 0.01):
    rid = Ridge(alpha=i)
    rid.fit(x_train_scaled, y_train)
    preds_rid = rid.predict(x_test_scaled)
    # Score each split once and reuse the values below.
    test_r2 = rid.score(x_test_scaled, y_test)
    train_r2 = rid.score(x_train_scaled, y_train)
    test_li.append(test_r2)
    train_li.append(train_r2)
    if max_score < test_r2:
        max_score = test_r2
        max_alpha = i
        train_score = train_r2

print('alpha =', max_alpha)
print('rid_train :', train_score)
print('rid_test :', max_score)

In [None]:
# Train vs. test R^2 across the sweep, in search order. A legend and axis labels
# are added so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit Ridge with the chosen alpha and report train/test R^2.
rid = Ridge(alpha=8.75)
rid.fit(x_train_scaled, y_train)
preds_rid = rid.predict(x_test_scaled)
print(rid.score(x_train_scaled, y_train))
print(rid.score(x_test_scaled, y_test))
# Moved to the end of the cell: as a bare expression in the middle it was
# evaluated and discarded; only the last expression of a cell is displayed.
rid.coef_

In [None]:
# Author's note: there is a day on which the ETF trading volume drops abruptly.
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_rid))
ax.plot(steps, preds_rid, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
# Coarse Lasso sweep: print train/test R^2 for a handful of alphas.
lasso_alphas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500, 1000]
for alpha in lasso_alphas:
    las = Lasso(alpha=alpha).fit(x_train_scaled, y_train)
    preds_las = las.predict(x_test_scaled)
    r2_train = las.score(x_train_scaled, y_train)
    r2_test = las.score(x_test_scaled, y_test)
    print('=' * 50)
    print('alpha =', alpha)
    print('las_train :', r2_train)
    print('las_test :', r2_test)

In [None]:
# Lasso alpha sweep: refit per alpha and remember the one with the best test R^2.
# NOTE(review): alpha is selected on the test split, so the reported test score
# is optimistically biased; a separate validation split would avoid that.
max_score = -np.inf  # was -99: a finite sentinel can hide a sweep that scores below it
max_alpha = 0
train_score = 0
test_li = []
train_li = []
for i in np.arange(0.05, 1, 0.01):
    las = Lasso(alpha=i)
    las.fit(x_train_scaled, y_train)
    preds_las = las.predict(x_test_scaled)
    # Score each split once and reuse the values below.
    test_r2 = las.score(x_test_scaled, y_test)
    train_r2 = las.score(x_train_scaled, y_train)
    test_li.append(test_r2)
    train_li.append(train_r2)
    if max_score < test_r2:
        max_score = test_r2
        max_alpha = i
        train_score = train_r2

print('alpha =', max_alpha)
print('las_train :', train_score)
print('las_test :', max_score)

In [None]:
# Train vs. test R^2 across the sweep, in search order. A legend and axis labels
# are added so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit Lasso with the chosen alpha and show its coefficients.
las = Lasso(alpha=0.31).fit(x_train_scaled, y_train)
preds_las = las.predict(x_test_scaled)
las.coef_

In [None]:
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_las))
ax.plot(steps, preds_las, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
# ElasticNet grid search over l1_ratio x alpha; keeps the pair with the best test R^2.
# NOTE(review): the pair is selected on the test split, so the reported test
# score is optimistically biased; a separate validation split would avoid that.
# Start from -inf: with the previous start of 0, a grid whose test R^2 is always
# negative never updated the best pair and printed alpha = 0 / ratio = 0.
max_score = -np.inf
max_alpha = 0
train_score = 0
max_ratio = 0
test_li = []
train_li = []
for rat in np.arange(0.1, 1, 0.1):
    for alp in np.arange(0.01, 10, 0.01):
        ela = ElasticNet(l1_ratio=rat, alpha=alp)
        ela.fit(x_train_scaled, y_train)
        preds_ela = ela.predict(x_test_scaled)
        # Score each split once and reuse the values below.
        test_r2 = ela.score(x_test_scaled, y_test)
        train_r2 = ela.score(x_train_scaled, y_train)
        test_li.append(test_r2)
        train_li.append(train_r2)
        if max_score < test_r2:
            max_score = test_r2
            max_alpha = alp
            max_ratio = rat
            train_score = train_r2
print('alpha =', max_alpha)
print('ratio =', max_ratio)
print('ela_train :', train_score)
print('ela_test :', max_score)

In [None]:
# Train vs. test R^2 across the grid, in search order (all alphas for the first
# l1_ratio, then the next ratio, and so on). A legend and axis labels are added
# so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit ElasticNet with the chosen pair and print train/test R^2.
ela = ElasticNet(alpha=0.05, l1_ratio=0.8).fit(x_train_scaled, y_train)
preds_ela = ela.predict(x_test_scaled)
ela_train_r2 = ela.score(x_train_scaled, y_train)
ela_test_r2 = ela.score(x_test_scaled, y_test)
print('=' * 50)
print('ela_train :', ela_train_r2)
print('ela_test :', ela_test_r2)

In [None]:
# Show the fitted ElasticNet coefficients (one per feature column).
ela.coef_

In [None]:
# Coefficient bar chart for the ElasticNet fitted above, largest first.
coef_df = pd.DataFrame(ela.coef_, index=x_train.columns)
coef_df = coef_df.sort_values(0, ascending=False)
# Size this figure directly: calling sns.set(rc=...) again would re-apply the
# seaborn theme with its default font and drop the Korean font setting.
fig, ax = plt.subplots(figsize=(5, 15))
# coef_df is already sorted, so no second sort is needed.
sns.barplot(x=coef_df[0], y=coef_df.index, ax=ax)

In [None]:
# Author's note: there is a day on which the ETF trading volume drops abruptly.
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_ela))
ax.plot(steps, preds_ela, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
## Test-set error (RMSE) of the three models
print('rid RMSE :', mean_squared_error(y_test, preds_rid)**0.5)
print('las RMSE :', mean_squared_error(y_test, preds_las)**0.5)
# Label fixed from 'RMAE': the value is the square root of the MSE, like the two above.
print('ela RMSE :', mean_squared_error(y_test, preds_ela)**0.5)

In [None]:
# train, test 비율 바꿔보기

In [None]:
# Reload the 2019+ dataset for the experiment with a different train/test ratio.
df = pd.read_csv('테슬라 총합 재무제표.csv', index_col=0)
# Fill NaNs by linear interpolation, restricted to numeric columns:
# interpolating object-dtype columns (presumably the unparsed `date` text
# column) is deprecated in recent pandas.
num_cols = df.select_dtypes(include='number').columns
df[num_cols] = df[num_cols].interpolate()
# 5-row shift: each row's `tesla` becomes the value 5 rows later; rows that
# still contain NaN are dropped.
df['tesla'] = df['tesla'].shift(-5)
df.dropna(inplace=True)

In [None]:
# Drop the financial-statement columns (along with the date, the target and the
# other excluded columns) from the feature set.
excluded = [
    'date', 'tesla', 'elec_fee', 'tesla_volatility', 'snp_500', 'snp_500_volatility',
    '매출액', '매출원가', '매출총이익', '영업이익', '순이익', '자산총계', '매출총이익률', '영업이익률',
    '주당순이익EPS(달러)', '주가수익배수PER(배)', '총자산이익률ROA(%)',
]
y = df['tesla']
x = df.drop(columns=excluded)

In [None]:
# Positional (unshuffled) split at row 560, labelled 70:30 by the author.
split_at = 560
x_train, x_test = x.iloc[:split_at], x.iloc[split_at:]
y_train, y_test = y.iloc[:split_at], y.iloc[split_at:]

In [None]:
# Standardise features with statistics learned from the training rows only,
# then apply the same transform to both splits.
scaled = StandardScaler().fit(x_train)
x_train_scaled, x_test_scaled = (scaled.transform(part) for part in (x_train, x_test))

In [None]:
# Ridge alpha sweep: refit per alpha and remember the one with the best test R^2.
# NOTE(review): alpha is selected on the test split, so the reported test score
# is optimistically biased; a separate validation split would avoid that.
max_score = -np.inf  # was -20: a sweep scoring below the sentinel reported alpha=0
max_alpha = 0
train_score = 0
test_li = []
train_li = []
for i in np.arange(0.05, 1000, 1):
    rid = Ridge(alpha=i)
    rid.fit(x_train_scaled, y_train)
    preds_rid = rid.predict(x_test_scaled)
    # Score each split once and reuse the values below.
    test_r2 = rid.score(x_test_scaled, y_test)
    train_r2 = rid.score(x_train_scaled, y_train)
    test_li.append(test_r2)
    train_li.append(train_r2)
    if max_score < test_r2:
        max_score = test_r2
        max_alpha = i
        train_score = train_r2

print('alpha =', max_alpha)
print('rid_train :', train_score)
print('rid_test :', max_score)

In [None]:
# Train vs. test R^2 across the sweep, in search order. A legend and axis labels
# are added so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit Ridge with the chosen alpha and report train/test R^2.
rid = Ridge(alpha=34.05)
rid.fit(x_train_scaled, y_train)
preds_rid = rid.predict(x_test_scaled)
print(rid.score(x_train_scaled, y_train))
print(rid.score(x_test_scaled, y_test))
# Moved to the end of the cell: as a bare expression in the middle it was
# evaluated and discarded; only the last expression of a cell is displayed.
rid.coef_

In [None]:
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_rid))
ax.plot(steps, preds_rid, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
# Lasso alpha sweep: refit per alpha and remember the one with the best test R^2.
# NOTE(review): alpha is selected on the test split, so the reported test score
# is optimistically biased; a separate validation split would avoid that.
max_score = -np.inf  # was -99: a finite sentinel can hide a sweep that scores below it
max_alpha = 0
train_score = 0
test_li = []
train_li = []
for i in np.arange(0.05, 100, 0.01):
    las = Lasso(alpha=i)
    las.fit(x_train_scaled, y_train)
    preds_las = las.predict(x_test_scaled)
    # Score each split once and reuse the values below.
    test_r2 = las.score(x_test_scaled, y_test)
    train_r2 = las.score(x_train_scaled, y_train)
    test_li.append(test_r2)
    train_li.append(train_r2)
    if max_score < test_r2:
        max_score = test_r2
        max_alpha = i
        train_score = train_r2

print('alpha =', max_alpha)
print('las_train :', train_score)
print('las_test :', max_score)

In [None]:
# Train vs. test R^2 across the sweep, in search order. A legend and axis labels
# are added so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit Lasso with the chosen alpha and show its coefficients.
las = Lasso(alpha=43.4).fit(x_train_scaled, y_train)
preds_las = las.predict(x_test_scaled)
las.coef_

In [None]:
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_las))
ax.plot(steps, preds_las, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
# ElasticNet grid search over l1_ratio x alpha; keeps the pair with the best test R^2.
# NOTE(review): the pair is selected on the test split, so the reported test
# score is optimistically biased; a separate validation split would avoid that.
# Start from -inf: with the previous start of 0, a grid whose test R^2 is always
# negative never updated the best pair and printed alpha = 0 / ratio = 0.
max_score = -np.inf
max_alpha = 0
train_score = 0
max_ratio = 0
test_li = []
train_li = []
for rat in np.arange(0.1, 1, 0.1):
    for alp in np.arange(0.01, 10, 0.01):
        ela = ElasticNet(l1_ratio=rat, alpha=alp)
        ela.fit(x_train_scaled, y_train)
        preds_ela = ela.predict(x_test_scaled)
        # Score each split once and reuse the values below.
        test_r2 = ela.score(x_test_scaled, y_test)
        train_r2 = ela.score(x_train_scaled, y_train)
        test_li.append(test_r2)
        train_li.append(train_r2)
        if max_score < test_r2:
            max_score = test_r2
            max_alpha = alp
            max_ratio = rat
            train_score = train_r2
print('alpha =', max_alpha)
print('ratio =', max_ratio)
print('ela_train :', train_score)
print('ela_test :', max_score)

In [None]:
# Train vs. test R^2 across the grid, in search order (all alphas for the first
# l1_ratio, then the next ratio, and so on). A legend and axis labels are added
# so the two lines can be told apart.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(test_li, c='r', label='test R2')
ax.plot(train_li, c='b', label='train R2')
ax.set_xlabel('search step')
ax.set_ylabel('R2')
ax.legend(loc='best')
plt.show()

In [None]:
# Refit ElasticNet with the chosen pair and print train/test R^2.
ela = ElasticNet(alpha=0.61, l1_ratio=0.9).fit(x_train_scaled, y_train)
preds_ela = ela.predict(x_test_scaled)
ela_train_r2 = ela.score(x_train_scaled, y_train)
ela_test_r2 = ela.score(x_test_scaled, y_test)
print('=' * 50)
print('ela_train :', ela_train_r2)
print('ela_test :', ela_test_r2)

In [None]:
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_ela))
ax.plot(steps, preds_ela, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
## Test-set error (RMSE) of the three models
for tag, model_preds in (('rid', preds_rid), ('las', preds_las), ('ela', preds_ela)):
    print(tag + ' RMSE :', mean_squared_error(y_test, model_preds)**0.5)

In [None]:
from itertools import combinations

In [None]:
# Column-subset experiment (author's label: "random combinations of 10 columns").
df = pd.read_csv('테슬라 총합 재무제표.csv', index_col=0)
# Fill NaNs by linear interpolation, restricted to numeric columns:
# interpolating object-dtype columns (presumably the unparsed `date` text
# column) is deprecated in recent pandas.
num_cols = df.select_dtypes(include='number').columns
df[num_cols] = df[num_cols].interpolate()
# 5-row shift: each row's `tesla` becomes the value 5 rows later; rows that
# still contain NaN are dropped.
df['tesla'] = df['tesla'].shift(-5)
df.dropna(inplace=True)

In [None]:
# Candidate feature columns: every column from position 5 onward.
# NOTE(review): assumes the first five columns are the ones to exclude (date,
# target, ...); the column order is not visible here, so confirm.
cols = df.columns.to_list()[5:]

In [None]:
# Search over 10-column subsets of `cols` and a coarse Ridge alpha grid, keeping
# the subset/alpha with the best test R^2.
# NOTE(review): this enumerates EVERY 10-column combination (not a random
# sample), so the run time grows combinatorially with len(cols).
# NOTE(review): the winner is selected on the test split, so the reported test
# score is optimistically biased.
max_score = -np.inf  # was -99: a finite sentinel can hide a search that scores below it
max_alpha = 0
train_score = 0
max_col = []
# The target and its 70 : 30 positional split do not depend on the column
# subset, so they are built once outside the loop.
y = df.tesla
y_train, y_test = y.iloc[:560], y.iloc[560:]
for combi in combinations(cols, 10):
    col = list(combi)
    x = df[col]
    x_train, x_test = x.iloc[:560], x.iloc[560:]
    scaled = StandardScaler()
    scaled.fit(x_train)
    x_train_scaled = scaled.transform(x_train)
    x_test_scaled = scaled.transform(x_test)
    for i in [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500, 1000]:
        rid = Ridge(alpha=i)
        rid.fit(x_train_scaled, y_train)
        preds_rid = rid.predict(x_test_scaled)
        # Score the test split once; the train score is only needed for a new best.
        test_r2 = rid.score(x_test_scaled, y_test)
        if max_score < test_r2:
            max_score = test_r2
            max_alpha = i
            max_col = col
            train_score = rid.score(x_train_scaled, y_train)

In [None]:
# Report the winning column subset, its alpha, and the test / train R^2.
for found in (max_col, max_alpha, max_score, train_score):
    print(found)

In [None]:
# Refit Ridge on a fixed 10-column subset with alpha = 10 (hard-coded here,
# presumably copied from an earlier run of the search above).
y = df.tesla
x = df[['oil_price', 'oil_price_volatility', 'carbon_credits', 'carbon_credits_volatility', 'EVE종가',
        'EVE거래량', 'EVE변동률', 'kars', 'li_price', '주가수익배수PER(배)']]
# 70 : 30 split. The previous comment said 75 : 25, but row 560 is the split
# the rest of the notebook labels 70 : 30.
x_train, x_test = x.iloc[:560], x.iloc[560:]
y_train, y_test = y.iloc[:560], y.iloc[560:]
scaled = StandardScaler()
scaled.fit(x_train)
x_train_scaled = scaled.transform(x_train)
x_test_scaled = scaled.transform(x_test)
rid = Ridge(alpha=10)
rid.fit(x_train_scaled, y_train)
preds_rid = rid.predict(x_test_scaled)
print(rid.score(x_train_scaled, y_train))
print(rid.score(x_test_scaled, y_test))
# Moved to the end of the cell: as a bare expression in the middle it was
# evaluated and discarded; only the last expression of a cell is displayed.
rid.coef_

In [None]:
# Predicted (red) vs. actual (blue) target over the test rows, by position.
fig, ax = plt.subplots(figsize=(15, 5))
steps = range(len(preds_rid))
ax.plot(steps, preds_rid, c='r', label='Predict')
ax.plot(steps, y_test, c='b', label='Real')
ax.legend(loc='best', ncol=2)
plt.show()

In [None]:
# Test-set RMSE (square root of the mean squared error) of the Ridge model above.
print('rid RMSE :', mean_squared_error(y_test, preds_rid)**0.5)