In [2]:
import talib
import pandas as pd
import yfinance as yf
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split


## Gather Data

In [17]:
# Fetch INTC price history from Yahoo Finance for the requested date window.
ticker = yf.download("INTC", start="2010-01-01", end="2024-12-30")

[*********************100%***********************]  1 of 1 completed


In [18]:
# Flatten the column labels to their first element (the price field name).
# Guarded so the cell is idempotent: without the check, re-running it (or
# receiving already-flat columns) would index into plain strings and truncate
# every column name to its first character.
if isinstance(ticker.columns, pd.MultiIndex):
    ticker.columns = [col[0] for col in ticker.columns]
ticker.index = pd.to_datetime(ticker.index)
# Only the last expression of a cell is rendered, so the summary statistics
# are the single displayed result here.
ticker.describe()


Unnamed: 0,Close,High,Low,Open,Volume
count,3772.0,3772.0,3772.0,3772.0,3772.0
mean,30.436122,30.779566,30.082522,30.428695,38150290.0
std,12.662469,12.831899,12.49397,12.66353,23233850.0
min,11.702464,11.868034,11.656105,11.841542,5893800.0
25%,18.600048,18.735297,18.443119,18.58014,22902980.0
50%,28.320572,28.528991,27.960949,28.288991,31766250.0
75%,41.767867,42.230448,41.181743,41.723774,46289480.0
max,62.083344,62.29252,60.600833,62.02876,300895900.0


In [19]:
# Price Trend Indicators
# 20-period simple and exponential moving averages of the close.
ticker['SMA_20'] = talib.SMA(ticker['Close'], timeperiod=20)
ticker['EMA_20'] = talib.EMA(ticker['Close'], timeperiod=20)

# MACD
# No periods are passed, so TA-Lib's defaults apply; the three returned series
# are stored as the MACD line, its signal line and the histogram.
ticker['MACD'], ticker['MACD_Signal'], ticker['MACD_Hist'] = talib.MACD(ticker['Close'])

# Bollinger Bands
# NOTE(review): no timeperiod is given, so BB_Middle uses TA-Lib's default
# window rather than the 20 periods used for SMA_20/EMA_20 — confirm intended.
ticker['BB_Upper'], ticker['BB_Middle'], ticker['BB_Lower'] = talib.BBANDS(ticker['Close'])

# Parabolic SAR
ticker['SAR'] = talib.SAR(ticker['High'], ticker['Low'])

# Momentum Indicators
# All four use TA-Lib's default look-back periods (none are passed here).
ticker['RSI'] = talib.RSI(ticker['Close'])
ticker['STOCH_K'], ticker['STOCH_D'] = talib.STOCH(ticker['High'], ticker['Low'], ticker['Close'])
ticker['WILLR'] = talib.WILLR(ticker['High'], ticker['Low'], ticker['Close'])
ticker['ROC'] = talib.ROC(ticker['Close'])

# Volume Indicators
ticker['OBV'] = talib.OBV(ticker['Close'], ticker['Volume'])
ticker['AD'] = talib.AD(ticker['High'], ticker['Low'], ticker['Close'], ticker['Volume'])
ticker['MFI'] = talib.MFI(ticker['High'], ticker['Low'], ticker['Close'], ticker['Volume'])

# Volatility Indicators
ticker['ATR'] = talib.ATR(ticker['High'], ticker['Low'], ticker['Close'])
ticker['STDDEV'] = talib.STDDEV(ticker['Close'])

# Trend Strength Indicators
ticker['ADX'] = talib.ADX(ticker['High'], ticker['Low'], ticker['Close'])
ticker['PLUS_DI'] = talib.PLUS_DI(ticker['High'], ticker['Low'], ticker['Close'])
ticker['MINUS_DI'] = talib.MINUS_DI(ticker['High'], ticker['Low'], ticker['Close'])

# Hilbert Transform trendline (stored under an Ichimoku-style column name)
# NOTE(review): talib.HT_TRENDLINE is documented by TA-Lib as the Hilbert
# Transform instantaneous trendline, not an Ichimoku conversion line, so the
# column name 'ICHIMOKU_CONV' is misleading. The name is left unchanged here
# because later cells and the exported CSV refer to it.
ticker['ICHIMOKU_CONV'] = talib.HT_TRENDLINE(ticker['Close'])

# For Fibonacci and Pivot Points, you'll need custom calculations:
def calculate_pivot_points(df):
    """Compute pivot-point levels from each row's own High, Low and Close.

    Args:
        df: Mapping-like object (e.g. a DataFrame) indexable by the keys
            'High', 'Low' and 'Close'.

    Returns:
        A 3-tuple ``(pivot, r1, s1)`` where ``pivot`` is the mean of High, Low
        and Close, ``r1`` is ``2 * pivot - Low`` and ``s1`` is
        ``2 * pivot - High``.
    """
    high, low, close = df['High'], df['Low'], df['Close']
    pivot = (high + low + close) / 3
    doubled_pivot = 2 * pivot
    resistance_1 = doubled_pivot - low
    support_1 = doubled_pivot - high
    return pivot, resistance_1, support_1

# Compute all three levels once, then attach each as its own column.
pivot_level, resistance_level, support_level = calculate_pivot_points(ticker)
ticker['PIVOT'] = pivot_level
ticker['R1'] = resistance_level
ticker['S1'] = support_level

In [20]:
# Show the last rows of the frame, including the indicator columns.
ticker.tail()

Unnamed: 0_level_0,Close,High,Low,Open,Volume,SMA_20,EMA_20,MACD,MACD_Signal,MACD_Hist,...,MFI,ATR,STDDEV,ADX,PLUS_DI,MINUS_DI,ICHIMOKU_CONV,PIVOT,R1,S1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-12-20,19.52,19.76,18.9,18.969999,108453700,21.628,21.195282,-1.084199,-0.867241,-0.216957,...,20.171163,0.897071,0.684398,16.62175,18.332285,32.600312,22.198348,19.393333,19.886667,19.026667
2024-12-23,20.200001,20.23,19.52,19.58,57988400,21.413,21.100493,-1.025782,-0.898949,-0.126832,...,26.713395,0.883708,0.52906,16.764969,21.079153,30.729447,22.028616,19.983334,20.446667,19.736668
2024-12-24,20.4,20.4,19.959999,20.219999,29884000,21.1895,21.033779,-0.952369,-0.909633,-0.042736,...,31.498856,0.852015,0.518058,16.66266,21.726792,29.595918,21.873951,20.253333,20.546666,20.106666
2024-12-26,20.440001,20.67,20.08,20.23,39846200,21.009,20.977229,-0.880808,-0.903868,0.02306,...,40.142496,0.8333,0.543824,16.194119,22.942376,28.099149,21.767907,20.396667,20.713334,20.123334
2024-12-27,20.299999,20.610001,20.09,20.209999,48260800,20.8415,20.912731,-0.825872,-0.888269,0.062397,...,41.550338,0.810921,0.336476,15.759046,21.891538,26.812114,21.653998,20.333333,20.576667,20.056666


In [23]:
# List every column currently on the frame (raw prices plus indicators).
ticker.columns

Index(['Close', 'High', 'Low', 'Open', 'Volume', 'SMA_20', 'EMA_20', 'MACD',
       'MACD_Signal', 'MACD_Hist', 'BB_Upper', 'BB_Middle', 'BB_Lower', 'SAR',
       'RSI', 'STOCH_K', 'STOCH_D', 'WILLR', 'ROC', 'OBV', 'AD', 'MFI', 'ATR',
       'STDDEV', 'ADX', 'PLUS_DI', 'MINUS_DI', 'ICHIMOKU_CONV', 'PIVOT', 'R1',
       'S1'],
      dtype='object')

In [22]:
# Persist the enriched frame. The index column is labelled explicitly because
# the feature-selection cell reloads this file with index_col='Date'; relying
# on the index happening to be named 'Date' would break that read silently.
ticker.to_csv("INTC_Stock.csv", index_label="Date")

## Feature Selection

In [None]:

# Load and prepare data
df = pd.read_csv("INTC_Stock.csv", index_col='Date', parse_dates=True)
# Make the chronological order explicit; the split below depends on it.
df = df.sort_index()
# Target is the next row's simple return: Close[t+1] / Close[t] - 1.
df['Target'] = df['Close'].shift(-1) / df['Close'] - 1
# Drop every row containing a NaN; this includes the final row, whose target
# is undefined because shift(-1) has no following close to pull from.
df = df.dropna()

# Features are everything except the target and the raw OHLC price columns.
X = df.drop(['Target', 'Close', 'High', 'Low', 'Open'], axis=1)
y = df['Target']

# Split and scale
# shuffle=False keeps the split chronological (first 80% train, last 20% test).
# The default shuffled split would place future rows in the training set,
# leaking look-ahead information into feature selection and evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
# Fit the scaler on the training rows only, then reuse it for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Adjust feature selection parameters
def correlation_selection(X, y, threshold=0.01):  # Lowered from 0.1
    """Rank features by absolute Pearson correlation with the target.

    Args:
        X: DataFrame of features, one column per feature.
        y: Target values, one per row of ``X`` (matched by position).
        threshold: Features whose absolute correlation is not strictly
            greater than this value are discarded.

    Returns:
        List of the retained column names, strongest correlation first.
        The retained correlations are also printed.
    """
    # With rowvar=False each column is a variable; y is appended as the last
    # variable, so the last column of the matrix holds feature-vs-target values.
    corr_matrix = np.corrcoef(X, y, rowvar=False)
    feature_vs_target = abs(corr_matrix[:-1, -1])
    strengths = pd.Series(feature_vs_target, index=X.columns)
    keep_mask = strengths > threshold
    ranked = strengths[keep_mask].sort_values(ascending=False)
    print("\nFeature correlations with target:")
    print(ranked)
    return ranked.index.tolist()

def rf_importance_selection(X, y, threshold=0.005):  # Lowered from 0.01
    """Select features by random-forest importance.

    Fits a 100-tree ``RandomForestRegressor`` (seeded with 42) on ``X`` and
    ``y``, prints every feature's importance in descending order, and keeps
    the features whose importance is strictly greater than ``threshold``.

    Args:
        X: DataFrame of features, one column per feature.
        y: Target values, one per row of ``X``.
        threshold: Minimum (exclusive) importance a feature needs to be kept.

    Returns:
        List of the retained column names, most important first.
    """
    forest = RandomForestRegressor(n_estimators=100, random_state=42).fit(X, y)
    ranked = pd.Series(forest.feature_importances_, index=X.columns)
    ranked = ranked.sort_values(ascending=False)
    print("\nRandom Forest feature importances:")
    print(ranked)
    keep_mask = ranked > threshold
    return ranked[keep_mask].index.tolist()

def mutual_info_selection(X, y, k=15, random_state=42):  # Increased from 10
    """Select the top-``k`` features by mutual information with the target.

    Args:
        X: DataFrame of features, one column per feature.
        y: Target values, one per row of ``X``.
        k: Number of features to keep, or ``"all"``. Integer values are capped
            at the number of columns in ``X`` so a narrow frame does not
            trigger scikit-learn's out-of-range handling for ``k``.
        random_state: Seed forwarded to ``mutual_info_regression``. The
            estimator is randomised, so without a seed the scores, and
            therefore the selected features, can change between runs.

    Returns:
        List of the selected column names, in the column order of ``X``.
        All scores are also printed in descending order.
    """
    from functools import partial

    if k != "all":
        k = min(k, X.shape[1])
    # SelectKBest calls score_func(X, y); bind the seed so scoring is repeatable.
    seeded_score = partial(mutual_info_regression, random_state=random_state)
    selector = SelectKBest(score_func=seeded_score, k=k)
    selector.fit(X, y)
    scores = pd.Series(selector.scores_, index=X.columns).sort_values(ascending=False)
    print("\nMutual Information scores:")
    print(scores)
    return X.columns[selector.get_support()].tolist()

# Apply updated selection methods
# Selection only ever sees the scaled training split. Re-wrapping the scaled
# array restores the column names; the selectors below hand the data to
# numpy/scikit-learn, which pair rows with y_train by position.
X_train_df = pd.DataFrame(X_train_scaled, columns=X.columns)
corr_features = correlation_selection(X_train_df, y_train)
mi_features = mutual_info_selection(X_train_df, y_train)
rf_features = rf_importance_selection(X_train_df, y_train)

# Find common features
# Walk the correlation ranking instead of materialising a set intersection:
# list(set(...)) has no stable order across kernel restarts, whereas this
# keeps the output deterministic and ordered by correlation strength.
mi_lookup = set(mi_features)
rf_lookup = set(rf_features)
common_features = [name for name in corr_features if name in mi_lookup and name in rf_lookup]

# Report each method's picks, then the consensus once at the end.
print("Correlation-based features:", corr_features)
print("\nMutual Information features:", mi_features)
print("\nRandom Forest important features:", rf_features)
print("\nCommon features:", common_features)