Stage 1: Data collection
- Create CV file where any stock entered data is automatically parsed and saved

In [None]:
import csv
import yfinance as yf

# Step 1: Create or locate the CSV file
csv_file = "StockData.csv"

try:
    # Check if the file already exists
    with open(csv_file, "r"):
        pass
except FileNotFoundError:
    # Create a new file with header if it doesn't exist
    with open(csv_file, "w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["Ticker Symbol", "Average volume", "t-13 vol", "t-13 change", "t-12 vol", "t-12 change", "t-11 vol", "t-11 change", "t-10 vol", "t-10 change", "t-9 vol", "t-9 change", "t-8 vol", "t-8 change", "t-7 vol", "t-7 change","t-6 vol", "t-6 change","t-5 vol", "t-5 change","t-4 vol", "t-4 change", "t-3 vol",
                         "t-3 change", "t-2 volume",
                         "t-2 change", "t-1 volume",
                         "t-1 change", "Result change"])

# Step 2: Read the Russell 2000 stocks from the CSV file
russell2000_file = "SP500.csv"

with open(russell2000_file, "r") as file:
    reader = csv.reader(file)
    next(reader)  # Skip the header row

    # Step 3 and 4: Loop through the stocks and get information
    for row in reader:
        ticker = row[0]  # Ticker symbol from the CSV file

        try:
            stock = yf.Ticker(ticker)
            info = stock.history(period="14d")
            #print(info)

            # Step 5: Create a new row in the CSV file
            with open(csv_file, "a", newline="") as file:
                writer = csv.writer(file)
                avgVolume = info["Volume"].mean()

                # Step 6: Add information to the row
                row = [
                    ticker,
                    info["Volume"].mean(),  # Average volume

                    (info["Volume"].iloc[0]/avgVolume)*100,  # 13 days ago's volume
                    (info["Close"].iloc[0] - info["Open"].iloc[0])/(info["Open"].iloc[0])*100,  # 13 days ago's daily % change

                    (info["Volume"].iloc[1]/avgVolume)*100,  # 12 days ago's volume
                    (info["Close"].iloc[1] - info["Open"].iloc[1])/(info["Open"].iloc[1])*100,  # 12 days ago's daily % change

                    (info["Volume"].iloc[2]/avgVolume)*100,  # 11 day ago's volume
                    (info["Close"].iloc[2] - info["Open"].iloc[2])/(info["Open"].iloc[2])*100,  # 11 day ago's daily % change

                    (info["Volume"].iloc[3]/avgVolume)*100,  # 10 day ago's volume
                    (info["Close"].iloc[3] - info["Open"].iloc[3])/(info["Open"].iloc[3])*100,  # 10 day ago's daily % change

                    (info["Volume"].iloc[4]/avgVolume)*100,
                    (info["Close"].iloc[4] - info["Open"].iloc[4])/(info["Open"].iloc[4])*100, 

                    (info["Volume"].iloc[5]/avgVolume)*100, 
                    (info["Close"].iloc[5] - info["Open"].iloc[5])/(info["Open"].iloc[5])*100, 
                    
                    (info["Volume"].iloc[6]/avgVolume)*100,  
                    (info["Close"].iloc[6] - info["Open"].iloc[6])/(info["Open"].iloc[6])*100, 

                    (info["Volume"].iloc[7]/avgVolume)*100,  
                    (info["Close"].iloc[7] - info["Open"].iloc[7])/(info["Open"].iloc[7])*100, 

                    (info["Volume"].iloc[8]/avgVolume)*100,
                    (info["Close"].iloc[8] - info["Open"].iloc[8])/(info["Open"].iloc[8])*100, 

                    (info["Volume"].iloc[9]/avgVolume)*100,  
                    (info["Close"].iloc[9] - info["Open"].iloc[9])/(info["Open"].iloc[9])*100,

                    (info["Volume"].iloc[10]/avgVolume)*100,  
                    (info["Close"].iloc[10] - info["Open"].iloc[10])/(info["Open"].iloc[10])*100,

                    (info["Volume"].iloc[11]/avgVolume)*100,  
                    (info["Close"].iloc[11] - info["Open"].iloc[11])/(info["Open"].iloc[11])*100,

                    (info["Volume"].iloc[12]/avgVolume)*100,  
                    (info["Close"].iloc[12] - info["Open"].iloc[12])/(info["Open"].iloc[4])*100,    

                    (info["Close"].iloc[13] - info["Open"].iloc[13])/(info["Open"].iloc[13])*100,  # 0 days ago daily %
                ]
                writer.writerow(row)
                print(f"Data for {ticker} saved successfully.")

        except Exception as e:
            print(f"An error occurred for {ticker}:", str(e))

print("All data saved successfully.")


Ml part
- Create buckets of % change
- Create SVC model
- Test model

In [3]:
#Create buckets

import pandas as pd
df = pd.read_csv(csv_file)

def makeBins(s): #s is the result change
    if(s < -10):
        return -1
    elif(s < -5):
        return 0
    elif(s < -2):
        return .5
    elif(s < -1):
        return .75
    elif(s < 0):
        return 1
    elif(s < .5):
        return 1.125
    elif(s < 1):
        return 1.25
    elif (s < 2):
        return 1.5
    elif( s <= 5):
        return 2
    elif(s < 10):
        return 3
    elif(s > 10):
        return 4
    else:
        return 

    
df['BIN'] = df['Result change'].apply(makeBins, 0)
df

Unnamed: 0,Ticker Symbol,Average volume,t-13 vol,t-13 change,t-12 vol,t-12 change,t-11 vol,t-11 change,t-10 vol,t-10 change,...,t-4 vol,t-4 change,t-3 vol,t-3 change,t-2 volume,t-2 change,t-1 volume,t-1 change,Result change,BIN
0,spy,80109750.0,97.568024,-0.084786,82.931728,0.380728,79.341154,-0.329659,68.939548,0.465774,...,116.051292,1.329239,111.516513,1.079006,77.546491,0.00963,129.820028,-0.955714,-0.81194,1.0
1,spy,80149530.0,82.89057,0.380728,79.301778,-0.329659,68.905335,0.465774,94.62364,0.162937,...,111.461169,1.079006,77.508006,0.00963,129.7556,-0.947511,114.201298,-0.810305,-0.442334,1.0
2,spy,80435200.0,79.02013,-0.329659,68.660611,0.465774,94.287575,0.162937,91.324942,0.002428,...,77.232729,0.00963,129.294761,-0.947511,113.795701,-0.81194,117.985533,-0.436901,0.909736,1.25
3,spy,80794270.0,68.355469,0.465774,93.868542,0.162937,90.919075,0.002428,79.624586,0.155347,...,128.720149,-0.947511,113.28997,-0.81194,117.461181,-0.442334,108.725537,0.906036,-0.055697,1.0
4,spy,81350070.0,93.227212,0.162937,90.297896,0.002428,79.080573,0.155347,120.180232,-1.096011,...,112.515948,-0.81194,116.658661,-0.442334,107.9827,0.909736,61.520293,-0.056553,-0.048649,1.0
5,spy,81472530.0,90.162166,0.002428,78.961704,0.155347,119.999584,-1.096011,98.741253,-0.580256,...,116.483307,-0.442334,107.820387,0.909736,61.427819,-0.055697,60.413121,-0.049143,-0.248864,1.0
6,spy,81654700.0,78.785548,0.155347,119.731876,-1.096011,98.520971,-0.580256,113.855545,1.329239,...,107.57985,0.909736,61.290779,-0.055697,60.278345,-0.048649,117.743258,-0.25031,0.043693,1.125
7,spy,82057550.0,119.144068,-1.096011,98.037295,-0.580256,113.296586,1.329239,108.869449,1.079006,...,60.98988,-0.055697,59.982416,-0.048649,117.165212,-0.248864,85.497437,0.043323,-0.442653,1.0
8,spy,81683530.0,98.486194,-0.580256,113.815355,1.329239,109.367947,1.079006,76.052418,0.00963,...,60.257068,-0.048649,117.701696,-0.248864,85.888918,0.043693,86.23452,-0.441213,0.191647,1.125
9,spy,81713690.0,113.773348,1.329239,109.327581,1.079006,76.024349,0.00963,127.271821,-0.947511,...,117.658254,-0.248864,85.857218,0.043693,86.202692,-0.442653,66.438562,0.192048,-0.390906,1.0


In [4]:
X = df[["t-13 vol", "t-13 change", 
        "t-12 vol", "t-12 change",
         "t-11 vol", "t-11 change", 
         "t-10 vol", "t-10 change", 
         "t-9 vol", "t-9 change", 
         "t-8 vol", "t-8 change", 
         "t-7 vol", "t-7 change",
         "t-6 vol", "t-6 change",
         "t-5 vol", "t-5 change",
         "t-4 vol", "t-4 change", 
         "t-3 vol", "t-3 change", 
         "t-2 volume", "t-2 change", 
         "t-1 volume", "t-1 change", 
        ]].values

y = df[['BIN']].values.astype(int)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
#SVC classifier

from sklearn.svm import SVC


# import metrics to compute accuracy
from sklearn.metrics import accuracy_score


# instantiate classifier with default hyperparameters
svc=SVC() 


# fit classifier to training set
svc.fit(X_train,y_train)


# make predictions on test set
y_pred=svc.predict(X_test)

# compute and print accuracy score
print('Model accuracy score with default hyperparameters: {0:0.4f}'. format(accuracy_score(y_test, y_pred)))

  return f(*args, **kwargs)


ValueError: The number of classes has to be greater than one; got 1 class

In [None]:
#run random stocks to test

#using data from VCIG
VCIG = [[.01, -6.47, .01 , 4.49, 0, .31, .03,6.34,.01,-1.85,.01, 6.64,.01,-1.55,0,-8.27, .01,3,.83,-4.71,10.22, 90.79, .37,-12.68,2.25, 70.74]]  # Replace with your own data
print("VCIG")
print(svc.predict(VCIG))

#using data from ZURA
ZURA = [[0.34336622417832047,-2.9895408728201183,5.661219564137148,0.13209711780605024,0.5709519415063534,-6.911145802676313,0.2998367288989849,5.303028934695922,0.3775537729821822,7.6119439313502495,0.5673741747710656,-3.042873991857099,0.26505288563924184,2.282451386419297,0.43837580748207566,7.816710481703206,0.31355150138425497,6.143788030489902,1.1320650244906652,17.181697941053734,1.3498118832966566,35.449740201954924,0.6421097465748563,-3.44827558882159,0.38500745368069855,2.985071865055365]]
print("ZURA ")
print(svc.predict(ZURA))

RCAT = [[ 0.033417711772235965,-4.5454561874557635,0.1080685679086018,-4.222451896873285,0.07215800263722726,-1.5053761786898527,0.04109839552633859,1.5283841089854,0.11777048422957352,-1.8478281784710746,0.09782765623646497,-6.559138606498534,0.053023667670866345,0.0,0.0680481630845934,1.2373428128632054,0.056527137453439465,-2.111110478271656,0.1478868494759233,7.222222148636239,10.695486876425692,31.578949019519264,1.2639440984821506,1.754384313814067,0.6328479166867186,-1.086955465418649]]
print("RCAT ")
print(svc.predict(RCAT))

MRKR = [[ 0.3657559330637997,-2.8248560935074982,0.27514186775930527,2.994016335956057,0.6325217107529416,14.705881940452297,0.619576844280871,5.050500185316258,1.4201279982595139,10.572687733626578,3.052196457385282,14.942533355537016,1.2866181989593344,-19.354836228734793,0.4444404155410918,1.2499987582366165,0.41423572710626033,3.3333300219643105,1.816342440669362,21.705434021629927,0.9513207752415831,10.862616668946751,1.5247022136809472,18.309855371258365,0.6708486683467194,-5.286359621345079]]
print("MRKR " )
print(svc.predict(MRKR))

#AMD,79791471.42857143,65.92546679263788,1.1493301392699915,94.31772425373782,5.360019929212473,102.46107577197205,-4.998788203872088,81.77177188467869,2.324598418477503,94.05967662494541,1.0924969470382213,107.02234019640122,0.5213181969126954,166.01009810751347,-5.887249179055976,120.17336976400807,0.6322635418287151,107.26346872374293,-0.917139749163955,102.57600033515935,-4.562071115634929,73.75700553746972,-0.13434884342501077,103.81510519473707,-5.384418405301338,88.67463994988366,-1.2462579261395332,0.640384771083261
AMD = [[65.92546679263788,1.1493301392699915,94.31772425373782,5.360019929212473,102.46107577197205,-4.998788203872088,81.77177188467869,2.324598418477503,94.05967662494541,1.0924969470382213,107.02234019640122,0.5213181969126954,166.01009810751347,-5.887249179055976,120.17336976400807,0.6322635418287151,107.26346872374293,-0.917139749163955,102.57600033515935,-4.562071115634929,73.75700553746972,-0.13434884342501077,103.81510519473707,-5.384418405301338,88.67463994988366,-1.2462579261395332]]
print("AMD " )
print(svc.predict(AMD))

#KR,5807589.0,103.52660975148207,2.257796483087248,161.8847339231478,-2.6961773280034014,238.9580254387836,5.269555058938761,161.98460324930016,-2.451265053272869,120.30982219988364,-1.790335225150995,102.24380547590404,-1.131315905638422,108.57862014684578,-2.5839276059736846,239.83790863988483,-1.0050242024818261,103.26660512649914,1.2158696284925903,78.63504115046709,-0.42444984608036573,107.16495261630945,-3.721820465680269,89.74292085751937,0.8645519493939985,102.72421137239567,-0.3182809881465182,0.4464302820210788
#should be .44
kr =[[103.52660975148207,2.257796483087248,161.8847339231478,-2.6961773280034014,238.9580254387836,5.269555058938761,161.98460324930016,-2.451265053272869,120.30982219988364,-1.790335225150995,102.24380547590404,-1.131315905638422,108.57862014684578,-2.5839276059736846,239.83790863988483,-1.0050242024818261,103.26660512649914,1.2158696284925903,78.63504115046709,-0.42444984608036573,107.16495261630945,-3.721820465680269,89.74292085751937,0.8645519493939985,102.72421137239567,-0.3182809881465182]]
print("KR ")
print(svc.predict(kr))

Things to improve
 - Add cross train
 - Add generally 3 month trend
 - Output as a percentage of average daily range
 - Use independent model

In [None]:
#LINEAR MODEL

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

X = df[["t-13 vol", "t-13 change", 
        "t-12 vol", "t-12 change",
        "t-11 vol", "t-11 change", 
        "t-10 vol", "t-10 change", 
        "t-9 vol", "t-9 change", 
        "t-8 vol", "t-8 change", 
        "t-7 vol", "t-7 change",
        "t-6 vol", "t-6 change",
        "t-5 vol", "t-5 change",
        "t-4 vol", "t-4 change", 
        "t-3 vol", "t-3 change", 
        "t-2 volume", "t-2 change", 
        "t-1 volume", "t-1 change"]].values

y = df['Result change'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

regressor = LinearRegression()
regressor.fit(X_train, y_train)

y_pred = regressor.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)



In [None]:


#using data from VCIG
VCIG = [[.01, -6.47, .01 , 4.49, 0, .31, .03,6.34,.01,-1.85,.01, 6.64,.01,-1.55,0,-8.27, .01,3,.83,-4.71,10.22, 90.79, .37,-12.68,2.25, 70.74]]  # Replace with your own data
print("VCIG")
print(regressor.predict(scaler.transform(VCIG)))

#using data from ZURA
ZURA = [[0.34336622417832047,-2.9895408728201183,5.661219564137148,0.13209711780605024,0.5709519415063534,-6.911145802676313,0.2998367288989849,5.303028934695922,0.3775537729821822,7.6119439313502495,0.5673741747710656,-3.042873991857099,0.26505288563924184,2.282451386419297,0.43837580748207566,7.816710481703206,0.31355150138425497,6.143788030489902,1.1320650244906652,17.181697941053734,1.3498118832966566,35.449740201954924,0.6421097465748563,-3.44827558882159,0.38500745368069855,2.985071865055365]]
print("ZURA ")
print(regressor.predict(scaler.transform(ZURA)))

RCAT = [[ 0.033417711772235965,-4.5454561874557635,0.1080685679086018,-4.222451896873285,0.07215800263722726,-1.5053761786898527,0.04109839552633859,1.5283841089854,0.11777048422957352,-1.8478281784710746,0.09782765623646497,-6.559138606498534,0.053023667670866345,0.0,0.0680481630845934,1.2373428128632054,0.056527137453439465,-2.111110478271656,0.1478868494759233,7.222222148636239,10.695486876425692,31.578949019519264,1.2639440984821506,1.754384313814067,0.6328479166867186,-1.086955465418649]]
print("RCAT ")
print(regressor.predict(scaler.transform(RCAT)))

MRKR = [[ 0.3657559330637997,-2.8248560935074982,0.27514186775930527,2.994016335956057,0.6325217107529416,14.705881940452297,0.619576844280871,5.050500185316258,1.4201279982595139,10.572687733626578,3.052196457385282,14.942533355537016,1.2866181989593344,-19.354836228734793,0.4444404155410918,1.2499987582366165,0.41423572710626033,3.3333300219643105,1.816342440669362,21.705434021629927,0.9513207752415831,10.862616668946751,1.5247022136809472,18.309855371258365,0.6708486683467194,-5.286359621345079]]
print("MRKR " )
print(regressor.predict(scaler.transform(MRKR)))

#AMD,79791471.42857143,65.92546679263788,1.1493301392699915,94.31772425373782,5.360019929212473,102.46107577197205,-4.998788203872088,81.77177188467869,2.324598418477503,94.05967662494541,1.0924969470382213,107.02234019640122,0.5213181969126954,166.01009810751347,-5.887249179055976,120.17336976400807,0.6322635418287151,107.26346872374293,-0.917139749163955,102.57600033515935,-4.562071115634929,73.75700553746972,-0.13434884342501077,103.81510519473707,-5.384418405301338,88.67463994988366,-1.2462579261395332,0.640384771083261
AMD = [[65.92546679263788,1.1493301392699915,94.31772425373782,5.360019929212473,102.46107577197205,-4.998788203872088,81.77177188467869,2.324598418477503,94.05967662494541,1.0924969470382213,107.02234019640122,0.5213181969126954,166.01009810751347,-5.887249179055976,120.17336976400807,0.6322635418287151,107.26346872374293,-0.917139749163955,102.57600033515935,-4.562071115634929,73.75700553746972,-0.13434884342501077,103.81510519473707,-5.384418405301338,88.67463994988366,-1.2462579261395332]]
print("AMD " )
print(regressor.predict(scaler.transform(AMD)))

#KR,5807589.0,103.52660975148207,2.257796483087248,161.8847339231478,-2.6961773280034014,238.9580254387836,5.269555058938761,161.98460324930016,-2.451265053272869,120.30982219988364,-1.790335225150995,102.24380547590404,-1.131315905638422,108.57862014684578,-2.5839276059736846,239.83790863988483,-1.0050242024818261,103.26660512649914,1.2158696284925903,78.63504115046709,-0.42444984608036573,107.16495261630945,-3.721820465680269,89.74292085751937,0.8645519493939985,102.72421137239567,-0.3182809881465182,0.4464302820210788
#should be .44
KR =[[103.52660975148207,2.257796483087248,161.8847339231478,-2.6961773280034014,238.9580254387836,5.269555058938761,161.98460324930016,-2.451265053272869,120.30982219988364,-1.790335225150995,102.24380547590404,-1.131315905638422,108.57862014684578,-2.5839276059736846,239.83790863988483,-1.0050242024818261,103.26660512649914,1.2158696284925903,78.63504115046709,-0.42444984608036573,107.16495261630945,-3.721820465680269,89.74292085751937,0.8645519493939985,102.72421137239567,-0.3182809881465182]]
print("KR ")
print(regressor.predict(scaler.transform(KR)))
