# Stock Price Predictor
This notebook trains models to predict stock closing prices, using historical data from Yahoo Finance downloaded with the `yfinance` library.

In [None]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
%matplotlib inline

In [None]:
# Ask for the ticker interactively; strip stray whitespace so it is not sent
# to Yahoo as part of the symbol.
stocks = input("Enter the code of the stock: ").strip()
if not stocks:
    raise ValueError("No stock code entered.")

data = yf.download(stocks, start="2010-01-01", end="2022-04-30", auto_adjust=True)
if data.empty:
    # An unknown ticker or a failed request appears to yield an empty frame
    # rather than an exception; stop here instead of failing in a later cell.
    raise ValueError("No price data returned for {!r}.".format(stocks))

# Some yfinance releases return two-level (field, ticker) columns even for a
# single ticker. Drop the constant ticker level so that data.Close and
# data["Open"] are plain Series, as the rest of the notebook expects.
if isinstance(data.columns, pd.MultiIndex) and data.columns.get_level_values(-1).nunique() == 1:
    data = data.droplevel(-1, axis=1)

data.head()

The download covers prices from January 1, 2010 up to, but not including, April 30, 2022 (yfinance treats the `end` date as exclusive).

In [None]:
# Dimensions of the downloaded frame as (rows, columns).
data.shape


In [None]:
# Print the frame's summary: index, columns, dtypes and non-null counts.
data.info()

In [None]:
# Summary statistics, transposed so that each column of `data` becomes a row.
data.describe().transpose()

In [None]:
# Plot the closing-price series for the chosen ticker.
ax = data.Close.plot(figsize = (10, 7), color='m')
ax.set_ylabel("{} Prices".format(stocks))
ax.set_title("{} Price Series".format(stocks))
# Save BEFORE plt.show(): once the figure has been shown it is released, and a
# later plt.savefig() would write a new, blank figure to disk.
ax.figure.savefig("plot1.png")
plt.show()

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# sns.distplot is deprecated; histplot with a KDE overlay on a density scale
# is the documented replacement for it.
ax = sns.histplot(data['Close'], kde=True, stat="density")
ax.set_title("Distribution of Close prices")
plt.show()

In [None]:
# sns.distplot is deprecated; histplot with a KDE overlay on a density scale
# is the documented replacement for it.
ax = sns.histplot(data['Open'], kde=True, stat="density")
ax.set_title("Distribution of Open prices")
plt.show()

In [None]:
# sns.distplot is deprecated; histplot with a KDE overlay on a density scale
# is the documented replacement for it.
ax = sns.histplot(data['High'], kde=True, stat="density")
ax.set_title("Distribution of High prices")
plt.show()

We can see from the shape of the price series and from how the values are distributed that the data is highly non-linear.

# Models to use:
Linear Regression (along with its Lasso and Ridge regularized variants) and Support Vector Machines (SVR).

In [None]:
# Target: the closing price. Features: every remaining column of `data`.
y = data["Close"]
X = data.drop(columns="Close")

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is shuffled, so test rows are interleaved in time
# with training rows — confirm this is intended for a price time series.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Fit a linear regression on the training split (fit() returns the estimator
# itself), then predict the held-out rows.
linear_model = LinearRegression().fit(X_train, y_train)
pred1 = linear_model.predict(X_test)

In [None]:
# Wrap the raw prediction array in a DataFrame and preview its first five rows.
predictions = pd.DataFrame(pred1)
predictions.head(n=5)

In [None]:
# First five actual closing prices of the test split, for comparison with the predictions.
y_test.iloc[:5]

In [None]:
# Calculating the mean squared error and rmse and r2_score
from sklearn.metrics import r2_score, mean_squared_error
def calculate_metrics(y_test, pred1):
    """
    Print the mean squared error, its square root (RMSE) and the R2 score.

    y_test = ground truth / actual values
    pred1  = model predictions to compare against y_test
    """
    mse = mean_squared_error(y_test, pred1)
    # All three metrics are computed before anything is printed.
    report = (
        ("Mean Squared Error = ", mse),
        ("RMSE = ", np.sqrt(mse)),
        ("R2_score = ", r2_score(y_test, pred1)),
    )
    for label, value in report:
        print(label, value)

# Score the linear regression predictions against the held-out targets.
calculate_metrics(y_test, pred1)

In [None]:
from sklearn.linear_model import Lasso, Ridge

# Lasso (L1-regularized) regression with scikit-learn's default settings.
lasso_model = Lasso()
lasso_model.fit(X_train, y_train)
lasso_pred = lasso_model.predict(X_test)

# Ridge (L2-regularized) regression with scikit-learn's default settings.
ridge_model = Ridge()
ridge_model.fit(X_train, y_train)
ridge_pred = ridge_model.predict(X_test)

In [None]:
# Score the Lasso predictions against the held-out targets.
calculate_metrics(y_test, lasso_pred)

In [None]:
# Score the Ridge predictions against the held-out targets.
calculate_metrics(y_test, ridge_pred)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# An RBF-kernel SVR is distance based, so features on very different scales
# (e.g. prices next to traded volume) must be standardized or the largest one
# dominates the kernel. Scaling inside a Pipeline fits the scaler on each
# cross-validation training fold only, so no validation data leaks into it.
svr = SVR()
svr_pipeline = Pipeline([("scaler", StandardScaler()), ("svr", svr)])

# Candidate hyperparameters; the "svr__" prefix routes each one to the SVR step.
params = {"svr__C": [0.1, 1, 10, 100, 1000],
          "svr__gamma": [1, 0.1, 0.01, 0.001, 0.0001],
          "svr__kernel": ['rbf']
         }
# refit=True retrains the best configuration on all of X_train after the search.
grid = GridSearchCV(svr_pipeline, params, refit = True, verbose = 3)
grid.fit(X_train, y_train)

In [None]:
# Reuse the estimator that GridSearchCV already refit (refit=True) on X_train
# with the best parameters it found. Hard-coded C/gamma values only suit the
# ticker they were tuned on, and the ticker here is user input.
svr = grid.best_estimator_
svr_pred = svr.predict(X_test)
df = pd.DataFrame(svr_pred)

# Score the SVR the same way as the other models.
calculate_metrics(y_test, svr_pred)

In [None]:
import joblib
# Persist the fitted linear regression to disk so it can be reloaded for predictions.
joblib.dump(value=linear_model, filename='linear_model.pkl')

In [None]:
def predict_price(Open, High, Low, Volume):
    """Predict a closing price from one day's Open, High, Low and Volume.

    Loads the model stored in ``linear_model.pkl`` and runs it on a single
    input row built from the arguments.

    Parameters
    ----------
    Open, High, Low, Volume : float
        Feature values for the day to predict.

    Returns
    -------
    The array returned by the loaded model's ``predict`` for that one row.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files that this notebook itself wrote.
    trained_model = joblib.load("linear_model.pkl")

    values = {"Open": Open, "High": High, "Low": Low, "Volume": Volume}
    # The column order of the training frame depends on what the download
    # returned, so align the inputs by name whenever the model recorded the
    # feature names it was fitted with.
    fitted_names = getattr(trained_model, "feature_names_in_", None)
    if fitted_names is not None and set(fitted_names) == set(values):
        test_data = pd.DataFrame([values], columns=list(fitted_names))
    else:
        # No usable feature names: fall back to the positional order.
        test_data = np.array([[Open, High, Low, Volume]])

    return trained_model.predict(test_data)

# Example with internally consistent inputs (Low <= Open <= High).
predict_price(Open=100, High=220, Low=95, Volume=30000)