In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score 
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.decomposition import PCA

In [2]:
df = pd.read_csv('D:\DATA FOR MACHINE LEARNING PROJECT\jupyter_notebook_program\stock prediction models\Wipro3.csv')
df.drop(['Unnamed: 0', 'Date'], axis = 1, inplace=True)
df.dropna(inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5314 entries, 0 to 5313
Data columns (total 12 columns):
Open                    5314 non-null float64
High                    5314 non-null float64
Low                     5314 non-null float64
Last                    5314 non-null float64
Close                   5314 non-null float64
Total Trade Quantity    5314 non-null float64
Turnover (Lacs)         5314 non-null float64
open_next               5314 non-null float64
prev_close              5314 non-null float64
Day                     5314 non-null int64
Month                   5314 non-null int64
Year                    5314 non-null int64
dtypes: float64(9), int64(3)
memory usage: 539.7 KB


In [3]:
df.sample(3)

Unnamed: 0,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs),open_next,prev_close,Day,Month,Year
193,1870.0,1894.0,1850.0,1850.0,1850.0,1948.0,36.6,1825.0,1844.05,30,11,0
4352,557.45,557.45,546.6,549.5,551.35,2410790.0,13315.93,554.3,550.95,26,7,17
4670,464.15,468.35,463.25,466.15,466.3,1032733.0,4821.27,460.6,463.75,13,11,18


In [4]:
input_cols = [col for col in list(df.columns.values) if col != 'open_next']

In [5]:
X = df[input_cols].values
y = df['open_next'].values

In [6]:
X.shape, y.shape

((5314, 11), (5314,))

In [7]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [8]:
X_scaled.shape

(5314, 11)

In [9]:
pca = PCA()
pca.fit(X_scaled)

PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [10]:
X_pc = pca.transform(X_scaled)

In [11]:
X_pc.shape

(5314, 11)

In [12]:
X_train = X_pc[0:5100]

In [13]:
X_test = X_pc[5100:]

In [14]:
X_train.shape, X_test.shape

((5100, 11), (214, 11))

In [15]:
y_train = y[0:5100]

In [16]:
y_test = y[5100:]

In [17]:
y_train.shape, y_test.shape

((5100,), (214,))

---

In [18]:
mse = {}
r2 = {}

algo_names = ['LinearRegression', 'RidgeCV', 'LassoCV', 'SGDRegressor', 'SVR']
algos = 0,10.0)), [LinearRegression(), 
         RidgeCV(alphas=(0.1,1.
         LassoCV(alphas=(0.1,1.0,10.0)), 
         SGDRegressor(random_state=123, alpha=0.001, penalty='l2'),
         SVR(kernel='linear')]

for algo_names, algo in zip(algo_names,  algos):
    al = algo
    al.fit(X_train, y_train)
    y_pred = al.predict(X_test)
    
    mse[algo_names] = [mean_squared_error(y_test, y_pred)]
    r2[algo_names] = [r2_score(y_test, y_pred)]
    
algo_test = pd.DataFrame.from_dict(mse, orient = 'index',columns = ['Mean Squared Error'])
algo_r2 = pd.DataFrame.from_dict(r2, orient = 'index', columns = ['R2 Score'])
algo_test = pd.DataFrame([algo_test['Mean Squared Error'], algo_r2['R2 Score']]).T.reset_index()
algo_test



Unnamed: 0,index,Mean Squared Error,R2 Score
0,LinearRegression,103.823345,0.906519
1,RidgeCV,91.436221,0.917673
2,LassoCV,74.517051,0.932906
3,SGDRegressor,87.394404,0.921312
4,SVR,60.551349,0.945481
