In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_absolute_error,mean_absolute_percentage_error
import datetime

In [2]:
df = pd.read_excel("TCS_stock_history.xlsx")

In [3]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-08-12,28.794172,29.742206,28.794172,29.51914,212976,0.0,0
1,2002-08-13,29.556316,30.030333,28.905705,29.119476,153576,0.0,0
2,2002-08-14,29.184536,29.184536,26.563503,27.111877,822776,0.0,0
3,2002-08-15,27.111877,27.111877,27.111877,27.111877,0,0.0,0
4,2002-08-16,26.972458,28.255089,26.58209,27.046812,811856,0.0,0


In [5]:
print("Descriptive Statistics:\n",df.describe())
print("\nMissing Values:\n",df.isnull().sum())

Descriptive Statistics:
                                 Date         Open  ...    Dividends  Stock Splits
count                           4463  4463.000000  ...  4463.000000   4463.000000
mean   2012-08-23 19:22:31.109119488   866.936239  ...     0.071533      0.001344
min              2002-08-12 00:00:00    24.146938  ...     0.000000      0.000000
25%              2008-02-14 12:00:00   188.951782  ...     0.000000      0.000000
50%              2012-09-04 00:00:00   530.907530  ...     0.000000      0.000000
75%              2017-03-22 12:00:00  1156.462421  ...     0.000000      0.000000
max              2021-09-30 00:00:00  3930.000000  ...    40.000000      2.000000
std                              NaN   829.905368  ...     0.965401      0.051842

[8 rows x 8 columns]

Missing Values:
 Date            0
Open            0
High            0
Low             0
Close           0
Volume          0
Dividends       0
Stock Splits    0
dtype: int64


In [6]:
df['dateOrdinal'] = df['Date'].apply(lambda x: x.toordinal())
X = df[['dateOrdinal']].values
y = df['Close'].values

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [8]:
lin_model = LinearRegression()
lin_model.fit(X_train,y_train)
lin_preds =lin_model.predict(X_test)

In [10]:
poly_model = make_pipeline(PolynomialFeatures(degree=3),LinearRegression())
poly_model.fit(X_train,y_train)
poly_preds = poly_model.predict(X_test)

In [11]:
rbf_model = SVR(kernel='rbf',C=100,gamma=0.00001,epsilon=0.1)
rbf_model.fit(X_train,y_train)
rbf_preds = rbf_model.predict(X_test)

In [15]:
# Step 8: Evaluation metrics
def evaluate(name, y_true, y_pred):
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    print(f"\n{name} Performance:")
    print(f"R² Score: {r2:.4f}")
    print(f"MAE: {mae:.2f}")
    print(f"MAPE: {mape:.2%}")
    return r2

# Call evaluate for each model and store the R² scores
r2_lin = evaluate("Linear Regression", y_test, lin_preds)
r2_poly = evaluate("Polynomial Regression", y_test, poly_preds)
r2_rbf = evaluate("RBF Regression", y_test, rbf_preds)


Linear Regression Performance:
R² Score: -2.1237
MAE: 918.92
MAPE: 37.90%

Polynomial Regression Performance:
R² Score: -0.2675
MAE: 529.55
MAPE: 20.83%

RBF Regression Performance:
R² Score: -7.7080
MAE: 1576.56
MAPE: 65.90%


In [16]:
r2_scores = {'Linear': r2_lin, 'Polynomial': r2_poly, 'RBF': r2_rbf}
best_model_name = max(r2_scores, key=r2_scores.get)
print(f"\n✅ Best model based on R²: {best_model_name}")


✅ Best model based on R²: Polynomial


In [17]:
def predict_for_date(date_str):
    date = datetime.datetime.strptime(date_str, "%Y-%m-%d").toordinal()
    if best_model_name == 'Linear':
        prediction = lin_model.predict([[date]])
    elif best_model_name == 'Polynomial':
        prediction = poly_model.predict([[date]])
    else:
        prediction = rbf_model.predict([[date]])
    print(f"Predicted Close Price on {date_str}: ₹{prediction[0]:.2f}")

In [18]:
predict_for_date("2025-04-15")
predict_for_date("2023-01-01")

Predicted Close Price on 2025-04-15: ₹2950.26
Predicted Close Price on 2023-01-01: ₹2386.87
