In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler ,LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

In [5]:
df = pd.read_csv('/content/nifty_500.csv')

In [6]:
percentage_columns = ["Change", "Percentage Change", "365 Day Percentage Change", "30 Day Percentage Change"]
for col in percentage_columns:
    df[col] = pd.to_numeric(df[col].astype(str).str.replace('%', '', regex=True), errors='coerce')

In [7]:
df.head()

Unnamed: 0,Company Name,Symbol,Industry,Series,Open,High,Low,Previous Close,Last Traded Price,Change,Percentage Change,Share Volume,Value (Indian Rupee),52 Week High,52 Week Low,365 Day Percentage Change,30 Day Percentage Change
0,3M India Ltd.,3MINDIA,Diversified,EQ,21950.0,21999.0,21126.05,21854.05,21575.0,-279.05,-1.28,4159,89653570.0,27800.0,17273.0,-10.18,8.22
1,Aarti Drugs Ltd.,AARTIDRUGS,Healthcare,EQ,400.5,401.8,394.1,403.85,400.0,-3.85,-0.95,31782,12627310.0,749.75,378.0,-42.92,-5.1
2,Aavas Financiers Ltd.,AAVAS,Financial Services,EQ,1997.1,2004.05,1894.5,2015.45,1943.15,-72.3,-3.59,150704,290772800.0,3340.0,1815.0,-25.69,-9.39
3,ABB India Ltd.,ABB,Capital Goods,EQ,2260.35,2311.5,2260.35,2300.9,2280.0,-20.9,-0.91,97053,221093500.0,2487.85,1618.05,27.25,-5.21
4,Abbott India Ltd.,ABBOTINDIA,Healthcare,EQ,18700.4,19200.0,18605.0,18760.4,19199.8,439.4,2.34,12396,234677000.0,23934.45,15514.0,11.61,5.84


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 501 entries, 0 to 500
Data columns (total 17 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Company Name               501 non-null    object 
 1   Symbol                     501 non-null    object 
 2   Industry                   501 non-null    object 
 3   Series                     501 non-null    object 
 4   Open                       501 non-null    float64
 5   High                       501 non-null    float64
 6   Low                        501 non-null    float64
 7   Previous Close             501 non-null    float64
 8   Last Traded Price          501 non-null    float64
 9   Change                     496 non-null    float64
 10  Percentage Change          498 non-null    float64
 11  Share Volume               501 non-null    int64  
 12  Value (Indian Rupee)       501 non-null    float64
 13  52 Week High               501 non-null    float64

In [9]:
df["Percentage Change"] = (df["Percentage Change"] > 0).astype(int)

In [10]:
df["Percentage Change"].value_counts()

Unnamed: 0_level_0,count
Percentage Change,Unnamed: 1_level_1
1,291
0,210


In [11]:
for col in df.columns:
    if df[col].dtype == "object":  # Categorical columns
        df[col] = df[col].fillna(df[col].mode()[0])  # Fill with most frequent value
    else:  # Numerical columns
        df[col] = df[col].fillna(df[col].median())

In [12]:
le_industry = LabelEncoder()
df["Industry"] = le_industry.fit_transform(df["Industry"])

if "Series" in df.columns:
    le_series = LabelEncoder()
    df["Series"] = le_series.fit_transform(df["Series"])

In [13]:
if "Value (Indian Rupee)" in df.columns:
    df["Value (Indian Rupee)"] = df["Value (Indian Rupee)"].astype(float)

In [14]:
df.to_csv("clean_nifty_500.csv", index=False)

In [15]:
from sklearn.metrics import classification_report,confusion_matrix

In [16]:
X = df.select_dtypes(include=["number"]).drop(columns=["Percentage Change"])
y = df["Percentage Change"]

In [17]:
X = X.fillna(X.mean())

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [19]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [20]:
svc = SVC(kernel="rbf", random_state=42)
svc.fit(X_train_scaled, y_train)

In [21]:
cv_scores = cross_val_score(svc, X_train_scaled, y_train, cv=5)
print("Cross-Validation Scores:", cv_scores)
print("Mean CV Accuracy:", np.mean(cv_scores))

Cross-Validation Scores: [0.8    0.775  0.7375 0.725  0.7875]
Mean CV Accuracy: 0.765


In [22]:
y_pred = svc.predict(X_test_scaled)

In [23]:
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.52      0.66        42
           1       0.74      0.95      0.83        59

    accuracy                           0.77       101
   macro avg       0.81      0.74      0.74       101
weighted avg       0.80      0.77      0.76       101

Confusion Matrix:
[[22 20]
 [ 3 56]]


In [24]:
from sklearn.model_selection import GridSearchCV

In [25]:
# defining paramter range
param_grid = {'C' : [0.1 , 1 , 10 , 100 , 1000],
              'gamma' : [1 , 0.1 , 0.01 , 0.001 , 0.0001],
              'kernel': ['rbf']}

In [26]:
grid = GridSearchCV(SVC(),param_grid=param_grid,refit = True,cv=5,verbose=3)

In [27]:
grid.fit(X_train,y_train)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.588 total time=   0.0s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.588 total time=   0.1s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.575 total time=   0.0s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.575 total time=   0.0s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.575 total time=   0.0s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.588 total time=   0.0s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.588 total time=   0.0s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.575 total time=   0.0s
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.575 total time=   0.0s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.575 total time=   0.0s
[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.588 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf

In [28]:
y_pred2 = grid.predict(X_test)
print(classification_report(y_test,y_pred2))
print(confusion_matrix(y_test,y_pred2))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        42
           1       0.58      1.00      0.74        59

    accuracy                           0.58       101
   macro avg       0.29      0.50      0.37       101
weighted avg       0.34      0.58      0.43       101

[[ 0 42]
 [ 0 59]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
