In [None]:
#importing the required libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [None]:
# Read the dataset CSV into a DataFrame and preview its first rows.
# The location is kept in one named constant so it is easy to find and change.
DATA_PATH = '/content/quantvision_financial_dataset_200.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,lookback_days,asset_type,market_regime,high_volatility,trend_continuation,technical_score,edge_density,slope_strength,candlestick_variance,pattern_symmetry,future_trend
0,48,equity,bullish,0,1,59.99,0.504,0.298,1.572,0.768,1
1,38,index,bullish,1,1,78.54,0.559,0.037,0.692,0.538,1
2,24,equity,bullish,1,0,56.03,0.617,0.212,1.419,0.301,1
3,52,equity,bullish,0,0,66.51,0.36,0.347,0.699,0.498,1
4,17,equity,bullish,1,1,61.21,0.492,0.144,2.52,0.828,1


In [None]:
# Print dtypes and non-null counts for every column, then display the number
# of missing values per column (the last expression is the cell output).
df.info()
df.isna().sum()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   lookback_days         200 non-null    int64  
 1   asset_type            200 non-null    object 
 2   market_regime         200 non-null    object 
 3   high_volatility       200 non-null    int64  
 4   trend_continuation    200 non-null    int64  
 5   technical_score       200 non-null    float64
 6   edge_density          200 non-null    float64
 7   slope_strength        200 non-null    float64
 8   candlestick_variance  200 non-null    float64
 9   pattern_symmetry      200 non-null    float64
 10  future_trend          200 non-null    int64  
dtypes: float64(5), int64(4), object(2)
memory usage: 17.3+ KB


Unnamed: 0,0
lookback_days,0
asset_type,0
market_regime,0
high_volatility,0
trend_continuation,0
technical_score,0
edge_density,0
slope_strength,0
candlestick_variance,0
pattern_symmetry,0


In [None]:
# Normalise the header: trim surrounding whitespace and lower-case each column
# name, so later cells can refer to columns by a predictable spelling.
normalized_columns = df.columns.str.strip().str.lower()
df.columns = normalized_columns
df.columns

Index(['lookback_days', 'asset_type', 'market_regime', 'high_volatility',
       'trend_continuation', 'technical_score', 'edge_density',
       'slope_strength', 'candlestick_variance', 'pattern_symmetry',
       'future_trend'],
      dtype='object')

In [None]:
# Separate the prediction target (y) from the explanatory features (x).
y = df["future_trend"]
x = df.drop(columns=["future_trend"])

In [None]:
# The models below need numeric inputs, so the two text columns are one-hot
# encoded. drop_first=True leaves out the first category of each column, which
# avoids a redundant indicator per encoded column.
categorical_columns = ["asset_type", "market_regime"]
x_encoded = pd.get_dummies(
    x,
    columns=categorical_columns,
    drop_first=True,
)

In [None]:
# Hold out 20% of the rows for testing; the remaining 80% are used for training.
# random_state is fixed so the same split is produced on every run.
# NOTE(review): the split is not stratified, and the recorded test fold holds
# only 2 negative samples out of 40 - consider stratify=y and re-checking the
# reported metrics.
split_parts = train_test_split(x_encoded, y, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = split_parts


In [None]:
# Standardise the features with StandardScaler; both models are trained on the
# scaled values.
# The scaler is fitted on the training split only and then applied unchanged to
# the test split, so no information from the test rows enters the fit.
# NOTE: x_train / x_test are overwritten with the scaled versions.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


In [None]:
# Fit the logistic-regression baseline on the scaled training data and predict
# the test split.
# class_weight='balanced' is passed so the two classes are reweighted during
# fitting; C=0.1 and the liblinear solver are the chosen hyper-parameters.
log_model = LogisticRegression(
    C=0.1,
    class_weight='balanced',
    max_iter=2000,
    solver='liblinear',
)
y_pred_log = log_model.fit(x_train, y_train).predict(x_test)



In [None]:
# Fit the neural network (multi-layer perceptron) on the scaled training data
# and predict the test split.
mlp_params = dict(
    hidden_layer_sizes=(128, 64, 32),  # three hidden layers
    activation='tanh',
    solver='adam',
    learning_rate_init=0.001,
    alpha=0.01,
    max_iter=1000,
    random_state=42,  # fixed seed so training is repeatable
)
mlp = MLPClassifier(**mlp_params)
y_pred_mlp = mlp.fit(x_train, y_train).predict(x_test)

In [None]:
#Evaluating metrics
def get_metrics(y_true, y_pred):
    """Collect the evaluation metrics for one set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by a model, aligned with ``y_true``.

    Returns:
        dict: Maps each metric label ("Accuracy", "Precision", "recall",
        "f1 score", "Confusion matrix") to the value returned by the
        corresponding scorer, called as ``scorer(y_true, y_pred)`` with its
        default settings.
    """
    # (label, scorer) pairs; the order here is the key order of the result.
    scorers = (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("recall", recall_score),
        ("f1 score", f1_score),
        ("Confusion matrix", confusion_matrix),
    )
    return {label: scorer(y_true, y_pred) for label, scorer in scorers}


In [None]:
# Score both models against the same held-out labels
# (logistic regression first, then the neural network).
log_metrics, nn_metrics = (
    get_metrics(y_test, predictions)
    for predictions in (y_pred_log, y_pred_mlp)
)

In [None]:
# Put the metrics of both models side by side: one row per model, one column
# per metric. The bare name on the last line renders the table as cell output.
metrics_by_model = {
    "Logistic Regression": log_metrics,
    "Nueral Network": nn_metrics,
}
comparision_table = pd.DataFrame(
    list(metrics_by_model.values()),
    index=list(metrics_by_model),
)
comparision_table

Unnamed: 0,Accuracy,Precision,recall,f1 score,Confusion matrix
Logistic Regression,0.775,1.0,0.763158,0.865672,"[[2, 0], [9, 29]]"
Nueral Network,0.95,0.95,1.0,0.974359,"[[0, 2], [0, 38]]"


# ANALYSIS AND FINANCIAL INTERPRETATION

Ques1. Why does Logistic Regression perform reasonably well or badly?   
Ans. Logistic regression attains an accuracy of 77.5% with perfect precision (1.0) but lower recall (0.76).   
Logistic regression is a linear model, so it works well when the relationship between the inputs and the target is approximately linear. 100% precision means that whenever it predicted a positive movement on the test set it was correct; a recall of 0.76 means it missed 9 of the 38 actual positive movements.       
Logistic regression is stable, but it fails to fully capture market complexity, leading to missed opportunities.


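Ques2. Why does the neural network perform better or worse?    
Ans. The neural network achieves 95% accuracy and precision, perfect recall and a 97% F1 score. This is because a neural network can model non-linear relationships, which are very common in financial markets. Note, however, that its confusion matrix ([[0, 2], [0, 38]]) shows it predicted the positive class for all 40 test samples: its accuracy equals the share of positives in the test set (38/40), and it identified neither of the two negative cases.         
On these headline metrics the neural network outperforms logistic regression.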


Ques3. Effect of volatility on predictions?      
Ans. Volatility increases uncertainty, which affects both models differently.   
     High volatility increases prediction risk, making model outputs less reliable for trading decisions.


Ques4. Role of trend continuation?      
Ans. Both models benefit from trend continuation, but to different extents:    


*   Logistic Regression : Works mainly in strong, consistent trends.
        
    




*   Neural Network: Performs better during sustained bearish or bullish phases.





Ques5. Situations where each model fails, and why?    
Ans. Logistic Regression: fails when market behavior is nonlinear or when there are sudden reversals or shocks.     
Neural Network: fails when the model is overfitted or when market conditions change significantly from the training data.