In [10]:
import pandas as pd
# Read cleaned_nifty.csv into `df`; the modelling cells below split this
# frame into features and target.
df = pd.read_csv(filepath_or_buffer='cleaned_nifty.csv')

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor



# Feature Engineering
# You can add additional features or apply transformations to existing ones here

# Hold-out comparison of several regressors on an 80:20 train/test split.
# Each candidate is wrapped in a scaler + model pipeline, tuned with 5-fold
# grid search on the training rows, then scored once on the held-out rows.

SEED = 42        # shared by the split and by the randomised tree estimators
TEST_SIZE = 0.2  # fraction of rows held out for the final evaluation

# Split the data into features and target variable
X = df.drop(columns=['Year', 'Annual'])
y = df['Annual']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=SEED)

# Model Selection and Hyperparameter Tuning
# The tree-based estimators take a random_state; leaving it unset made their
# reported scores change every time this cell was re-run, so it is pinned.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'ElasticNet Regression': ElasticNet(),
    'Decision Tree Regressor': DecisionTreeRegressor(random_state=SEED),
    'Random Forest Regressor': RandomForestRegressor(random_state=SEED)
}

# Grid keys use the `model__` prefix because each estimator is the pipeline
# step named 'model'. An empty grid means "fit once with defaults".
param_grids = {
    'Linear Regression': {},
    'Ridge Regression': {'model__alpha': [0.1, 1.0, 10.0]},  # Specify alpha for Ridge model
    'Lasso Regression': {'model__alpha': [0.1, 1.0, 10.0]},  # Specify alpha for Lasso model
    'ElasticNet Regression': {'model__alpha': [0.1, 1.0, 10.0], 'model__l1_ratio': [0.1, 0.5, 0.9]},  # Specify alpha and l1_ratio for ElasticNet model
    'Decision Tree Regressor': {'model__max_depth': [None, 10, 20]},  # Specify max_depth for Decision Tree model
    'Random Forest Regressor': {'model__n_estimators': [100, 200, 300], 'model__max_depth': [None, 10, 20]}  # Specify n_estimators and max_depth for Random Forest model
}

best_models = {}

for model_name, model in models.items():
    pipeline = Pipeline([
        ('scaler', StandardScaler()),  # Scale features
        ('model', model)               # Model
    ])

    # Grid search for hyperparameter tuning; scoring is the negated MSE, so
    # the combination with the lowest cross-validated MSE is selected.
    grid_search = GridSearchCV(pipeline, param_grid=param_grids[model_name], cv=5, scoring='neg_mean_squared_error', verbose=0)
    grid_search.fit(X_train, y_train)

    # Best model from grid search
    best_models[model_name] = grid_search.best_estimator_

# Evaluate models on the held-out rows.
# NOTE: the value stored under 'MDAPE' comes from
# mean_absolute_percentage_error, i.e. it is the *mean* (not median) absolute
# percentage error, scaled to percent. The label is kept unchanged so the
# printed output stays comparable with earlier runs.
# 'Accuracy' is simply 100 minus that percentage, so it becomes negative
# whenever the percentage error exceeds 100.
results = {}
for model_name, model in best_models.items():
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mdape = mean_absolute_percentage_error(y_test, y_pred) * 100
    accuracy = 100 - mdape

    results[model_name] = {'MSE': mse, 'MDAPE': mdape, 'Accuracy': accuracy}

# Print results. The split label is derived from TEST_SIZE so it cannot
# drift away from the split that was actually used.
split_label = f"{round((1 - TEST_SIZE) * 100)}:{round(TEST_SIZE * 100)}"
for model_name, metrics in results.items():
    print(f"Model: {model_name}")
    print(f"Split Size= {split_label}")
    print(f"MSE: {metrics['MSE']}")
    print(f"MDAPE: {metrics['MDAPE']}")
    print(f"Accuracy: {metrics['Accuracy']}")
    print()


Model: Linear Regression
Split Size= 80:20
MSE: 4.804405282375961
MDAPE: 17.089167355908465
Accuracy: 82.91083264409153

Model: Ridge Regression
Split Size= 80:20
MSE: 2.296241820092828
MDAPE: 13.029696799178863
Accuracy: 86.97030320082114

Model: Lasso Regression
Split Size= 80:20
MSE: 2.0963941157179393
MDAPE: 10.26447416333147
Accuracy: 89.73552583666853

Model: ElasticNet Regression
Split Size= 80:20
MSE: 21.31685907760745
MDAPE: 27.68760227303201
Accuracy: 72.31239772696799

Model: Decision Tree Regressor
Split Size= 80:20
MSE: 919.1478199999999
MDAPE: 264.33959827160606
Accuracy: -164.33959827160606

Model: Random Forest Regressor
Split Size= 80:20
MSE: 454.52518987599996
MDAPE: 191.80476324074155
Accuracy: -91.80476324074155



In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor



# Feature Engineering
# You can add additional features or apply transformations to existing ones here

# Hold-out comparison of several regressors on a 75:25 train/test split.
# Each candidate is wrapped in a scaler + model pipeline, tuned with 5-fold
# grid search on the training rows, then scored once on the held-out rows.

SEED = 42         # shared by the split and by the randomised tree estimators
TEST_SIZE = 0.25  # fraction of rows held out for the final evaluation

# Split the data into features and target variable
X = df.drop(columns=['Year', 'Annual'])
y = df['Annual']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=SEED)

# Model Selection and Hyperparameter Tuning
# The tree-based estimators take a random_state; leaving it unset made their
# reported scores change every time this cell was re-run, so it is pinned.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'ElasticNet Regression': ElasticNet(),
    'Decision Tree Regressor': DecisionTreeRegressor(random_state=SEED),
    'Random Forest Regressor': RandomForestRegressor(random_state=SEED)
}

# Grid keys use the `model__` prefix because each estimator is the pipeline
# step named 'model'. An empty grid means "fit once with defaults".
param_grids = {
    'Linear Regression': {},
    'Ridge Regression': {'model__alpha': [0.1, 1.0, 10.0]},  # Specify alpha for Ridge model
    'Lasso Regression': {'model__alpha': [0.1, 1.0, 10.0]},  # Specify alpha for Lasso model
    'ElasticNet Regression': {'model__alpha': [0.1, 1.0, 10.0], 'model__l1_ratio': [0.1, 0.5, 0.9]},  # Specify alpha and l1_ratio for ElasticNet model
    'Decision Tree Regressor': {'model__max_depth': [None, 10, 20]},  # Specify max_depth for Decision Tree model
    'Random Forest Regressor': {'model__n_estimators': [100, 200, 300], 'model__max_depth': [None, 10, 20]}  # Specify n_estimators and max_depth for Random Forest model
}

best_models = {}

for model_name, model in models.items():
    pipeline = Pipeline([
        ('scaler', StandardScaler()),  # Scale features
        ('model', model)               # Model
    ])

    # Grid search for hyperparameter tuning; scoring is the negated MSE, so
    # the combination with the lowest cross-validated MSE is selected.
    grid_search = GridSearchCV(pipeline, param_grid=param_grids[model_name], cv=5, scoring='neg_mean_squared_error', verbose=0)
    grid_search.fit(X_train, y_train)

    # Best model from grid search
    best_models[model_name] = grid_search.best_estimator_

# Evaluate models on the held-out rows.
# NOTE: the value stored under 'MDAPE' comes from
# mean_absolute_percentage_error, i.e. it is the *mean* (not median) absolute
# percentage error, scaled to percent. The label is kept unchanged so the
# printed output stays comparable with earlier runs.
# 'Accuracy' is simply 100 minus that percentage, so it becomes negative
# whenever the percentage error exceeds 100.
results = {}
for model_name, model in best_models.items():
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mdape = mean_absolute_percentage_error(y_test, y_pred) * 100
    accuracy = 100 - mdape

    results[model_name] = {'MSE': mse, 'MDAPE': mdape, 'Accuracy': accuracy}

# Print results. The split label is derived from TEST_SIZE so it cannot
# drift away from the split that was actually used.
split_label = f"{round((1 - TEST_SIZE) * 100)}:{round(TEST_SIZE * 100)}"
for model_name, metrics in results.items():
    print(f"Model: {model_name}")
    print(f"Split Size= {split_label}")
    print(f"MSE: {metrics['MSE']}")
    print(f"MDAPE: {metrics['MDAPE']}")
    print(f"Accuracy: {metrics['Accuracy']}")
    print()


Model: Linear Regression
Split Size= 75:25
MSE: 42.23090955523725
MDAPE: 34.86714645434498
Accuracy: 65.13285354565502

Model: Ridge Regression
Split Size= 75:25
MSE: 10.528663953718945
MDAPE: 16.828471957336042
Accuracy: 83.17152804266397

Model: Lasso Regression
Split Size= 75:25
MSE: 4.685744866673038
MDAPE: 12.357690293588995
Accuracy: 87.642309706411

Model: ElasticNet Regression
Split Size= 75:25
MSE: 11.19596968046875
MDAPE: 16.15452775971616
Accuracy: 83.84547224028384

Model: Decision Tree Regressor
Split Size= 75:25
MSE: 1208.7939999999999
MDAPE: 297.66420269172806
Accuracy: -197.66420269172806

Model: Random Forest Regressor
Split Size= 75:25
MSE: 398.2966594083335
MDAPE: 164.6858566280988
Accuracy: -64.6858566280988



In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor



# Feature Engineering
# You can add additional features or apply transformations to existing ones here

# Hold-out comparison of several regressors on a 70:30 train/test split.
# Each candidate is wrapped in a scaler + model pipeline, tuned with 5-fold
# grid search on the training rows, then scored once on the held-out rows.

SEED = 42        # shared by the split and by the randomised tree estimators
TEST_SIZE = 0.3  # fraction of rows held out for the final evaluation

# Split the data into features and target variable
X = df.drop(columns=['Year', 'Annual'])
y = df['Annual']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=SEED)

# Model Selection and Hyperparameter Tuning
# The tree-based estimators take a random_state; leaving it unset made their
# reported scores change every time this cell was re-run, so it is pinned.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'ElasticNet Regression': ElasticNet(),
    'Decision Tree Regressor': DecisionTreeRegressor(random_state=SEED),
    'Random Forest Regressor': RandomForestRegressor(random_state=SEED)
}

# Grid keys use the `model__` prefix because each estimator is the pipeline
# step named 'model'. An empty grid means "fit once with defaults".
param_grids = {
    'Linear Regression': {},
    'Ridge Regression': {'model__alpha': [0.1, 1.0, 10.0]},  # Specify alpha for Ridge model
    'Lasso Regression': {'model__alpha': [0.1, 1.0, 10.0]},  # Specify alpha for Lasso model
    'ElasticNet Regression': {'model__alpha': [0.1, 1.0, 10.0], 'model__l1_ratio': [0.1, 0.5, 0.9]},  # Specify alpha and l1_ratio for ElasticNet model
    'Decision Tree Regressor': {'model__max_depth': [None, 10, 20]},  # Specify max_depth for Decision Tree model
    'Random Forest Regressor': {'model__n_estimators': [100, 200, 300], 'model__max_depth': [None, 10, 20]}  # Specify n_estimators and max_depth for Random Forest model
}

best_models = {}

for model_name, model in models.items():
    pipeline = Pipeline([
        ('scaler', StandardScaler()),  # Scale features
        ('model', model)               # Model
    ])

    # Grid search for hyperparameter tuning; scoring is the negated MSE, so
    # the combination with the lowest cross-validated MSE is selected.
    grid_search = GridSearchCV(pipeline, param_grid=param_grids[model_name], cv=5, scoring='neg_mean_squared_error', verbose=0)
    grid_search.fit(X_train, y_train)

    # Best model from grid search
    best_models[model_name] = grid_search.best_estimator_

# Evaluate models on the held-out rows.
# NOTE: the value stored under 'MDAPE' comes from
# mean_absolute_percentage_error, i.e. it is the *mean* (not median) absolute
# percentage error, scaled to percent. The label is kept unchanged so the
# printed output stays comparable with earlier runs.
# 'Accuracy' is simply 100 minus that percentage, so it becomes negative
# whenever the percentage error exceeds 100.
results = {}
for model_name, model in best_models.items():
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mdape = mean_absolute_percentage_error(y_test, y_pred) * 100
    accuracy = 100 - mdape

    results[model_name] = {'MSE': mse, 'MDAPE': mdape, 'Accuracy': accuracy}

# Print results. The split label is derived from TEST_SIZE so it cannot
# drift away from the split that was actually used.
split_label = f"{round((1 - TEST_SIZE) * 100)}:{round(TEST_SIZE * 100)}"
for model_name, metrics in results.items():
    print(f"Model: {model_name}")
    print(f"Split Size= {split_label}")
    print(f"MSE: {metrics['MSE']}")
    print(f"MDAPE: {metrics['MDAPE']}")
    print(f"Accuracy: {metrics['Accuracy']}")
    print()


Model: Linear Regression
Split Size= 70:30
MSE: 39.23050619442212
MDAPE: 38.11555342231291
Accuracy: 61.88444657768709

Model: Ridge Regression
Split Size= 70:30
MSE: 8.796828248676631
MDAPE: 22.697339881668597
Accuracy: 77.3026601183314

Model: Lasso Regression
Split Size= 70:30
MSE: 92.33772798124461
MDAPE: 43.46696952731785
Accuracy: 56.53303047268215

Model: ElasticNet Regression
Split Size= 70:30
MSE: 13.504116675325047
MDAPE: 27.890202841243372
Accuracy: 72.10979715875663

Model: Decision Tree Regressor
Split Size= 70:30
MSE: 685.6442142857143
MDAPE: 195.7231129279306
Accuracy: -95.7231129279306

Model: Random Forest Regressor
Split Size= 70:30
MSE: 341.0154535907145
MDAPE: 146.59657889107567
Accuracy: -46.596578891075666



In [None]:
split size=80-20
Model: Linear Regression
MSE: 4.804405282375961
MDAPE: 17.089167355908465
Accuracy: 82.91083264409153

Model: Ridge Regression
MSE: 2.296241820092828
MDAPE: 13.029696799178863
Accuracy: 86.97030320082114

Model: Lasso Regression
MSE: 2.0963941157179393
MDAPE: 10.26447416333147
Accuracy: 89.73552583666853

Model: ElasticNet Regression
MSE: 21.31685907760745
MDAPE: 27.68760227303201
Accuracy: 72.31239772696799

Model: Decision Tree Regressor
MSE: 888.4759799999998
MDAPE: 307.80287184574473
Accuracy: -207.80287184574473

Model: Random Forest Regressor
MSE: 424.46569717066706
MDAPE: 191.343304500251
Accuracy: -91.34330450025101
 
split size=75-25
Model: Linear Regression
MSE: 42.23090955523725
MDAPE: 34.86714645434498
Accuracy: 65.13285354565502

Model: Ridge Regression
MSE: 10.528663953718945
MDAPE: 16.828471957336042
Accuracy: 83.17152804266397

Model: Lasso Regression
MSE: 4.685744866673038
MDAPE: 12.357690293588995
Accuracy: 87.642309706411

Model: ElasticNet Regression
MSE: 11.19596968046875
MDAPE: 16.15452775971616
Accuracy: 83.84547224028384

Model: Decision Tree Regressor
MSE: 792.9035
MDAPE: 226.88579070995334
Accuracy: -126.88579070995334

Model: Random Forest Regressor
MSE: 392.69976370750015
MDAPE: 163.2322500193431
Accuracy: -63.2322500193431

split size=70-30
Model: Linear Regression
MSE: 39.23050619442212
MDAPE: 38.11555342231291
Accuracy: 61.88444657768709

Model: Ridge Regression
MSE: 8.796828248676631
MDAPE: 22.697339881668597
Accuracy: 77.3026601183314

Model: Lasso Regression
MSE: 92.33772798124461
MDAPE: 43.46696952731785
Accuracy: 56.53303047268215

Model: ElasticNet Regression
MSE: 13.504116675325047
MDAPE: 27.890202841243372
Accuracy: 72.10979715875663

Model: Decision Tree Regressor
MSE: 817.1634857142857
MDAPE: 238.77062034313767
Accuracy: -138.77062034313767

Model: Random Forest Regressor
MSE: 294.5530567914287
MDAPE: 150.5291345545939
Accuracy: -50.529134554593895