In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Load Infosys stock data
infy_data = pd.read_csv('infy_stock_data.csv')  # Replace 'infy_stock_data.csv' with your file path

# Handle missing values if any
infy_data = infy_data.dropna()  # Drop rows with missing values
infy_data = pd.get_dummies(infy_data, columns=['Symbol','Series'])

# Feature engineering (if needed)

# Assuming 'Close' is the target variable, convert it to categorical for classification
infy_data['Close'] = pd.qcut(infy_data['Close'], q=2, labels=False)

# Split the data into features and target variable
X = infy_data.drop(columns=['Date', 'Close'])  # Assuming 'Close' is the target variable
y = infy_data['Close']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Logistic Regression
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)
lr_accuracy = accuracy_score(y_test, lr_predictions)
lr_cpr = classification_report(y_test, lr_predictions)
print("Logistic Regression Accuracy:", lr_accuracy)
print("Logistic Regression cpr:", lr_cpr)

# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_predictions = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)
dt_cpr = classification_report(y_test, dt_predictions)
print("Decision Tree Accuracy:", dt_accuracy)
print("Decision Tree cpr:", dt_cpr)

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
rf_cpr = classification_report(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)
print("Random Forest cpr:", rf_cpr)
# Gradient Boosting
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb_model.fit(X_train, y_train)
gb_predictions = gb_model.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_predictions)
gb_cpr = classification_report(y_test, gb_predictions)
print("Gradient Boosting Accuracy:", gb_accuracy)
print("Gradient Boosting cpr:", gb_cpr)
# K-Nearest Neighbors
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)
knn_predictions = knn_model.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_predictions)
knn_cpr = classification_report(y_test, knn_predictions)
print("KNN Accuracy:", knn_accuracy)
print("KNN cpr:", knn_cpr)

# Support Vector Machine
svm_model = SVC(kernel='rbf')
svm_model.fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)
svm_cpr = classification_report(y_test, svm_predictions)
print("SVM Accuracy:", svm_accuracy)
print("SVM cpr:", svm_cpr)
# Neural Network
nn_model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000)
nn_model.fit(X_train, y_train)
nn_predictions = nn_model.predict(X_test)
nn_accuracy = accuracy_score(y_test, nn_predictions)
nn_cpr = classification_report(y_test, nn_predictions)
print("Neural Network Accuracy:", nn_accuracy)
print("Neural Network cpr:", nn_cpr)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Logistic Regression Accuracy: 0.48
Logistic Regression cpr:               precision    recall  f1-score   support

           0       0.00      0.00      0.00        26
           1       0.48      1.00      0.65        24

    accuracy                           0.48        50
   macro avg       0.24      0.50      0.32        50
weighted avg       0.23      0.48      0.31        50

Decision Tree Accuracy: 1.0
Decision Tree cpr:               precision    recall  f1-score   support

           0       1.00      1.00      1.00        26
           1       1.00      1.00      1.00        24

    accuracy                           1.00        50
   macro avg       1.00      1.00      1.00        50
weighted avg       1.00      1.00      1.00        50

Random Forest Accuracy: 1.0
Random Forest cpr:               precision    recall  f1-score   support

           0       1.00      1.00      1.00        26
           1       1.00      1.00      1.00        24

    accuracy                

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
