In [5]:
pip install findspark

Collecting findspark
  Downloading findspark-2.0.1-py2.py3-none-any.whl.metadata (352 bytes)
Downloading findspark-2.0.1-py2.py3-none-any.whl (4.4 kB)
Installing collected packages: findspark
Successfully installed findspark-2.0.1


In [6]:
import nltk
from pyspark.sql.functions import when, col
from bs4 import BeautifulSoup
import bs4
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import os
import seaborn as sns
import xgboost as xgb
from xgboost import XGBRegressor
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import train_test_split
import streamlit as st
import requests
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType
from nltk.sentiment import SentimentIntensityAnalyzer
import findspark
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
findspark.init()


nltk.download('vader_lexicon')
color_pal = sns.color_palette()
plt.style.use('fivethirtyeight')



session = requests.session()

head = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}


def get_id(name):
    search_url = 'https://www.nseindia.com/api/search/autocomplete?q={}'
    get_details = 'https://www.nseindia.com/api/quote-equity?symbol={}'

    session.get('https://www.nseindia.com/', headers=head)


    search_results = session.get(url=search_url.format(name), headers=head)
    search_data = search_results.json()

    if 'symbols' in search_data and search_data['symbols']:
        search_result = search_data['symbols'][0]['symbol']

        company_details = session.get(
            url=get_details.format(search_result), headers=head)

        try:
            identifier = company_details.json()['info']['identifier']
            return identifier
        except KeyError:
            return f"Identifier not found for '{name}'"
    else:
        return f"No results found for '{name}'"

def read_stock_data(directory):
    stock_data = {}
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            stock_name = os.path.splitext(filename)[0]
            df = pd.read_csv(os.path.join(directory, filename))
            if 'datetime' in df.columns and 'close' in df.columns:
                stock_data[stock_name] = df[['datetime', 'close']]
    return stock_data




# Read stock data from archive folder
archive_folder = "archive"
stock_data = read_stock_data(archive_folder)


print(stock_data)


# Select a stock for visualization
selected_stock = input("Select a stock: ").upper()

# Display historical stock price data
print("Historical Stock Price Data")
print(selected_stock)
print(stock_data[selected_stock])

# Plot historical stock prices
if selected_stock in stock_data:
    df = stock_data[selected_stock]
    df['Date'] = pd.to_datetime(df['datetime'])
    plt.figure(figsize=(10, 6))
    plt.plot(df['Date'], df['close'])
    plt.xlabel("Date")
    plt.ylabel("Closing Price")
    plt.title(f"{selected_stock} Historical Stock Prices")
    plt.show()


from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def find_best_random_state(stock_data, selected_stock):
    best_random_state = float('inf')
    best_mae = float('inf')
    best_r2 = float('-inf')
    best_score = float('inf')

    if selected_stock in stock_data:
        df = stock_data[selected_stock]
        df['datetime'] = pd.to_datetime(df['datetime'])
        df['year'] = df['datetime'].dt.year
        df['month'] = df['datetime'].dt.month
        df['day'] = df['datetime'].dt.day

        X = df[['year', 'month', 'day']].values
        y = df['close']

        for random_state in range(1, 1001):
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

            model = XGBRegressor(random_state=random_state)
            model.fit(X_train, y_train)

            y_pred = model.predict(X_test)
            score = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)

            if score < best_score and mae < best_mae:
                best_score = score
                best_mae = mae
                best_r2 = r2
                best_random_state = random_state

    return best_random_state, best_score, best_mae, best_r2

best_random_state, best_score, best_mae, best_r2 = find_best_random_state(stock_data, selected_stock)
print(f"Best random state: {best_random_state}")
print(f"Best mse: {best_score}")
print(f"Best rmse: {np.sqrt(best_score)}")
print(f"Best mae: {best_mae}")
print(f"Best r2: {best_r2}")


def selected_train_model(stock_data, selected_stock):
    models = {}

    # Check if the selected stock is in the stock_data
    if selected_stock in stock_data:
        df = stock_data[selected_stock]
        df['datetime'] = pd.to_datetime(df['datetime'])
        df['year'] = df['datetime'].dt.year
        df['month'] = df['datetime'].dt.month
        df['day'] = df['datetime'].dt.day

        X = df[['year', 'month', 'day']].values
        y = df['close']

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=best_random_state)

        model = XGBRegressor(random_state=best_random_state)
        model.fit(X_train, y_train)
        models[selected_stock] = model
    else:
        print(f"Error: {selected_stock} is not in the stock_data")

    return models

models = selected_train_model(stock_data, selected_stock)


def predict_price(model, date):
    # Convert the date to a pandas datetime object
    date = pd.to_datetime(date)

    # Extract year, month, and day from the date
    year = date.year
    month = date.month
    day = date.day

    # Make prediction using the model
    prediction = model.predict(np.array([[year, month, day]]))[0]
    return prediction


today_date = datetime.date.today()
if selected_stock in models:
    prediction = predict_price(models[selected_stock], today_date)
    print(f"Predicted closing price for {selected_stock} on {today_date}: {prediction}")
else:
    print(f"Model for {selected_stock} is not available.")


company_name = selected_stock

# Button to trigger the API call
if company_name:
    ticker_symbol = get_id(company_name)
    st.write(
        f"The stock identifier for '{company_name}' is: {ticker_symbol}")

stock_url = f'https://www.nseindia.com/api/chart-databyindex?index={ticker_symbol}'


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


FileNotFoundError: [Errno 2] No such file or directory: 'archive'

# New Section