<a href="https://colab.research.google.com/github/ommakvana/BERT-MODEL/blob/main/Model_mix_linear.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; the model archive read later in this
# notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
!pip install newsapi-python



In [53]:
import pandas as pd
import numpy as np
import datetime as dt
from datetime import timedelta, date
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import statsmodels.api as sm
import yfinance as yf
import requests
from newsapi import NewsApiClient  # Importing the NewsApiClient
import warnings

# Suppress specific warnings by their message
warnings.filterwarnings("ignore", message="A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.")
warnings.filterwarnings("ignore", message="No supported index is available. Prediction results will be given with an integer index beginning at `start`.")
warnings.filterwarnings("ignore", message="No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.")
# warnings.filterwarnings("ignore", message="['/content/drive/MyDrive/CustomZip.zip' has been unzipped into 'model_1'Unzipped files and directories: ['config.json', 'model.safetensors', 'training_args.bin'][*********************100%%**********************]  1 of 1 complete])

import zipfile
import os

# Extract the model archive stored on the mounted Drive into a local folder
# (created if missing), then report what was extracted.
zip_file_path = '/content/drive/MyDrive/CustomZip.zip'
output_directory = 'model_1'
os.makedirs(output_directory, exist_ok=True)

# ZipFile opens in read mode by default.
with zipfile.ZipFile(zip_file_path) as model_archive:
    model_archive.extractall(path=output_directory)

print(f"'{zip_file_path}' has been unzipped into '{output_directory}'")
unzipped_files = os.listdir(output_directory)
print("Unzipped files and directories:", unzipped_files)

# Load the BERT sequence-classification model from the extracted archive.
model_1 = BertForSequenceClassification.from_pretrained(output_directory)
# The tokenizer is loaded from the stock 'bert-base-uncased' vocabulary rather
# than from `output_directory`; the recorded archive listing shows no
# tokenizer/vocabulary files. NOTE(review): presumably this is the checkpoint
# the model was fine-tuned from - confirm.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Initialize NewsAPI client.
# The API key must not be committed with the notebook: it is read from the
# NEWSAPI_KEY environment variable, falling back to an interactive prompt.
# NOTE(review): the key that was previously embedded here has been published
# and should be revoked.
from getpass import getpass

newsapi_key = os.environ.get('NEWSAPI_KEY') or getpass('NewsAPI key: ')
newsapi = NewsApiClient(api_key=newsapi_key)

def get_company_news_titles(company_name):
    """Fetch recent English-language headlines matching a company name.

    Queries the module-level ``newsapi`` client for up to 25 articles,
    requesting ``publishedAt`` ordering.

    Args:
        company_name: Free-text search query sent as the ``q`` parameter.

    Returns:
        list[str]: Headline strings in the order the API returned them.
        Articles whose title is missing, not a string, or blank are skipped,
        so the list may hold fewer than 25 entries or be empty.
    """
    query_params = {
        'q': company_name,
        'language': 'en',
        'sort_by': 'publishedAt',
        'page_size': 25
    }
    news_data = newsapi.get_everything(**query_params)

    # The titles are later passed straight to the tokenizer, which needs
    # strings; drop entries that carry no usable title.
    titles = []
    for article in news_data['articles']:
        title = article.get('title')
        if isinstance(title, str) and title.strip():
            titles.append(title)
    return titles

def get_stock_data(ticker):
    """Download price history for ``ticker`` from 2000-01-01 to today's date.

    Args:
        ticker: Symbol handed to ``yf.download``.

    Returns:
        Whatever ``yf.download`` returns for that range (presumably a
        date-indexed DataFrame of prices - confirm for the installed
        yfinance version).
    """
    history_start = '2000-01-01'
    # Today's date formatted as YYYY-MM-DD, the same format as the start bound.
    history_end = datetime.today().strftime('%Y-%m-%d')
    return yf.download(ticker, history_start, history_end)

def get_weighted_sentiment_score(company_name):
    """Score recent headlines for a company with the BERT classifier.

    Headlines come from ``get_company_news_titles``; each is classified by the
    module-level ``model_1`` and the softmax probability of class index 1 is
    averaged with linearly decaying weights (1 for the first headline down to
    0 for the last).

    Args:
        company_name: Search query used to fetch the headlines.

    Returns:
        The weighted average of the class-1 probabilities (a NumPy float).
        NOTE(review): class index 1 is presumably the "positive" label -
        confirm against the fine-tuned model's label mapping.

    Raises:
        ValueError: If no headlines were found, since there is nothing to
            score and the weighted average would be undefined.
    """
    news_titles = get_company_news_titles(company_name)
    if not news_titles:
        raise ValueError(
            f"No news headlines found for {company_name!r}; "
            "cannot compute a sentiment score."
        )

    inputs = tokenizer(news_titles, padding=True, truncation=True, return_tensors='pt')
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model_1(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1).detach().numpy()

    # Linearly decaying weights; with a single headline the only weight is 1,
    # otherwise the last headline gets weight 0 and does not contribute.
    weights = np.linspace(1, 0, num=len(predictions))
    weighted_average = np.average(predictions[:, 1], weights=weights)
    return weighted_average

def combine_data(stock_data, sentiment_score):
    """Return the price frame with a ``Sentiment`` column added.

    The score is assigned to every row. The input frame is left untouched; a
    new DataFrame is returned, so re-running a cell that calls this function
    does not change the caller's data.

    Args:
        stock_data: DataFrame of price history.
        sentiment_score: Value to store in the ``Sentiment`` column.

    Returns:
        A new DataFrame containing all original columns plus ``Sentiment``.
    """
    return stock_data.assign(Sentiment=sentiment_score)

def prepare_features(combined_data, what_to_predict):
    """Build next-row regression features/labels and split them 80/20.

    The label for each row is the value of ``what_to_predict`` on the following
    row (``shift(-1)``); rows with any missing value, including the final row
    which has no following value, are dropped. The caller's frame is not
    modified: the helper column is added to a copy.

    Args:
        combined_data: DataFrame containing ``what_to_predict`` and a
            ``Sentiment`` column.
        what_to_predict: Name of the column to predict one row ahead.

    Returns:
        The result of ``train_test_split(features, labels, test_size=0.2,
        random_state=42)``, unpacked by the caller as
        ``X_train, X_test, y_train, y_test``.

    NOTE(review): ``shuffle`` is left at the splitter's default, so the split
    is presumably not chronological - confirm this is intended for
    time-ordered prices.
    """
    # 'Close_1' is kept as the label name regardless of `what_to_predict`, so
    # the returned label Series keeps the name existing callers see.
    with_target = combined_data.assign(
        Close_1=combined_data[what_to_predict].shift(-1)
    ).dropna()
    features = with_target[[what_to_predict, 'Sentiment']]
    labels = with_target['Close_1']
    return train_test_split(features, labels, test_size=0.2, random_state=42)

def predict_stock_prices(df, what_to_predict):
    """Fit a SARIMAX model on one column and return its out-of-sample forecast.

    The model uses order (5, 1, 0) and seasonal order (1, 1, 0, 12). The
    prediction starts at position ``len(df)``, i.e. the first step after the
    last observed row.

    Args:
        df: DataFrame whose index holds the observation dates.
        what_to_predict: Name of the column to model.

    Returns:
        The predicted mean as a Series, re-indexed with business days that
        follow the last date in ``df``.
    """
    p, d, q = 5, 1, 0
    seasonal_order = (1, 1, 0, 12)
    model = sm.tsa.statespace.SARIMAX(df[what_to_predict], order=(p, d, q), seasonal_order=seasonal_order)
    res = model.fit()
    predicted_values = res.get_prediction(start=len(df)).predicted_mean

    last_date = df.index[-1]
    # Label the forecast with business days rather than calendar days, so a
    # series ending on a Friday is forecast for Monday instead of Saturday.
    # Exchange holidays are still not accounted for.
    forecast_dates = pd.date_range(
        start=last_date + pd.offsets.BDay(1),
        periods=len(predicted_values),
        freq='B',
    )
    predicted_values.index = forecast_dates
    return predicted_values

def unified_stock_prediction(company_name, ticker):
    """Run the sentiment-plus-price pipeline for one company and print a report.

    Steps, in order: compute the weighted news sentiment, download the price
    history, attach the sentiment to the prices, fit a linear regression on
    the prepared train split and measure its mean squared error on the test
    split, then produce the time-series forecast of the closing price.

    Args:
        company_name: Name used to search for news headlines.
        ticker: Symbol used to download price history.

    Returns:
        None. The company name, sentiment score, forecast and mean squared
        error are printed.
    """
    target_column = 'Close'

    # Sentiment is computed before any price data is requested.
    sentiment = get_weighted_sentiment_score(company_name)
    price_history = get_stock_data(ticker)
    enriched = combine_data(price_history, sentiment)

    X_train, X_test, y_train, y_test = prepare_features(enriched, target_column)

    # Linear regression on the train split; error measured on the held-out rows.
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    squared_errors = (regressor.predict(X_test) - y_test) ** 2
    mse = np.mean(squared_errors)

    forecast = predict_stock_prices(enriched, target_column)

    # Report, with a single-space line between sections.
    spacer = " "
    print(f"Company: {company_name}")
    print(spacer)
    print(f"Weighted Sentiment Score: {sentiment:.2f}")
    print(spacer)
    print("Predicted Stock Closing Prices:", forecast)
    print(spacer)
    print(f"Mean Squared Error: {mse:.2f}")

# Example usage: company names and tickers are paired by position.
company_names = ["Suzlon energy"]
tickers = ["SUZLON.NS"]

# zip() stops at the shorter list, so a length mismatch would silently skip
# entries; fail loudly instead.
assert len(company_names) == len(tickers), "company_names and tickers must have the same length"

for company_name, ticker in zip(company_names, tickers):
    unified_stock_prediction(company_name, ticker)


'/content/drive/MyDrive/CustomZip.zip' has been unzipped into 'model_1'
Unzipped files and directories: ['config.json', 'model.safetensors', 'training_args.bin']


[*********************100%%**********************]  1 of 1 completed


Company: Suzlon energy
 
Weighted Sentiment Score: 0.63
 
Predicted Stock Closing Prices: 2024-05-31    46.372617
Freq: D, dtype: float64
 
Mean Squared Error: 15.93
