In [1]:
# Use a separate cell for this, or run it in your terminal
!pip install pandas numpy scikit-learn transformers matplotlib
!pip install yfinance pandas_ta



In [2]:
import yfinance as yf
import pandas as pd

# Universe: ten Indian large-cap stocks. Yahoo Finance identifies symbols
# listed on the National Stock Exchange of India with the '.NS' suffix.
TICKERS = [
    'RELIANCE.NS', 'HDFCBANK.NS', 'INFY.NS', 'ICICIBANK.NS', 'BHARTIARTL.NS',
    'TCS.NS', 'LT.NS', 'KOTAKBANK.NS', 'AXISBANK.NS', 'ITC.NS',
]

# Zero-padded ISO dates (YYYY-MM-DD). The window spans eleven months
# (12 Jan 2025 up to 12 Dec 2025), which is enough for initial testing.
START_DATE = '2025-01-12'
END_DATE = '2025-12-12'

# Download daily OHLCV bars for every ticker in one request.
# `auto_adjust=True` is passed explicitly so the result does not depend on the
# library's default: prices are adjusted for splits/dividends and the frame
# carries Close/High/Low/Open/Volume with no separate 'Adj Close' column.
data = yf.download(TICKERS, start=START_DATE, end=END_DATE, auto_adjust=True)

# Fail here, with a clear message, rather than in a later cell with a
# confusing KeyError when the download returned nothing.
if data.empty:
    raise RuntimeError(
        f"yfinance returned no rows for {len(TICKERS)} tickers between "
        f"{START_DATE} and {END_DATE}; check the symbols and your connection."
    )

# `data` has two column levels (price field, ticker). Selecting 'Close'
# yields a plain Date x Ticker table that is easier to eyeball.
close_prices = data['Close']

print("--- Sample Close Prices (First 5 Rows) ---")
print(close_prices.head(5))
print("\n--- Data Structure Info ---")
close_prices.info()

  data = yf.download(TICKERS, start=START_DATE, end=END_DATE)
[*********************100%***********************]  10 of 10 completed

--- Sample Close Prices (First 5 Rows) ---
Ticker      AXISBANK.NS  BHARTIARTL.NS  HDFCBANK.NS  ICICIBANK.NS  \
Date                                                                
2025-01-13  1048.404053    1583.958252   804.549927   1220.333862   
2025-01-14  1050.752075    1586.834229   812.319885   1230.803101   
2025-01-15  1025.923218    1594.123291   810.568542   1228.768677   
2025-01-16  1037.113647    1616.684692   815.008545   1239.535645   
2025-01-17   990.203735    1614.007080   807.460571   1216.066650   

Ticker          INFY.NS      ITC.NS  KOTAKBANK.NS        LT.NS  RELIANCE.NS  \
Date                                                                          
2025-01-13  1905.799072  424.407806   1736.398071  3432.289307  1234.917847   
2025-01-14  1884.285889  422.039520   1748.583984  3430.605225  1233.822266   
2025-01-15  1893.609863  422.764526   1787.539062  3469.046387  1247.218628   
2025-01-16  1873.019165  418.414551   1803.470703  3475.486328  1261.411987   




In [3]:
# Cell 3: reshape the wide download into a long table with one row per
# (Date, Ticker) pair.

# Price fields the rest of the notebook expects, in output column order.
PRICE_COLUMNS = ['Close', 'High', 'Low', 'Open', 'Volume']

# 1. Move the ticker column level (level 1) into the row index.
#    `future_stack=True` opts into the non-deprecated stack implementation;
#    it no longer drops empty rows itself, so `dropna(how='all')` keeps the
#    previous behaviour of discarding rows with no data at all.
#    `rename_axis` names the two index levels and clears the leftover name on
#    the column axis.
stacked = (
    data.stack(level=1, future_stack=True)
    .dropna(how='all')
    .rename_axis(index=['Date', 'Ticker'], columns=None)
)

# 2. Select columns by NAME instead of overwriting the labels positionally:
#    a positional rename would silently mislabel the data if the download ever
#    returned the fields in a different order or with an extra column.
missing_columns = [col for col in PRICE_COLUMNS if col not in stacked.columns]
if missing_columns:
    raise KeyError(f"Downloaded data is missing expected columns: {missing_columns}")

# 3. Flatten the index into columns and make sure Date is a real datetime.
data_long = (
    stacked.reset_index()
    .loc[:, ['Date', 'Ticker', *PRICE_COLUMNS]]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

# 4. Sort chronologically, then by ticker: time-series features and
#    backtests depend on rows being in date order.
df_flat = data_long.set_index('Date').sort_values(['Date', 'Ticker'])

# Independent copy used as the feature table.
df_features = df_flat.copy()

# Display results
print("\n--- Flat DataFrame (df_features) Sample ---")
# Displaying 20 rows helps verify the correct interleaving of the 10 tickers
print(df_features.head(20))
print(f"\nTotal rows after flattening: {len(df_features)}")
print(f"Number of Tickers: {df_features['Ticker'].nunique()}")
print(f"Columns in final feature DataFrame: {df_features.columns.tolist()}")


--- Flat DataFrame (df_features) Sample ---
                   Ticker        Close         High          Low         Open  \
Date                                                                            
2025-01-13    AXISBANK.NS  1048.404053  1062.092306  1022.376248  1025.723363   
2025-01-13  BHARTIARTL.NS  1583.958252  1601.313167  1565.016626  1566.900898   
2025-01-13    HDFCBANK.NS   804.549927   812.393912   801.318641   809.063923   
2025-01-13   ICICIBANK.NS  1220.333862  1234.425085  1215.521023  1225.444453   
2025-01-13        INFY.NS  1905.799072  1925.807047  1892.978536  1899.777331   
2025-01-13         ITC.NS   424.407806   429.192737   420.492884   420.492884   
2025-01-13   KOTAKBANK.NS  1736.398071  1742.541013  1721.764918  1730.904405   
2025-01-13          LT.NS  3432.289307  3501.840598  3416.734426  3473.158149   
2025-01-13    RELIANCE.NS  1234.917847  1240.296390  1221.521399  1225.107054   
2025-01-13         TCS.NS  4146.854004  4177.633455  4085.295102

  data_long = data.stack(level=1).reset_index()


In [7]:
# Cell 4: Integrating FinBERT and Scoring Function

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Hugging Face model identifier for the FinBERT sentiment classifier.
FINBERT_MODEL = "ProsusAI/finbert"

# --- Load Model and Tokenizer ---
# Only the loading step is guarded. On failure the hints are printed and the
# error is re-raised, so the notebook stops here instead of continuing with
# `score_text` undefined and failing later with an unrelated NameError.
try:
    tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
except Exception as e:
    print(f"Error loading FinBERT: {e}")
    print("Please ensure all dependencies (pandas, transformers, torch) are correctly installed.")
    print("If the issue persists, check your internet connection for downloading the model weights.")
    raise

# Inference only: make sure dropout and similar layers are disabled.
model.eval()

# Resolve the class indices from the model's own label mapping rather than
# assuming an order. The fallbacks (0 = positive, 1 = negative) are the
# indices this notebook used before, so behaviour is unchanged if the
# checkpoint does not name its labels.
_FINBERT_LABEL_TO_INDEX = {
    str(label).lower(): int(index)
    for index, label in model.config.id2label.items()
}
POSITIVE_INDEX = _FINBERT_LABEL_TO_INDEX.get("positive", 0)
NEGATIVE_INDEX = _FINBERT_LABEL_TO_INDEX.get("negative", 1)


# --- Define Scoring Function ---
def score_text(text: str) -> float:
    """
    Score a piece of text with FinBERT and return its polarity.

    Args:
        text: The text to score. Anything that is not a non-blank string
            (None, NaN, pd.NA, "", whitespace only, ...) is treated as
            carrying no sentiment.

    Returns:
        P(positive) - P(negative), a float in the range [-1.0, +1.0].
        0.0 is returned for missing or blank input.
    """
    # Type check first: evaluating `not text` on values such as pd.NA or a
    # NumPy array raises instead of returning a boolean.
    if not isinstance(text, str) or not text.strip():
        return 0.0  # Neutral score

    # Tokenize a single sequence; inputs longer than the model limit are cut.
    inputs = tokenizer(text,
                       return_tensors="pt",
                       truncation=True,
                       max_length=512)

    # Forward pass without gradient tracking (faster, lower memory).
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax over the class dimension; row 0 is the only item in the batch.
    probabilities = torch.softmax(logits, dim=-1)[0]

    # Polarity Score = Positive Probability - Negative Probability
    positive_prob = probabilities[POSITIVE_INDEX].item()
    negative_prob = probabilities[NEGATIVE_INDEX].item()
    return positive_prob - negative_prob


print("FinBERT score:")

# --- Quick Verification Test ---
test_text_one = "Reliance is expected to be extremely bullish"
test_text_two = "Reliance is expected to be severely bearish"

score_one = score_text(test_text_one)
score_two = score_text(test_text_two)
print(f"Test score 1: {score_one:.4f}")
print(f"Test score 2: {score_two:.4f}")

# Sanity check: the bullish sentence must score above the bearish one,
# otherwise the label mapping is wrong.
assert score_one > score_two, "FinBERT polarity looks inverted; check the label mapping"

FinBERT score:
Test score 1: 0.8822
Test score 2: -0.5939


In [1]:
# Set the author identity that Git records on commits.
# NOTE(review): `--global` writes to the user-wide Git configuration, so these
# values apply to every repository for this user, not only this project.
# NOTE(review): the e-mail address and name are stored in this notebook, which
# is itself committed and pushed by the following cells; confirm that
# publishing them is intended.
!git config --global user.email "ramanav1618@gmail.com"
!git config --global user.name "Manav Soni"

In [2]:
# One-time repository setup and first push. The steps below depend on being
# run in this order.

# 1. Initialize the current folder as a Git repository
!git init

# 2. Stage files for the first commit. The dot means "everything in this
#    folder", not only this notebook.
# NOTE(review): the recorded output of this cell shows that this also committed
#    the .ipynb_checkpoints copies and the other notebooks in the folder, which
#    the next cell then has to remove again. Consider staging specific files or
#    adding a .gitignore first.
!git add .

# 3. Create the first commit (save point)
!git commit -m "Initial upload from Jupyter"

# 4. Register the GitHub repository as the remote named 'origin'
# NOTE(review): presumably this errors if 'origin' is already configured, so
#    the cell is not safe to re-run as-is; verify before running it twice.
!git remote add origin https://github.com/ramanav1618-hub/MDGProjectMANAV.git

# 5. Rename the current branch to 'main', then push it and set 'origin/main'
#    as its upstream
!git branch -M main
!git push -u origin main

Initialized empty Git repository in C:/Users/Manav Soni/Desktop/mdg/.git/




[master (root-commit) 8f3df14] Initial upload from Jupyter
 7 files changed, 5536 insertions(+)
 create mode 100644 .ipynb_checkpoints/Pythonpractice-checkpoint.ipynb
 create mode 100644 .ipynb_checkpoints/mdgproject_initial-checkpoint.ipynb
 create mode 100644 .ipynb_checkpoints/mdgprojectfinal-checkpoint.ipynb
 create mode 100644 Pythonpractice.ipynb
 create mode 100644 mdgproject_initial.ipynb
 create mode 100644 mdgprojectfinal.ipynb
 create mode 100644 nifty500.csv
branch 'main' set up to track 'origin/main'.


To https://github.com/ramanav1618-hub/MDGProjectMANAV.git
 * [new branch]      main -> main


In [3]:
# 1. Remove the unwanted files and folder from the GitHub tracking
!git rm Pythonpractice.ipynb
!git rm mdgproject_initial.ipynb
!git rm -r .ipynb_checkpoints

# 2. Commit the deletion
!git commit -m "Clean up: removed extra notebooks and checkpoints"

# 3. Push the changes to GitHub
!git push

rm 'Pythonpractice.ipynb'
rm 'mdgproject_initial.ipynb'
rm '.ipynb_checkpoints/Pythonpractice-checkpoint.ipynb'
rm '.ipynb_checkpoints/mdgproject_initial-checkpoint.ipynb'
rm '.ipynb_checkpoints/mdgprojectfinal-checkpoint.ipynb'
[main f03f971] Clean up: removed extra notebooks and checkpoints
 5 files changed, 4648 deletions(-)
 delete mode 100644 .ipynb_checkpoints/Pythonpractice-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/mdgproject_initial-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/mdgprojectfinal-checkpoint.ipynb
 delete mode 100644 Pythonpractice.ipynb
 delete mode 100644 mdgproject_initial.ipynb


To https://github.com/ramanav1618-hub/MDGProjectMANAV.git
   8f3df14..f03f971  main -> main
