In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, Dropout
import kagglehub

# Download the dataset
path = kagglehub.dataset_download("rohanrao/nifty50-stock-market-data")
print("Path to dataset files:", path)

# Load dataset
# Assuming the main file is 'NIFTY50_all.csv' based on the dataset description

# Preprocess data
# Use 'Close' column as the target for prediction
data = df['Close'].values.reshape(-1, 1)

scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data)

# Create sequences for time series prediction
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(seq_length, len(data)):
        X.append(data[i-seq_length:i, 0])
        y.append(data[i, 0])
    return np.array(X), np.array(y)

sequence_length = 60  # Predict based on the last 60 days
X, y = create_sequences(data_scaled, sequence_length)

X = np.reshape(X, (X.shape[0], X.shape[1], 1))  # Reshape for RNN/LSTM
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define RNN Model
rnn_model = Sequential([
    SimpleRNN(50, activation='relu', return_sequences=True, input_shape=(X.shape[1], 1)),
    Dropout(0.2),
    SimpleRNN(50, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

rnn_model.compile(optimizer='adam', loss='mean_squared_error')
rnn_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Define LSTM Model
lstm_model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(X.shape[1], 1)),
    Dropout(0.2),
    LSTM(50, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mean_squared_error')
lstm_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Predictions
rnn_pred = rnn_model.predict(X_test)
lstm_pred = lstm_model.predict(X_test)

# Reverse scaling
rnn_pred = scaler.inverse_transform(rnn_pred)
lstm_pred = scaler.inverse_transform(lstm_pred)
y_test_original = scaler.inverse_transform(y_test.reshape(-1, 1))

# Evaluate
print("RNN Predictions:", rnn_pred[:5])
print("LSTM Predictions:", lstm_pred[:5])
print("Actual Values:", y_test_original[:5])

Downloading from https://www.kaggle.com/api/v1/datasets/download/rohanrao/nifty50-stock-market-data?dataset_version_number=15...


100%|██████████████████████████████████████| 18.4M/18.4M [00:02<00:00, 7.79MB/s]

Extracting files...





Path to dataset files: /Users/kaushal/.cache/kagglehub/datasets/rohanrao/nifty50-stock-market-data/versions/15
Epoch 1/20


  super().__init__(**kwargs)


[1m5879/5879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 7ms/step - loss: 3.3677e-04 - val_loss: 5.9057e-05
Epoch 2/20
[1m5879/5879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 7ms/step - loss: 1.8907e-04 - val_loss: 1.1767e-04
Epoch 3/20
[1m5879/5879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 6ms/step - loss: 1.6851e-04 - val_loss: 1.4840e-04
Epoch 4/20
[1m5879/5879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 6ms/step - loss: 1.6166e-04 - val_loss: 2.7630e-04
Epoch 5/20
[1m5879/5879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 6ms/step - loss: 1.5629e-04 - val_loss: 2.0734e-04
Epoch 6/20
[1m5879/5879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 6ms/step - loss: 1.5466e-04 - val_loss: 2.8444e-04
Epoch 7/20
[1m5879/5879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 6ms/step - loss: 1.5156e-04 - val_loss: 2.2341e-04
Epoch 8/20
[1m5879/5879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 6ms/step - los

In [13]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, Dropout
import kagglehub

# Download the dataset
path = kagglehub.dataset_download("rohanrao/nifty50-stock-market-data")
print("Path to dataset files:", path)

# Load dataset
dataset_file = f"{path}/NIFTY50_all.csv"
df = pd.read_csv(dataset_file)

# Select relevant columns
features = ['Prev Close', 'Open', 'High', 'Low', 'Last', 'VWAP', 'Volume', 
            'Turnover', 'Trades', 'Deliverable Volume', '%Deliverble', 'Close']
df = df[features]

# Handle missing values (if any)
df = df.dropna()

# Separate features (X) and target (y)
target_column = 'Close'
X_raw = df.drop(columns=[target_column])
y_raw = df[target_column].values.reshape(-1, 1)

# Scale features and target
scaler_X = MinMaxScaler(feature_range=(0, 1))
scaler_y = MinMaxScaler(feature_range=(0, 1))

X_scaled = scaler_X.fit_transform(X_raw)
y_scaled = scaler_y.fit_transform(y_raw)

# Create sequences for time series prediction
def create_sequences(data, target, seq_length):
    X, y = [], []
    for i in range(seq_length, len(data)):
        X.append(data[i-seq_length:i])  # Sequence of features
        y.append(target[i])            # Corresponding target
    return np.array(X), np.array(y)

sequence_length = 60  # Use the last 60 timesteps
X, y = create_sequences(X_scaled, y_scaled, sequence_length)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define RNN Model
rnn_model = Sequential([
    SimpleRNN(50, activation='relu', return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    SimpleRNN(50, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

rnn_model.compile(optimizer='adam', loss='mean_squared_error')
rnn_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Define LSTM Model
lstm_model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    LSTM(50, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mean_squared_error')
lstm_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Predictions
rnn_pred = rnn_model.predict(X_test)
lstm_pred = lstm_model.predict(X_test)

# Reverse scaling
rnn_pred = scaler_y.inverse_transform(rnn_pred)
lstm_pred = scaler_y.inverse_transform(lstm_pred)
y_test_original = scaler_y.inverse_transform(y_test)

# Evaluate
print("RNN Predictions:", rnn_pred[:5])
print("LSTM Predictions:", lstm_pred[:5])
print("Actual Values:", y_test_original[:5])

Path to dataset files: /Users/kaushal/.cache/kagglehub/datasets/rohanrao/nifty50-stock-market-data/versions/15


  super().__init__(**kwargs)


Epoch 1/20
[1m3008/3008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 6ms/step - loss: 0.0012 - val_loss: 1.5606e-04
Epoch 2/20
[1m3008/3008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - loss: 3.4456e-04 - val_loss: 1.3829e-04
Epoch 3/20
[1m3008/3008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - loss: 3.1583e-04 - val_loss: 1.8756e-04
Epoch 4/20
[1m3008/3008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - loss: 2.5929e-04 - val_loss: 2.6854e-04
Epoch 5/20
[1m3008/3008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - loss: 3.3363e-04 - val_loss: 4.2251e-04
Epoch 6/20
[1m3008/3008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - loss: 2.6075e-04 - val_loss: 3.3486e-04
Epoch 7/20
[1m3008/3008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - loss: 2.2649e-04 - val_loss: 5.1453e-04
Epoch 8/20
[1m3008/3008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/ste

In [11]:
dataset_file = f"{path}/NIFTY50_all.csv"
df = pd.read_csv(dataset_file)
df.columns

Index(['Date', 'Symbol', 'Series', 'Prev Close', 'Open', 'High', 'Low', 'Last',
       'Close', 'VWAP', 'Volume', 'Turnover', 'Trades', 'Deliverable Volume',
       '%Deliverble'],
      dtype='object')

In [19]:
import langchain
print(langchain.__version__)


from langchain.llms.huggingface_pipeline import HuggingFacePipeline

hf = HuggingFacePipeline.from_model_id(
    model_id="microsoft/DialoGPT-medium", task="text-generation", pipeline_kwargs={"max_new_tokens": 200, "pad_token_id": 50256},
)

from langchain.prompts import PromptTemplate

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

chain = prompt | hf

question = "What is electroencephalography?"

print(chain.invoke({"question": question}))

0.3.8


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Question: What is electroencephalography?

Answer: Let's think step by step.I'm not a neuroscientist, but I'm pretty sure it's a branch of neuroscience.


In [17]:
pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.8-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.6.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.23.1-py3-none-any.whl.metadata (7.5 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Downloading langchain_community-0.3.8-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m22.4 MB/s[0m 