In [1]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     /users/student/idddp/kushpatil/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to
[nltk_data]     /users/student/idddp/kushpatil/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     /users/student/idddp/kushpatil/nltk_data...


True

In [15]:
# demand_forecast_lstm_runable.py
# Put this file in your project and run: python demand_forecast_lstm_runable.py
# Two safe modes:
#   FORCE_CPU = True      -> runs on CPU only (no GPU/cuDNN used)
#   DISABLE_XLA = FALSE    -> keeps GPU, but disables XLA device fusion to avoid cuDNN init; uses non-cuDNN LSTM

import os

# ----------------- CONFIG -----------------
FORCE_CPU = False        # set True to force CPU (safe, slow)
DISABLE_XLA = True       # set True to disable XLA device fusion (helpful when cuDNN runtime mismatches)
# ------------------------------------------

if FORCE_CPU:
    # Must hide GPUs BEFORE importing tensorflow
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

if DISABLE_XLA:
    # Prevent XLA from creating fused GPU devices which may require cuDNN
    # Must set before importing tensorflow
    os.environ["TF_XLA_FLAGS"] = "--tf_xla_enable_xla_devices=false"

# Now safe to import TensorFlow and other libs
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import Input, Sequential
from tensorflow.keras.layers import RNN, LSTMCell, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# ---------- Create dummy data if missing ----------
CSV_PATH = "sales_data.csv"
if not os.path.exists(CSV_PATH):
    dates = pd.date_range(start="2018-01-01", periods=60, freq="M")
    np.random.seed(42)
    demand = np.abs(200 + 20 * np.sin(np.arange(60) / 6) + np.random.normal(0, 10, 60))
    price = np.round(50 + np.random.normal(0, 2, 60), 2)
    export_index = np.round(100 + np.random.normal(0, 5, 60), 2)
    pd.DataFrame({"date": dates, "demand": demand, "price": price, "export_index": export_index}).to_csv(CSV_PATH, index=False)
    print("Created dummy", CSV_PATH)

# ---------- Load and preprocess ----------
df = pd.read_csv(CSV_PATH, parse_dates=["date"]).sort_values("date").reset_index(drop=True)
features = ["demand", "price", "export_index"]
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[features])   # numpy float64 by default

LOOKBACK = 12
def create_sequences(X, lookback):
    Xs, ys = [], []
    for i in range(len(X) - lookback):
        Xs.append(X[i:i+lookback])
        ys.append(X[i+lookback, 0])
    return np.array(Xs), np.array(ys)

X, y = create_sequences(scaled, LOOKBACK)

# explicit train/test split
split_idx = int(len(X) * 0.8)
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

# dtype & memory layout fixes required for LSTM-like ops
X_train = np.ascontiguousarray(X_train.astype("float32"))
X_test  = np.ascontiguousarray(X_test.astype("float32"))
y_train = np.ascontiguousarray(y_train.astype("float32").reshape(-1, 1))
y_test  = np.ascontiguousarray(y_test.astype("float32").reshape(-1, 1))

print("TensorFlow:", tf.__version__)
print("GPU devices:", tf.config.list_physical_devices('GPU'))
print("Mode: FORCE_CPU=", FORCE_CPU, " DISABLE_XLA=", DISABLE_XLA)
print("X_train dtype, shape, contiguous:", X_train.dtype, X_train.shape, X_train.flags['C_CONTIGUOUS'])
print("y_train dtype, shape, contiguous:", y_train.dtype, y_train.shape, y_train.flags['C_CONTIGUOUS'])

# ---------- Build model using non-cuDNN LSTM (RNN + LSTMCell) ----------
model = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2]), dtype='float32'),
    RNN(LSTMCell(64)),   # non-cuDNN LSTM path
    Dropout(0.2),
    Dense(1, dtype='float32')
])
model.compile(optimizer="adam", loss="mse")

# small explicit holdout for validation
val_split = 0.1
val_count = max(1, int(len(X_train) * val_split))
X_val = X_train[-val_count:]
y_val = y_train[-val_count:]
X_tr = X_train[:-val_count]
y_tr = y_train[:-val_count]

print("Train/Val/Test sizes:", X_tr.shape[0], X_val.shape[0], X_test.shape[0])

es = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
history = model.fit(X_tr, y_tr, validation_data=(X_val, y_val), epochs=50, batch_size=8, callbacks=[es], verbose=2)

# ---------- Predict & evaluate ----------
y_pred = model.predict(X_test)
num_features = scaled.shape[1]
def invert_target(scaled_target):
    dummy = np.zeros((len(scaled_target), num_features))
    dummy[:, 0] = scaled_target.flatten()
    return scaler.inverse_transform(dummy)[:, 0]

y_test_inv = invert_target(y_test)
y_pred_inv = invert_target(y_pred)

print("MAE:", mean_absolute_error(y_test_inv, y_pred_inv))
print("RMSE:", mean_squared_error(y_test_inv, y_pred_inv, squared=False))

plt.plot(y_test_inv, label="Actual")
plt.plot(y_pred_inv, label="Predicted")
plt.legend()
plt.title("Dummy Demand Forecast (safe mode)")
plt.show()


TensorFlow: 2.19.0
GPU devices: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:4', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:5', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:6', device_type='GPU')]
Mode: FORCE_CPU= False  DISABLE_XLA= True
X_train dtype, shape, contiguous: float32 (38, 12, 3) True
y_train dtype, shape, contiguous: float32 (38, 1) True
Train/Val/Test sizes: 35 3 10
Epoch 1/50


E0000 00:00:1754429289.442571 1536206 cuda_dnn.cc:522] Loaded runtime CuDNN library: 9.1.0 but source was compiled with: 9.3.0.  CuDNN library needs to have matching major version and equal or higher minor version. If using a binary install, upgrade your CuDNN library.  If building from sources, make sure the library loaded at runtime is compatible with the version specified during compile configuration.
E0000 00:00:1754429289.500925 1536206 cuda_dnn.cc:522] Loaded runtime CuDNN library: 9.1.0 but source was compiled with: 9.3.0.  CuDNN library needs to have matching major version and equal or higher minor version. If using a binary install, upgrade your CuDNN library.  If building from sources, make sure the library loaded at runtime is compatible with the version specified during compile configuration.
2025-08-06 02:58:09.511032: W tensorflow/core/framework/op_kernel.cc:1857] OP_REQUIRES failed at xla_ops.cc:591 : FAILED_PRECONDITION: DNN library initialization failed. Look at the er

FailedPreconditionError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/users/student/idddp/kushpatil/.local/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 211, in start

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/asyncio/base_events.py", line 641, in run_forever

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/asyncio/base_events.py", line 1987, in _run_once

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 505, in process_one

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 397, in dispatch_shell

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 368, in execute_request

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 752, in execute_request

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 455, in do_execute

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 577, in run_cell

  File "/users/student/idddp/kushpatil/.local/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3098, in run_cell

  File "/users/student/idddp/kushpatil/.local/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3153, in _run_cell

  File "/users/student/idddp/kushpatil/.local/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/users/student/idddp/kushpatil/.local/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3365, in run_cell_async

  File "/users/student/idddp/kushpatil/.local/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3610, in run_ast_nodes

  File "/users/student/idddp/kushpatil/.local/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3670, in run_code

  File "/tmp/ipykernel_1520932/1514782999.py", line 98, in <module>

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 377, in fit

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 220, in function

  File "/users/student/idddp/kushpatil/.conda/envs/myenv/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 133, in multi_step_on_iterator

DNN library initialization failed. Look at the errors above for more details.
	 [[{{node StatefulPartitionedCall}}]] [Op:__inference_multi_step_on_iterator_12399]

In [6]:
# competitor_analysis_nlp.py
import pandas as pd
import os
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from gensim import corpora, models
from sklearn.feature_extraction.text import TfidfVectorizer

nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")

CSV_PATH = "competitor_data.csv"

# ---------- Generate Dummy Competitor Data ----------
if not os.path.exists(CSV_PATH):
    products = [
        "Organic Turmeric Powder - Rich color and earthy taste, sourced from Kerala.",
        "Premium Garam Masala Blend - Bold aroma, perfect for curries.",
        "Pure Red Chilli Powder - Fiery heat with vibrant red color.",
        "Authentic Cumin Seeds - Warm, nutty flavor for Indian dishes.",
        "Coriander Powder - Freshly ground, subtle citrus notes."
    ]
    df_dummy = pd.DataFrame({"product_text": products})
    df_dummy.to_csv(CSV_PATH, index=False)
    print(f"Dummy competitor data created: {CSV_PATH}")

# ---------- Load Data ----------
df = pd.read_csv(CSV_PATH)

stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    text = re.sub(r"[^a-zA-Z ]", " ", text).lower()
    tokens = nltk.word_tokenize(text)
    return [lemmatizer.lemmatize(t) for t in tokens if t not in stop_words and len(t) > 2]

df["tokens"] = df["product_text"].apply(preprocess)

dictionary = corpora.Dictionary(df["tokens"])
corpus = [dictionary.doc2bow(t) for t in df["tokens"]]

lda_model = models.LdaModel(corpus=corpus, id2word=dictionary, num_topics=3, passes=5, random_state=42)

print("\n--- LDA Topics ---")
for i, topic in lda_model.show_topics(num_topics=3, formatted=True):
    print(f"Topic {i}: {topic}")

tfidf = TfidfVectorizer(max_features=10)
tfidf_matrix = tfidf.fit_transform(df["product_text"])
print("\n--- TF-IDF Keywords ---")
print(tfidf.get_feature_names_out())


[nltk_data] Downloading package punkt to
[nltk_data]     /users/student/idddp/kushpatil/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /users/student/idddp/kushpatil/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /users/student/idddp/kushpatil/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Dummy competitor data created: competitor_data.csv


LookupError: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/users/student/idddp/kushpatil/nltk_data'
    - '/users/student/idddp/kushpatil/.conda/envs/myenv/nltk_data'
    - '/users/student/idddp/kushpatil/.conda/envs/myenv/share/nltk_data'
    - '/users/student/idddp/kushpatil/.conda/envs/myenv/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************


In [7]:
# sentiment_and_clustering.py
import pandas as pd
import numpy as np
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

CSV_PATH = "reviews.csv"

# ---------- Generate Dummy Reviews ----------
if not os.path.exists(CSV_PATH):
    reviews = [
        "Absolutely love this turmeric powder, very fresh!",
        "Too spicy for my taste, but quality is good.",
        "Great aroma and flavor, will buy again.",
        "Not worth the price, packaging was damaged.",
        "Perfect blend for curries, my family loved it.",
        "Mild taste, could be stronger."
    ]
    df_dummy = pd.DataFrame({"review_text": reviews})
    df_dummy.to_csv(CSV_PATH, index=False)
    print(f"Dummy reviews data created: {CSV_PATH}")

# ---------- Load Data ----------
df = pd.read_csv(CSV_PATH)

# Sentiment Analysis
analyzer = SentimentIntensityAnalyzer()
df["sentiment_score"] = df["review_text"].apply(lambda t: analyzer.polarity_scores(t)["compound"])

# Embeddings + Clustering
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(df["review_text"].tolist())

NUM_CLUSTERS = 2
km = KMeans(n_clusters=NUM_CLUSTERS, random_state=42)
df["cluster"] = km.fit_predict(embeddings)

# Cluster Visualization
pca = PCA(n_components=2)
coords = pca.fit_transform(embeddings)
plt.scatter(coords[:,0], coords[:,1], c=df["cluster"], cmap="viridis")
plt.title("Customer Review Clusters")
plt.show()

print("\n--- Cluster Summary ---")
for c in range(NUM_CLUSTERS):
    cluster_df = df[df["cluster"] == c]
    print(f"Cluster {c}: Avg Sentiment = {cluster_df['sentiment_score'].mean():.2f}")
    print(cluster_df["review_text"].tolist())
    print("-" * 40)


ValueError: Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.