In [1]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.18.0-cp312-cp312-win_amd64.whl.metadata (3.3 kB)
Collecting tensorflow-intel==2.18.0 (from tensorflow)
  Downloading tensorflow_intel-2.18.0-cp312-cp312-win_amd64.whl.metadata (4.9 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading flatbuffers-25.1.24-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorf

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import pickle

In [3]:
df = pd.read_csv("A_Z_medicines_dataset_of_India.csv")
df.head()


Unnamed: 0,id,name,price(₹),Is_discontinued,manufacturer_name,type,pack_size_label,short_composition1,short_composition2
0,1,Augmentin 625 Duo Tablet,223.42,False,Glaxo SmithKline Pharmaceuticals Ltd,allopathy,strip of 10 tablets,Amoxycillin (500mg),Clavulanic Acid (125mg)
1,2,Azithral 500 Tablet,132.36,False,Alembic Pharmaceuticals Ltd,allopathy,strip of 5 tablets,Azithromycin (500mg),
2,3,Ascoril LS Syrup,118.0,False,Glenmark Pharmaceuticals Ltd,allopathy,bottle of 100 ml Syrup,Ambroxol (30mg/5ml),Levosalbutamol (1mg/5ml)
3,4,Allegra 120mg Tablet,218.81,False,Sanofi India Ltd,allopathy,strip of 10 tablets,Fexofenadine (120mg),
4,5,Avil 25 Tablet,10.96,False,Sanofi India Ltd,allopathy,strip of 15 tablets,Pheniramine (25mg),


In [4]:
df["short_composition2"].fillna("", inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["short_composition2"].fillna("", inplace=True)


In [5]:
df["composition"] = df["short_composition1"] + " " + df["short_composition2"]
df["composition"] = df["composition"].str.lower()

In [6]:
df = df[["name", "price(₹)", "composition"]]


In [7]:
df.head()

Unnamed: 0,name,price(₹),composition
0,Augmentin 625 Duo Tablet,223.42,amoxycillin (500mg) clavulanic acid (125mg)
1,Azithral 500 Tablet,132.36,azithromycin (500mg)
2,Ascoril LS Syrup,118.0,ambroxol (30mg/5ml) levosalbutamol (1mg/5ml)
3,Allegra 120mg Tablet,218.81,fexofenadine (120mg)
4,Avil 25 Tablet,10.96,pheniramine (25mg)


In [8]:
vectorizer = TfidfVectorizer()
composition_matrix = vectorizer.fit_transform(df["composition"])

In [9]:
composition_matrix = composition_matrix.toarray()


In [10]:
medicine_indices = np.arange(len(df))


In [11]:
X_train, X_test, y_train, y_test = train_test_split(composition_matrix, medicine_indices, test_size=0.2, random_state=42)


In [12]:
model = keras.Sequential([
    keras.layers.Dense(128, activation="relu", input_shape=(X_train.shape[1],)),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(len(df), activation="softmax")  # Output layer: Probability distribution over medicines
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])


In [14]:
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test))


Epoch 1/10
[1m3175/3175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m944s[0m 297ms/step - accuracy: 0.0000e+00 - loss: 12.5826 - val_accuracy: 0.0000e+00 - val_loss: 13.2716
Epoch 2/10
[1m3175/3175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1023s[0m 322ms/step - accuracy: 0.0000e+00 - loss: 12.4053 - val_accuracy: 0.0000e+00 - val_loss: 14.9296
Epoch 3/10
[1m3175/3175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1016s[0m 320ms/step - accuracy: 3.5973e-05 - loss: 12.2574 - val_accuracy: 0.0000e+00 - val_loss: 15.9583
Epoch 4/10
[1m3175/3175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1036s[0m 326ms/step - accuracy: 5.2475e-05 - loss: 11.8008 - val_accuracy: 0.0000e+00 - val_loss: 16.3780
Epoch 5/10
[1m3175/3175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1046s[0m 329ms/step - accuracy: 4.1787e-04 - loss: 10.7751 - val_accuracy: 0.0000e+00 - val_loss: 16.5067
Epoch 6/10
[1m3175/3175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1041s[0m 328ms/step - accuracy: 0

<keras.src.callbacks.history.History at 0x2bac9afd310>

In [15]:
model.save("medicine_recommender.h5")




In [16]:
with open("vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)

print("Model and vectorizer saved successfully!")

Model and vectorizer saved successfully!


In [17]:
import keras.saving
keras.saving.save_model(model, "medicine_recommender.keras")
