In [1]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,PowerTransformer,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

In [2]:
import pandas as pd
import numpy as np

# Sampling profile for each Vata class, listed in the order the classes are
# generated. Each feature maps to (answer options, sampling weights). The
# feature order is also the order in which random draws are taken, so
# reordering entries changes the data produced for a given seed.
_VATA_PROFILES = {
    "Normal": {
        "SleepQuality": ([3, 4, 5], [0.2, 0.5, 0.3]),
        "StressLevel": ([1, 2], [0.7, 0.3]),
        "Appetite": ([3, 4, 5], [0.3, 0.5, 0.2]),
        "EnergyLevel": ([3, 4, 5], [0.3, 0.5, 0.2]),
        "AngerLevel": ([1, 2, 3], [0.5, 0.3, 0.2]),
        "Forgetfulness": ([1, 2, 3], [0.4, 0.4, 0.2]),
        "AnxietyLevel": ([1, 2], [0.6, 0.4]),
        "BowelType": (["Regular", "Loose", "Constipated"], [0.7, 0.2, 0.1]),
        "Cravings": (["Spicy", "Sour", "Warm", "Salty"], [0.2, 0.1, 0.4, 0.3]),
        "SkinType": (["Sensitive", "Oily", "Dry"], [0.2, 0.2, 0.6]),
        "HairType": (["Thick", "Brittle", "Thin"], [0.2, 0.2, 0.6]),
        "BodyFrame": (["Small", "Medium", "Large"], [0.6, 0.3, 0.1]),
        "Sweating": (["Low", "Medium", "High"], [0.5, 0.4, 0.1]),
    },
    "Medium": {
        "SleepQuality": ([2, 3, 4], [0.3, 0.4, 0.3]),
        "StressLevel": ([2, 3, 4], [0.3, 0.4, 0.3]),
        "Appetite": ([2, 3, 4], [0.3, 0.4, 0.3]),
        "EnergyLevel": ([2, 3, 4], [0.3, 0.4, 0.3]),
        "AngerLevel": ([1, 2, 3], [0.3, 0.4, 0.3]),
        "Forgetfulness": ([2, 3, 4], [0.3, 0.4, 0.3]),
        "AnxietyLevel": ([2, 3, 4], [0.3, 0.4, 0.3]),
        "BowelType": (["Regular", "Loose", "Constipated"], [0.5, 0.3, 0.2]),
        "Cravings": (["Spicy", "Sour", "Warm", "Salty"], [0.3, 0.2, 0.3, 0.2]),
        "SkinType": (["Sensitive", "Oily", "Dry"], [0.3, 0.3, 0.4]),
        "HairType": (["Thick", "Brittle", "Thin"], [0.3, 0.3, 0.4]),
        "BodyFrame": (["Small", "Medium", "Large"], [0.4, 0.4, 0.2]),
        "Sweating": (["Low", "Medium", "High"], [0.3, 0.4, 0.3]),
    },
    "High": {
        "SleepQuality": ([1, 2, 3], [0.5, 0.4, 0.1]),
        "StressLevel": ([3, 4, 5], [0.2, 0.3, 0.5]),
        "Appetite": ([1, 2, 3], [0.5, 0.3, 0.2]),
        "EnergyLevel": ([1, 2, 3], [0.4, 0.4, 0.2]),
        "AngerLevel": ([1, 2, 3], [0.4, 0.3, 0.3]),
        "Forgetfulness": ([3, 4, 5], [0.3, 0.3, 0.4]),
        "AnxietyLevel": ([3, 4, 5], [0.2, 0.3, 0.5]),
        "BowelType": (["Regular", "Loose", "Constipated"], [0.2, 0.2, 0.6]),
        "Cravings": (["Spicy", "Sour", "Warm", "Salty"], [0.3, 0.1, 0.3, 0.3]),
        "SkinType": (["Sensitive", "Oily", "Dry"], [0.3, 0.2, 0.5]),
        "HairType": (["Thick", "Brittle", "Thin"], [0.2, 0.2, 0.6]),
        "BodyFrame": (["Small", "Medium", "Large"], [0.5, 0.3, 0.2]),
        "Sweating": (["Low", "Medium", "High"], [0.6, 0.3, 0.1]),
    },
}

# Column order of the generated frame; passed explicitly so that an empty
# result (n_each=0) still carries the full schema.
_VATA_COLUMNS = ["Age", "Gender", *_VATA_PROFILES["Normal"], "Vata_Imbalance"]


def _sample_vata_row(rng, label, profile):
    """Draw one synthetic record for the class `label` using `profile` weights."""
    row = {
        "Age": rng.randint(18, 70),  # upper bound is exclusive: ages 18..69
        "Gender": rng.choice(["Male", "Female"]),  # uniform, no weights
    }
    for feature, (options, weights) in profile.items():
        row[feature] = rng.choice(options, p=weights)
    row["Vata_Imbalance"] = label
    return row


def generate_vata_balanced_aligned(n_each=400, seed=42):
    """Generate a synthetic, class-balanced Vata imbalance dataset.

    Rows are produced class by class ("Normal", then "Medium", then "High"),
    `n_each` rows per class, with every feature sampled independently from
    the per-class weights in `_VATA_PROFILES`.

    Parameters
    ----------
    n_each : int, default 400
        Number of rows to generate for each of the three classes.
    seed : int or None, default 42
        Seed for the random draws. A private ``RandomState`` is used, so the
        process-wide NumPy random state is left untouched; the values drawn
        for a given seed are the same as seeding the global generator.

    Returns
    -------
    pandas.DataFrame
        ``3 * n_each`` rows with the columns Age, Gender, the thirteen
        profile features and the ``Vata_Imbalance`` label.
    """
    rng = np.random.RandomState(seed)
    rows = [
        _sample_vata_row(rng, label, profile)
        for label, profile in _VATA_PROFILES.items()
        for _ in range(n_each)
    ]
    return pd.DataFrame(rows, columns=_VATA_COLUMNS)

# Build the default dataset (400 rows per class, 1200 rows in total) and
# persist it as an Excel workbook without the index column.
df_vata_aligned = generate_vata_balanced_aligned(n_each=400, seed=42)
df_vata_aligned.to_excel(excel_writer="vata_imbalance_dataset.xlsx", index=False)

In [3]:
# Reload the generated workbook from disk and summarise its columns and dtypes.
df = pd.read_excel("vata_imbalance_dataset.xlsx")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1200 entries, 0 to 1199
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Age             1200 non-null   int64 
 1   Gender          1200 non-null   object
 2   SleepQuality    1200 non-null   int64 
 3   StressLevel     1200 non-null   int64 
 4   Appetite        1200 non-null   int64 
 5   EnergyLevel     1200 non-null   int64 
 6   AngerLevel      1200 non-null   int64 
 7   Forgetfulness   1200 non-null   int64 
 8   AnxietyLevel    1200 non-null   int64 
 9   BowelType       1200 non-null   object
 10  Cravings        1200 non-null   object
 11  SkinType        1200 non-null   object
 12  HairType        1200 non-null   object
 13  BodyFrame       1200 non-null   object
 14  Sweating        1200 non-null   object
 15  Vata_Imbalance  1200 non-null   object
dtypes: int64(8), object(8)
memory usage: 150.1+ KB


In [4]:
# Display the reloaded frame as a visual sanity check of the generated rows.
df

Unnamed: 0,Age,Gender,SleepQuality,StressLevel,Appetite,EnergyLevel,AngerLevel,Forgetfulness,AnxietyLevel,BowelType,Cravings,SkinType,HairType,BodyFrame,Sweating,Vata_Imbalance
0,56,Female,5,2,4,3,1,1,2,Regular,Salty,Sensitive,Thin,Medium,Low,Normal
1,38,Male,4,1,4,3,2,1,1,Regular,Warm,Dry,Thick,Small,Medium,Normal
2,20,Male,4,1,3,5,3,3,1,Regular,Warm,Dry,Thick,Small,Low,Normal
3,31,Male,4,1,4,4,2,1,2,Loose,Salty,Dry,Thin,Large,Low,Normal
4,40,Female,3,1,4,3,3,1,1,Regular,Spicy,Dry,Thick,Large,Medium,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,47,Female,1,5,2,2,1,4,3,Loose,Warm,Dry,Thin,Large,Medium,High
1196,32,Female,2,5,3,2,2,5,4,Loose,Warm,Dry,Thin,Small,Medium,High
1197,49,Female,2,4,2,2,2,5,5,Regular,Sour,Dry,Brittle,Large,Low,High
1198,62,Male,2,3,3,2,1,5,3,Constipated,Spicy,Sensitive,Thin,Medium,Low,High


In [5]:
# Count rows labelled "Normal" (True) versus every other class (False).
# Series.value_counts is used because the top-level pd.value_counts helper is
# deprecated and emits a FutureWarning.
(df['Vata_Imbalance'] == "Normal").value_counts()

  pd.value_counts(df['Vata_Imbalance']=="Normal")


Vata_Imbalance
False    800
True     400
Name: count, dtype: int64

In [6]:
# Separate the target column from the feature columns.
y = df["Vata_Imbalance"]
X = df.drop(columns=["Vata_Imbalance"])

In [7]:
# Integer-valued columns.
numeric_features = [
    "Age",
    "SleepQuality",
    "StressLevel",
    "Appetite",
    "EnergyLevel",
    "AngerLevel",
    "Forgetfulness",
    "AnxietyLevel",
]

# String-valued columns.
categorical_features = [
    "Gender",
    "BowelType",
    "Cravings",
    "SkinType",
    "HairType",
    "BodyFrame",
    "Sweating",
]

In [8]:
# Numeric columns are forwarded untouched; categorical columns are one-hot
# encoded into a dense array, and categories unseen at fit time are ignored
# (encoded as all zeros) instead of raising.
preprocessor = ColumnTransformer(
    [
        ("num", "passthrough", numeric_features),
        (
            "cat",
            OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
            categorical_features,
        ),
    ]
)


In [9]:
# Integer class codes used as the training target, ordered by severity.
label_mapping = {
    "Normal": 0,
    "Medium": 1,
    "High": 2
}

# Series.map turns every label that is missing from the mapping into NaN;
# fail loudly here instead of handing NaN targets to the model.
y_encoded = y.map(label_mapping)
unmapped_labels = y[y_encoded.isna()].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f"Unmapped Vata_Imbalance labels: {list(unmapped_labels)}")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42)


In [10]:
# Multi-class gradient-boosted classifier. `use_label_encoder` is deliberately
# not passed: the targets are already integer-encoded, and the installed
# XGBoost does not use the argument (it only produced a
# 'Parameters: { "use_label_encoder" } are not used.' warning at fit time).
xgb_model = XGBClassifier(
    objective="multi:softmax",
    num_class=len(np.unique(y_encoded)),  # one class per distinct label code
    eval_metric="mlogloss",
    learning_rate=0.05,
    max_depth=8,
    n_estimators=800,
    subsample=0.9,
    colsample_bytree=0.9,
    reg_lambda=2,
    reg_alpha=1,
    random_state=42
)

# Preprocessing and classifier are chained so that raw feature frames can be
# passed straight to fit/predict.
model = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("classifier", xgb_model)
])

In [11]:
# Fit the preprocessing + classifier pipeline on the training split.
model.fit(X_train, y=y_train)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [12]:
# Predict integer class codes for the held-out rows.
y_pred = model.predict(X_test)

In [13]:
# Fraction of held-out rows whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9958333333333333

In [15]:
import pickle

# Serialise the fitted pipeline (preprocessor + classifier) to disk.
# NOTE: unpickling can execute arbitrary code, so only load this file from a
# trusted location.
with open("vata_model.pkl", mode="wb") as model_file:
    pickle.dump(model, model_file)
