In [1]:
import numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv('datasets/training.csv')
df.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [3]:
df.shape

(4920, 133)

In [4]:
len(df['prognosis'].unique())

41

In [5]:
X = df[df.columns[:-1]]
y = df['prognosis']

In [6]:
lb = LabelEncoder()

In [7]:
Y = lb.fit_transform(y)

In [8]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [9]:
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "AdaBoost": AdaBoostClassifier(n_estimators=100),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100),
    "SVC": SVC()
}

In [10]:
for model_name, model in models.items():
    model.fit(X_train, Y_train)
    Y_pred = model.predict(X_test)

In [11]:
for i in range(len(models)):
    print(list(models.keys())[i], accuracy_score(Y_test, Y_pred))

Random Forest 1.0
AdaBoost 1.0
Gradient Boosting 1.0
SVC 1.0


In [12]:
rfc = RandomForestClassifier(n_estimators=100)
rfc.fit(X_train, Y_train)

In [13]:
import pickle

In [14]:
with open('trained_model.pkl', 'wb') as f:
    pickle.dump(rfc, f)

In [15]:
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(lb, f)