# Human Cell Classification ( that whether infected by maleria or not.)

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn import metrics
import joblib

## Load Dataset

In [2]:
dataframe = pd.read_csv("dataset.csv")
print(dataframe.head())

         Label  area_0  area_1   area_2  area_3  area_4
0  Parasitized   175.5   126.0    131.0  8902.5     0.0
1  Parasitized   222.0  9847.5      0.0     0.0     0.0
2  Parasitized   179.5   256.5  12413.0     0.0     0.0
3  Parasitized    18.0   187.5   9306.5     0.0     0.0
4  Parasitized   142.5   156.5   6669.5     0.0     0.0


## Split into training and test data

In [3]:
x = dataframe.drop(["Label"],axis=1)
y = dataframe["Label"]

x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=42)

## Build a model

In [4]:
# random forest classifier is used for this huge dataset
#svm will be suitable for small datasets

model = RandomForestClassifier(n_estimators=100,max_depth=5)
model.fit(x_train,y_train)

#save the model after training using joblib to not retrain it again
joblib.dump(model,"rf_malaria_100_5")

['rf_malaria_100_5']

In [5]:
# Make predictions and get classification report

predictions = model.predict(x_test)

In [6]:
print(metrics.classification_report(predictions,y_test))
print(model.score(x_test,y_test))

              precision    recall  f1-score   support

 Parasitized       0.90      0.90      0.90      2772
  Uninfected       0.90      0.89      0.90      2740

    accuracy                           0.90      5512
   macro avg       0.90      0.90      0.90      5512
weighted avg       0.90      0.90      0.90      5512

0.8985849056603774


### save the model using pickle

In [8]:
import pickle
with open('maleria_classification_model.pkl', 'wb') as file:
    pickle.dump(model, file)