In [1]:
# Step 1: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import pickle


In [2]:
# Step 2: Load dataset (make sure Skin_Type.csv is in the same folder)
data = pd.read_csv('Skin_Type.csv')

In [3]:
# Step 3: Quick look at data
print("First 5 rows of dataset:")
print(data.head())
print("\nDataset info:")
print(data.info())

First 5 rows of dataset:
   Age  Gender Hydration_Level Oil_Level Sensitivity  Humidity  Temperature  \
0   36    Male             Low       Low        High      31.9         10.1   
1   36  Female            High       Low        High      30.1         20.5   
2   34  Female            High    Medium         Low      53.0         19.5   
3   51    Male          Medium      High         Low      57.3         22.7   
4   36    Male            High    Medium      Medium      64.4         24.0   

     Skin_Type  
0          Dry  
1  Combination  
2       Normal  
3         Oily  
4       Normal  

Dataset info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Age              2000 non-null   int64  
 1   Gender           2000 non-null   object 
 2   Hydration_Level  2000 non-null   object 
 3   Oil_Level        2000 non-null   object 
 

In [4]:
# Step 4: Handle missing values
for col in data.columns:
    if data[col].dtype == 'object':
        data[col] = data[col].fillna(data[col].mode()[0])
    else:
        data[col] = data[col].fillna(data[col].median())


In [5]:
# Step 5: Encode categorical features
label_encoders = {}
for col in data.columns:
    if data[col].dtype == 'object':
        le = LabelEncoder()
        data[col] = le.fit_transform(data[col])
        label_encoders[col] = le

In [6]:
# Step 6: Split features and target
X = data.drop('Skin_Type', axis=1)  # Replace if your target column is different
y = data['Skin_Type']

In [7]:
# Step 7: Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
# Step 8: Train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [9]:
# Step 9: Predict on test set
y_pred = model.predict(X_test)

In [10]:
# Step 10: Evaluate the model
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy on test set: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       106
           1       1.00      1.00      1.00        96
           2       1.00      1.00      1.00       107
           3       1.00      1.00      1.00        91

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400



In [11]:
# Step 11: Check training accuracy
y_train_pred = model.predict(X_train)
print("Train Accuracy:", accuracy_score(y_train, y_train_pred))


Train Accuracy: 1.0


In [12]:
# Step 12: Save trained model
with open('skin_type_model.pkl', 'wb') as file:
    pickle.dump(model, file)
print("Model saved as 'skin_type_model.pkl'")


Model saved as 'skin_type_model.pkl'


In [13]:
# Step 13: Save Label Encoders
with open('label_encoders.pkl', 'wb') as file:
    pickle.dump(label_encoders, file)
print("Label encoders saved as 'label_encoders.pkl'")

Label encoders saved as 'label_encoders.pkl'


In [14]:
# Step 14: Save feature columns
feature_columns = X.columns.tolist()
with open('feature_columns.pkl', 'wb') as file:
    pickle.dump(feature_columns, file)
print("Feature columns saved as 'feature_columns.pkl'")

Feature columns saved as 'feature_columns.pkl'
