1. Import Libraries & Upload Dataset

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Upload dataset manually
from google.colab import files
uploaded = files.upload()


Saving air_quality_dataset.csv to air_quality_dataset.csv
Saving fruit_classification_dataset.csv to fruit_classification_dataset.csv


2. Load Datasets

In [2]:
# Load Air Quality Dataset
air_quality_df = pd.read_csv("air_quality_dataset.csv")
print("Air Quality Dataset:")
print(air_quality_df.head())

# Load Fruit Classification Dataset
fruit_df = pd.read_csv("fruit_classification_dataset.csv")
print("Fruit Classification Dataset:")
print(fruit_df.head())

Air Quality Dataset:
   PM2.5   PM10  Temperature  Humidity  Wind_Speed Air_Quality
0  112.0   42.0        31.72     37.96       14.75        Poor
1  189.0  269.0        30.12     64.05       10.24        Poor
2  102.0   29.0        32.23     71.37       13.60        Poor
3   24.0  265.0        20.36     78.04         NaN        Good
4  116.0    NaN        37.44     42.01        1.70        Poor
Fruit Classification Dataset:
   Weight (g)  Length (cm)  Circumference (cm)  Color Fruit_Type
0       183.0        12.45               12.04    2.0     Orange
1       155.0         9.26               10.17    NaN      Apple
2       276.0         7.01               18.67    0.0     Orange
3       240.0        14.44               11.85    2.0     Orange
4       288.0         5.81               24.97    1.0     Banana


3. Data Preprocessing

In [9]:
print("Columns in Air Quality Dataset:", air_quality_df.columns)
print("Columns in Fruit Classification Dataset:", fruit_df.columns)

# Encode fruit labels
label_encoder = LabelEncoder()
fruit_df["Label"] = label_encoder.fit_transform(fruit_df["Label"])  # ใช้ "Label" แทน "Fruit_Type"
joblib.dump(label_encoder, "fruit_label_encoder.pkl")

# Scale data
scaler_air = StandardScaler()
X_air = scaler_air.fit_transform(air_quality_df.drop("Air_Quality", axis=1))
y_air = air_quality_df["Air_Quality"]
joblib.dump(scaler_air, "air_quality_scaler.pkl")

scaler_fruit = StandardScaler()
X_fruit = scaler_fruit.fit_transform(fruit_df.drop("Label", axis=1))  # เปลี่ยนจาก "Fruit_Type" เป็น "Label"
y_fruit = fruit_df["Label"]
joblib.dump(scaler_fruit, "fruit_scaler.pkl")

# Split datasets
X_train_air, X_test_air, y_train_air, y_test_air = train_test_split(X_air, y_air, test_size=0.2, random_state=42)
X_train_fruit, X_test_fruit, y_train_fruit, y_test_fruit = train_test_split(X_fruit, y_fruit, test_size=0.2, random_state=42)


Columns in Air Quality Dataset: Index(['PM2.5', 'PM10', 'Temperature', 'Humidity', 'Wind_Speed',
       'Air_Quality'],
      dtype='object')
Columns in Fruit Classification Dataset: Index(['Weight (g)', 'Length (cm)', 'Circumference (cm)', 'Color', 'Label'], dtype='object')


4. Train Models

In [17]:
#ตรวจสอบค่า NaN ก่อนทำการเทรน (ใช้ np.isnan แทน isnull)
print("Checking NaN in X_train_air before training:", np.isnan(X_train_air).sum())
print("Checking NaN in X_train_fruit before training:", np.isnan(X_train_fruit).sum())

#แก้ไขค่า NaN ด้วยการเติมค่าเฉลี่ย
X_train_air = np.nan_to_num(X_train_air, nan=np.nanmean(X_train_air))
X_test_air = np.nan_to_num(X_test_air, nan=np.nanmean(X_test_air))

X_train_fruit = np.nan_to_num(X_train_fruit, nan=np.nanmean(X_train_fruit))
X_test_fruit = np.nan_to_num(X_test_fruit, nan=np.nanmean(X_test_fruit))

#ตรวจสอบอีกครั้งหลังเติมค่า
print("After filling NaN in X_train_air:", np.isnan(X_train_air).sum())
print("After filling NaN in X_train_fruit:", np.isnan(X_train_fruit).sum())

# Train SVM Model for Air Quality Classification
svm_model = SVC(probability=True, random_state=42)
svm_model.fit(X_train_air, y_train_air)
joblib.dump(svm_model, "air_quality_svm_model.pkl")
print("SVM Model Training Complete!")

#Train Random Forest Model for Air Quality Classification
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_air, y_train_air)
joblib.dump(rf_model, "air_quality_rf_model.pkl")
print("Random Forest Model Training Complete!")

#Train MLP Neural Network Model for Fruit Classification
mlp_model = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
mlp_model.fit(X_train_fruit, y_train_fruit)
joblib.dump(mlp_model, "fruit_mlp_model.pkl")
print("MLP Model Training Complete!")


Checking NaN in X_train_air before training: 0
Checking NaN in X_train_fruit before training: 0
After filling NaN in X_train_air: 0
After filling NaN in X_train_fruit: 0
SVM Model Training Complete!
Random Forest Model Training Complete!
MLP Model Training Complete!




In [None]:
5. Download Trained Models

In [18]:
# Download trained models
files.download("air_quality_svm_model.pkl")
files.download("air_quality_rf_model.pkl")
files.download("fruit_mlp_model.pkl")
files.download("air_quality_scaler.pkl")
files.download("fruit_scaler.pkl")
files.download("fruit_label_encoder.pkl")

print("Model Training Complete! Models are available for download.")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Model Training Complete! Models are available for download.
