In [3]:
import random
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np

class Data:
    """One randomly generated meal: ingredient areas plus a price.

    Each instance picks three distinct vegetables, one meat and one rice
    from the class-level lists, assigns every picked ingredient a random
    integer area in ``[15, 30]``, and records a price (currently always 80).

    Iterating over an instance yields ``areas`` and then ``actual_price``,
    so ``areas, price = data`` works.
    """

    # Available vegetable kinds
    vegetables = ["青菜", "蘋果", "西蘭花", "胡蘿蔔", "茄子"]

    # Available meat kinds
    meats = ["雞肉", "豬肉", "牛肉", "羊肉", "魚肉"]

    # Available rice kinds
    rices = ["白米", "糙米", "炸飯"]

    def __init__(self):
        # How many of each category to draw. The ranges are degenerate
        # (3 / 1 / 1), but each call still draws from the global RNG, so the
        # calls and their order are kept to leave seeded output unchanged.
        vegetable_count = random.randint(3, 3)
        meat_count = random.randint(1, 1)
        rice_count = random.randint(1, 1)

        # Draw the ingredients without replacement: vegetables, then meats,
        # then rices, concatenated in that order.
        chosen = (
            random.sample(Data.vegetables, vegetable_count)
            + random.sample(Data.meats, meat_count)
            + random.sample(Data.rices, rice_count)
        )

        # Give every chosen ingredient a random area; stored as
        # (ingredient name, area) pairs.
        self.areas = []
        for ingredient in chosen:
            self.areas.append((ingredient, random.randint(15, 30)))

        # Randomly generated actual price (the range currently pins it to 80).
        self.actual_price = random.randint(80, 80)

    def __repr__(self):
        """Return ``"<areas> | <actual_price>"``."""
        return " | ".join(map(str, (self.areas, self.actual_price)))

    def __iter__(self):
        """Iterate over ``areas`` and then ``actual_price``."""
        return iter((self.areas, self.actual_price))
    
class Dataset:
    """A list-like collection of randomly generated ``Data`` samples.

    Supports indexing, ``len()``, iteration, and conversion to a pandas
    DataFrame with one column per known ingredient plus the price column.
    """

    def __init__(self, num_data=100):
        """Generate ``num_data`` samples using the global ``random`` state."""
        self.data = [Data() for _ in range(num_data)]

    def __getitem__(self, index):
        """Return the sample (or slice of samples) at ``index``."""
        return self.data[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __repr__(self):
        """Return one line per sample, using each sample's ``str()``."""
        return "\n".join(str(item) for item in self.data)

    def __iter__(self):
        """Iterate over the samples in order."""
        return iter(self.data)

    def AsDataFrame(self):
        """Convert the samples to a DataFrame, one row per sample.

        Columns are every vegetable, meat and rice name (in that order)
        followed by ``"實際價格"``. An ingredient that is absent from a
        sample gets area 0.

        Returns:
            pandas.DataFrame: one row per sample; an empty frame that still
            carries all columns when the dataset has no samples.
        """
        columns = Data.vegetables + Data.meats + Data.rices + ["實際價格"]

        # Build every row up front and construct the frame once. Growing a
        # frame with pd.concat per row is quadratic, and starting from an
        # empty object-dtype frame makes the resulting dtypes depend on the
        # pandas version. Pre-filling with 0 also removes the need for a
        # NaN -> 0 pass and keeps integer areas as integer columns.
        records = []
        for areas, actual_price in self.data:
            row = dict.fromkeys(columns, 0)
            row.update(areas)
            row["實際價格"] = actual_price
            records.append(row)

        return pd.DataFrame(records, columns=columns)
    
class TrainTestPair:
    """Fit a linear regression on a training frame and score it on a test frame.

    Construction trains the model immediately and stores:

    * ``model``        - the fitted ``LinearRegression`` (no intercept),
    * ``ground_truth`` - the test frame's ``"實際價格"`` column,
    * ``predictions``  - a one-column DataFrame ``"預測價格"``,
    * ``mape``         - mean absolute percentage error on the test frame.
    """

    def __init__(self, train_df, test_df):
        self.train_df = train_df
        self.test_df = test_df
        self.model = self.Train()
        self.ground_truth = self.test_df["實際價格"]
        self.predictions = pd.DataFrame(self.Predict(), columns=["預測價格"])
        # Pass both sides as 1-D arrays so they are compared element by element.
        self.mape = self.MAPE(
            self.ground_truth.to_numpy(),
            self.predictions["預測價格"].to_numpy(),
        )

    @staticmethod
    def _FeatureColumns():
        """Return the ingredient column names used as model features."""
        return Data.vegetables + Data.meats + Data.rices

    def Train(self):
        """Fit and return a no-intercept linear model on the training frame."""
        model = LinearRegression(fit_intercept=False)
        model.fit(self.train_df[self._FeatureColumns()], self.train_df["實際價格"])
        return model

    def Predict(self):
        """Return the model's predictions for the test frame's feature columns."""
        return self.model.predict(self.test_df[self._FeatureColumns()])

    def MAPE(self, y_true, y_pred):
        """Return the mean absolute percentage error, in percent.

        Both inputs are flattened to 1-D float arrays before comparison.
        Without that, a ``(n,)`` ground truth against a ``(n, 1)`` prediction
        column broadcasts to an ``(n, n)`` matrix and the mean is taken over
        every true/predicted pairing instead of matching pairs.

        Args:
            y_true: array-like of actual values. Zeros in ``y_true`` divide
                by zero and yield inf/nan, as with the plain formula.
            y_pred: array-like of predicted values with the same number of
                elements as ``y_true``.

        Raises:
            ValueError: if the two inputs hold a different number of elements.
        """
        actual = np.asarray(y_true, dtype=float).ravel()
        predicted = np.asarray(y_pred, dtype=float).ravel()
        if actual.shape != predicted.shape:
            raise ValueError(
                "y_true and y_pred must have the same number of elements, got %d and %d"
                % (actual.size, predicted.size)
            )
        return np.mean(np.abs((actual - predicted) / actual)) * 100

In [4]:
# Seed the global `random` module so the generated samples are repeatable.
random.seed(0)

# Build the 10-sample training set; the bare name below displays it.
training_dataset = Dataset(num_data=10)
training_dataset

[('西蘭花', 26), ('胡蘿蔔', 21), ('蘋果', 19), ('牛肉', 24), ('糙米', 19)] | 80
[('青菜', 18), ('茄子', 26), ('西蘭花', 28), ('牛肉', 25), ('糙米', 21)] | 80
[('茄子', 15), ('青菜', 30), ('胡蘿蔔', 25), ('羊肉', 22), ('炸飯', 25)] | 80
[('蘋果', 30), ('胡蘿蔔', 18), ('青菜', 24), ('雞肉', 24), ('糙米', 18)] | 80
[('青菜', 24), ('胡蘿蔔', 20), ('蘋果', 21), ('魚肉', 20), ('白米', 16)] | 80
[('蘋果', 27), ('茄子', 23), ('青菜', 22), ('雞肉', 21), ('炸飯', 28)] | 80
[('青菜', 25), ('西蘭花', 21), ('胡蘿蔔', 22), ('雞肉', 15), ('糙米', 23)] | 80
[('西蘭花', 22), ('胡蘿蔔', 16), ('青菜', 17), ('雞肉', 15), ('白米', 18)] | 80
[('西蘭花', 21), ('青菜', 20), ('胡蘿蔔', 18), ('魚肉', 30), ('白米', 21)] | 80
[('西蘭花', 24), ('青菜', 26), ('胡蘿蔔', 28), ('雞肉', 20), ('炸飯', 16)] | 80

In [5]:
# Tabulate the training samples: one row per sample, one column per
# ingredient plus the price column. The bare name below displays the frame.
train_df = training_dataset.AsDataFrame()
train_df

Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格
0,0,19,26,21,0,0,0,24,0,0,0,19,0,80
1,18,0,28,0,26,0,0,25,0,0,0,21,0,80
2,30,0,0,25,15,0,0,0,22,0,0,0,25,80
3,24,30,0,18,0,24,0,0,0,0,0,18,0,80
4,24,21,0,20,0,0,0,0,0,20,16,0,0,80
5,22,27,0,0,23,21,0,0,0,0,0,0,28,80
6,25,0,21,22,0,15,0,0,0,0,0,23,0,80
7,17,0,22,16,0,15,0,0,0,0,18,0,0,80
8,20,0,21,18,0,0,0,0,0,30,21,0,0,80
9,26,0,24,28,0,20,0,0,0,0,0,0,16,80


In [6]:
# Build the 5-sample test set. No seed is set in this cell, so the samples
# come from whatever state the global `random` module is in when it runs;
# re-running this cell on its own produces different data.
testing_dataset = Dataset(num_data=5)
testing_dataset

[('蘋果', 20), ('西蘭花', 21), ('茄子', 16), ('羊肉', 20), ('炸飯', 20)] | 80
[('蘋果', 24), ('青菜', 26), ('茄子', 27), ('羊肉', 23), ('炸飯', 19)] | 80
[('青菜', 26), ('西蘭花', 24), ('茄子', 26), ('豬肉', 19), ('糙米', 24)] | 80
[('茄子', 22), ('蘋果', 22), ('西蘭花', 29), ('牛肉', 27), ('白米', 28)] | 80
[('蘋果', 20), ('胡蘿蔔', 29), ('青菜', 30), ('牛肉', 15), ('炸飯', 16)] | 80

In [7]:
# Tabulate the test samples with the same column layout as the training
# frame. The bare name below displays the frame.
test_df = testing_dataset.AsDataFrame()
test_df

Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格
0,0,20,21,0,16,0,0,0,20,0,0,0,20,80
1,26,24,0,0,27,0,0,0,23,0,0,0,19,80
2,26,0,24,0,26,0,19,0,0,0,0,24,0,80
3,0,22,29,0,22,0,0,27,0,0,28,0,0,80
4,30,20,0,29,0,0,0,15,0,0,0,0,16,80


In [8]:
# For each ingredient in turn: square that one area column (in both frames,
# leaving train_df / test_df themselves untouched), refit, and print the
# test-set MAPE for that variant.
for i in Data.vegetables + Data.meats + Data.rices:
    # assign() returns a new frame with only column `i` replaced.
    new_train_df = train_df.assign(**{i: train_df[i] ** 2})
    new_test_df = test_df.assign(**{i: test_df[i] ** 2})

    pair = TrainTestPair(new_train_df, new_test_df)
    print("食材^2: %s, MAPE: %f" % (i, pair.mape))

食材^2: 青菜, MAPE: 17.815519
食材^2: 蘋果, MAPE: 21.435928
食材^2: 西蘭花, MAPE: 15.845814
食材^2: 胡蘿蔔, MAPE: 15.496174
食材^2: 茄子, MAPE: 15.738464
食材^2: 雞肉, MAPE: 13.586712
食材^2: 豬肉, MAPE: 13.777005
食材^2: 牛肉, MAPE: 13.782777
食材^2: 羊肉, MAPE: 14.003802
食材^2: 魚肉, MAPE: 12.769241
食材^2: 白米, MAPE: 30.395663
食材^2: 糙米, MAPE: 13.421801
食材^2: 炸飯, MAPE: 17.599299
