In [224]:
import random

# 菜的類型
vegetables = ["青菜", "蘋果", "西蘭花", "胡蘿蔔", "茄子"]

# 肉的類型
meats = ["雞肉", "豬肉", "牛肉", "羊肉", "魚肉"]

# 飯的類型
rices = ["白米", "糙米", "炸飯"]

# 生成隨機的訓練數據
def generate_random_sample():
    num_vegetables = random.randint(3, 3)
    num_meats = random.randint(1, 1)
    num_rices = random.randint(1, 1)

    # 隨機選擇菜、肉和飯的組合
    selected_vegetables = random.sample(vegetables, num_vegetables)
    selected_meats = random.sample(meats, num_meats)
    selected_rices = random.sample(rices, num_rices)

    # 計算每個食材的面積
    areas = [(item, random.randint(15, 30)) for sublist in [selected_vegetables, selected_meats, selected_rices] for item in sublist]

    # 隨機生成實際價格
    actual_price = random.randint(80, 80)

    return areas, actual_price

# 生成多個訓練樣本
num_samples = 10
random.seed(0)
training_data = [generate_random_sample() for _ in range(num_samples)]

training_data

[([('西蘭花', 26), ('胡蘿蔔', 21), ('蘋果', 19), ('牛肉', 24), ('糙米', 19)], 80),
 ([('青菜', 18), ('茄子', 26), ('西蘭花', 28), ('牛肉', 25), ('糙米', 21)], 80),
 ([('茄子', 15), ('青菜', 30), ('胡蘿蔔', 25), ('羊肉', 22), ('炸飯', 25)], 80),
 ([('蘋果', 30), ('胡蘿蔔', 18), ('青菜', 24), ('雞肉', 24), ('糙米', 18)], 80),
 ([('青菜', 24), ('胡蘿蔔', 20), ('蘋果', 21), ('魚肉', 20), ('白米', 16)], 80),
 ([('蘋果', 27), ('茄子', 23), ('青菜', 22), ('雞肉', 21), ('炸飯', 28)], 80),
 ([('青菜', 25), ('西蘭花', 21), ('胡蘿蔔', 22), ('雞肉', 15), ('糙米', 23)], 80),
 ([('西蘭花', 22), ('胡蘿蔔', 16), ('青菜', 17), ('雞肉', 15), ('白米', 18)], 80),
 ([('西蘭花', 21), ('青菜', 20), ('胡蘿蔔', 18), ('魚肉', 30), ('白米', 21)], 80),
 ([('西蘭花', 24), ('青菜', 26), ('胡蘿蔔', 28), ('雞肉', 20), ('炸飯', 16)], 80)]

In [225]:
import pandas as pd

df = pd.DataFrame(columns=vegetables + meats + rices + ["實際價格"])

# 將訓練數據轉換為 DataFrame
for areas, actual_price in training_data:
    row = dict(areas + [("實際價格", actual_price)])
    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)

# nan to 0
df = df.fillna(0)
df

Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格
0,0,19,26,21,0,0,0,24,0,0,0,19,0,80
1,18,0,28,0,26,0,0,25,0,0,0,21,0,80
2,30,0,0,25,15,0,0,0,22,0,0,0,25,80
3,24,30,0,18,0,24,0,0,0,0,0,18,0,80
4,24,21,0,20,0,0,0,0,0,20,16,0,0,80
5,22,27,0,0,23,21,0,0,0,0,0,0,28,80
6,25,0,21,22,0,15,0,0,0,0,0,23,0,80
7,17,0,22,16,0,15,0,0,0,0,18,0,0,80
8,20,0,21,18,0,0,0,0,0,30,21,0,0,80
9,26,0,24,28,0,20,0,0,0,0,0,0,16,80


In [226]:
# apply linear regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

# 生成測試數據
test_data = [generate_random_sample() for _ in range(5)]

# 將測試數據轉換為 DataFrame
df_test = pd.DataFrame(columns=vegetables + meats + rices + ["實際價格"])
for areas, actual_price in test_data:
    row = dict(areas + [("實際價格", actual_price)])
    df_test = pd.concat([df_test, pd.DataFrame([row])], ignore_index=True)
df_test = df_test.fillna(0)

# Polynomial Regression
for col in df.columns:
    if col == "實際價格":
        continue
    new_df = df.copy()
    new_df[col] = new_df[col] ** 2
    new_df.rename(columns={col: col + "^2"}, inplace=True)

    model = LinearRegression(fit_intercept=False)
    model.fit(new_df.drop("實際價格", axis=1), new_df["實際價格"])
    # 預測測試數據的價格
    new_test_df = df_test.copy()
    new_test_df[col] = new_test_df[col] ** 2
    new_test_df.rename(columns={col: col + "^2"}, inplace=True)
    new_test_df["預測價格"] = model.predict(new_test_df.drop("實際價格", axis=1))
    display(new_test_df)

Unnamed: 0,青菜^2,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格,預測價格
0,0,20,21,0,16,0,0,0,20,0,0,0,20,80,73.408517
1,676,24,0,0,27,0,0,0,23,0,0,0,19,80,86.299484
2,676,0,24,0,26,0,19,0,0,0,0,24,0,80,89.352115
3,0,22,29,0,22,0,0,27,0,0,28,0,0,80,125.649609
4,900,20,0,29,0,0,0,15,0,0,0,0,16,80,83.369386


Unnamed: 0,青菜,蘋果^2,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格,預測價格
0,0,400,21,0,16,0,0,0,20,0,0,0,20,80,55.641141
1,26,576,0,0,27,0,0,0,23,0,0,0,19,80,71.366744
2,26,0,24,0,26,0,19,0,0,0,0,24,0,80,86.1296
3,0,484,29,0,22,0,0,27,0,0,28,0,0,80,116.411002
4,30,400,0,29,0,0,0,15,0,0,0,0,16,80,90.210997


Unnamed: 0,青菜,蘋果,西蘭花^2,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格,預測價格
0,0,20,441,0,16,0,0,0,20,0,0,0,20,80,74.831219
1,26,24,0,0,27,0,0,0,23,0,0,0,19,80,85.003584
2,26,0,576,0,26,0,19,0,0,0,0,24,0,80,89.511217
3,0,22,841,0,22,0,0,27,0,0,28,0,0,80,118.654502
4,30,20,0,29,0,0,0,15,0,0,0,0,16,80,74.954827


Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔^2,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格,預測價格
0,0,20,21,0,16,0,0,0,20,0,0,0,20,80,77.724788
1,26,24,0,0,27,0,0,0,23,0,0,0,19,80,88.362679
2,26,0,24,0,26,0,19,0,0,0,0,24,0,80,86.877889
3,0,22,29,0,22,0,0,27,0,0,28,0,0,80,122.767465
4,30,20,0,841,0,0,0,15,0,0,0,0,16,80,81.701451


Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子^2,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格,預測價格
0,0,20,21,0,256,0,0,0,20,0,0,0,20,80,70.691716
1,26,24,0,0,729,0,0,0,23,0,0,0,19,80,90.689231
2,26,0,24,0,676,0,19,0,0,0,0,24,0,80,88.336865
3,0,22,29,0,484,0,0,27,0,0,28,0,0,80,113.426179
4,30,20,0,29,0,0,0,15,0,0,0,0,16,80,78.806704


Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉^2,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格,預測價格
0,0,20,21,0,16,0,0,0,20,0,0,0,20,80,75.284843
1,26,24,0,0,27,0,0,0,23,0,0,0,19,80,87.730782
2,26,0,24,0,26,0,19,0,0,0,0,24,0,80,86.084285
3,0,22,29,0,22,0,0,27,0,0,28,0,0,80,114.607331
4,30,20,0,29,0,0,0,15,0,0,0,0,16,80,81.209293


Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉^2,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格,預測價格
0,0,20,21,0,16,0,0,0,20,0,0,0,20,80,70.640903
1,26,24,0,0,27,0,0,0,23,0,0,0,19,80,83.584406
2,26,0,24,0,26,0,361,0,0,0,0,24,0,80,85.097512
3,0,22,29,0,22,0,0,27,0,0,28,0,0,80,114.923217
4,30,20,0,29,0,0,0,15,0,0,0,0,16,80,82.143791


Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉^2,羊肉,魚肉,白米,糙米,炸飯,實際價格,預測價格
0,0,20,21,0,16,0,0,0,20,0,0,0,20,80,71.112612
1,26,24,0,0,27,0,0,0,23,0,0,0,19,80,83.93552
2,26,0,24,0,26,0,19,0,0,0,0,24,0,80,85.419938
3,0,22,29,0,22,0,0,729,0,0,28,0,0,80,114.957782
4,30,20,0,29,0,0,0,225,0,0,0,0,16,80,81.930478


Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉^2,魚肉,白米,糙米,炸飯,實際價格,預測價格
0,0,20,21,0,16,0,0,0,400,0,0,0,20,80,71.710453
1,26,24,0,0,27,0,0,0,529,0,0,0,19,80,85.125317
2,26,0,24,0,26,0,19,0,0,0,0,24,0,80,85.723953
3,0,22,29,0,22,0,0,27,0,0,28,0,0,80,114.921496
4,30,20,0,29,0,0,0,15,0,0,0,0,16,80,81.954896


Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉^2,白米,糙米,炸飯,實際價格,預測價格
0,0,20,21,0,16,0,0,0,20,0,0,0,20,80,73.616297
1,26,24,0,0,27,0,0,0,23,0,0,0,19,80,84.740651
2,26,0,24,0,26,0,19,0,0,0,0,24,0,80,84.35668
3,0,22,29,0,22,0,0,27,0,0,28,0,0,80,114.629296
4,30,20,0,29,0,0,0,15,0,0,0,0,16,80,79.033367


Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米^2,糙米,炸飯,實際價格,預測價格
0,0,20,21,0,16,0,0,0,20,0,0,0,20,80,53.454161
1,26,24,0,0,27,0,0,0,23,0,0,0,19,80,75.941954
2,26,0,24,0,26,0,19,0,0,0,0,24,0,80,85.841223
3,0,22,29,0,22,0,0,27,0,0,784,0,0,80,146.057736
4,30,20,0,29,0,0,0,15,0,0,0,0,16,80,99.079807


Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米^2,炸飯,實際價格,預測價格
0,0,20,21,0,16,0,0,0,20,0,0,0,20,80,68.514565
1,26,24,0,0,27,0,0,0,23,0,0,0,19,80,82.449557
2,26,0,24,0,26,0,19,0,0,0,0,576,0,80,80.290658
3,0,22,29,0,22,0,0,27,0,0,28,0,0,80,116.424125
4,30,20,0,29,0,0,0,15,0,0,0,0,16,80,83.037428


Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯^2,實際價格,預測價格
0,0,20,21,0,16,0,0,0,20,0,0,0,400,80,56.559583
1,26,24,0,0,27,0,0,0,23,0,0,0,361,80,72.974712
2,26,0,24,0,26,0,19,0,0,0,0,24,0,80,76.135859
3,0,22,29,0,22,0,0,27,0,0,28,0,0,80,109.823644
4,30,20,0,29,0,0,0,15,0,0,0,0,256,80,86.243705


In [227]:
# apply linear regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

model = LinearRegression(fit_intercept=False) # otherwise, intercept is 80...
model.fit(df.drop("實際價格", axis=1), df["實際價格"])
model.coef_

array([ 8.48404154e-01,  1.00811188e+00,  9.97234991e-01,  8.73602960e-01,
        7.11786167e-01, -7.41292547e-02, -6.66133815e-16,  1.10258825e-02,
        2.18060458e-01, -3.17861024e-01,  1.70956950e+00,  8.58288481e-01,
        6.89347153e-01])

In [228]:
# 生成測試數據
test_data = [generate_random_sample() for _ in range(5)]

# 將測試數據轉換為 DataFrame
df_test = pd.DataFrame(columns=vegetables + meats + rices + ["實際價格"])
for areas, actual_price in test_data:
    row = dict(areas + [("實際價格", actual_price)])
    df_test = pd.concat([df_test, pd.DataFrame([row])], ignore_index=True)
df_test = df_test.fillna(0)

df_test

Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格
0,17,0,0,19,15,0,0,0,0,27,0,0,28,80
1,21,0,0,23,24,20,0,0,0,0,0,0,18,80
2,19,0,26,18,0,0,0,23,0,0,15,0,0,80
3,0,28,29,0,30,0,0,0,0,26,20,0,0,80
4,0,26,0,25,17,0,0,16,0,0,0,16,0,80


In [229]:
# 預測測試數據的價格
df_test["預測價格"] = model.predict(df_test.drop("實際價格", axis=1))
df_test

Unnamed: 0,青菜,蘋果,西蘭花,胡蘿蔔,茄子,雞肉,豬肉,牛肉,羊肉,魚肉,白米,糙米,炸飯,實際價格,預測價格
0,17,0,0,19,15,0,0,0,0,27,0,0,28,80,52.417592
1,21,0,0,23,24,20,0,0,0,0,0,0,18,80,65.917887
2,19,0,26,18,0,0,0,23,0,0,15,0,0,80,83.66978
3,0,28,29,0,30,0,0,0,0,26,20,0,0,80,104.427536
4,0,26,0,25,17,0,0,16,0,0,0,16,0,80,74.060378


In [230]:
import pandas as pd

labels_df = pd.read_excel("./標籤集合.xlsx")
prices_df = pd.read_excel("./圖片的價格1121.xlsx")

labels_df.head()

Unnamed: 0,副菜,飯,容器,主菜
0,side dish(副菜) 10,white rice(白飯) 20,plate(餐盤),grilled mackerel(烤鯖魚) 30
1,,purple rice(紫米) 20,box(便當盒),salmon(鮭魚) 40
2,,brown rice(糙米) 20,,braised fish fillet(紅燒水晶魚) 30
3,,,,steamed fish fillet(清蒸水晶魚) 30
4,,,,sweet and spicy fried pork chops(甜辣豬排) 25


In [231]:
prices_df.head()

Unnamed: 0,filename,price,fair price,Equal,Unnamed: 4,Unnamed: 5
0,2023_10_27_1,90.0,90.0,0.0,1.0,-25.0
1,2023_10_30_1,85.0,95.0,-10.0,5.0,-20.0
2,2023_10_30_2,85.0,75.0,10.0,3.0,-15.0
3,2023_10_30_3,75.0,75.0,0.0,8.0,-10.0
4,2023_10_30_4,75.0,75.0,0.0,17.0,-5.0


In [232]:
side_dishes = labels_df["副菜"].dropna().unique().tolist()
rices = labels_df["飯"].dropna().unique().tolist()
containers = labels_df["容器"].dropna().unique().tolist()
main_dishes = labels_df["主菜"].dropna().unique().tolist()
main_dishes.append("sauteed pork")

In [233]:
print(side_dishes)
print(rices)
print(containers)
print(main_dishes)

['side dish(副菜) 10']
['white rice(白飯) 20', 'purple rice(紫米) 20', 'brown rice(糙米) 20']
['plate(餐盤)', 'box(便當盒)']
['grilled mackerel(烤鯖魚) 30', 'salmon(鮭魚) 40', 'braised fish fillet(紅燒水晶魚) 30', 'steamed fish fillet(清蒸水晶魚) 30', 'sweet and spicy fried pork chops(甜辣豬排) 25', 'cuttlefish steak(花枝排) 25', 'fried cod steak(鱈魚排) 25', 'sweet and spicy chicken steak(甜辣雞排) 25', 'chicken steak(雞排) 25', 'fried pork chops(炸豬排) 25', 'braised pork ribs(滷排骨) 25', 'grilled chicken leg steak(烤雞腿排) 25', 'honey grilled chicken leg(蜜汁棒腿) 25', 'grilled chicken steak(香烤雞排) 25', 'big fried chicken leg(炸雞腿大) 40', 'small fried chicken leg(炸雞腿小) 25', 'big grilled chicken leg(烤雞腿大) 40', 'small grilled chicken leg(烤雞腿小) 25', 'stir-fried minced pork(打拋豬) 25', 'pork with scrambled eggs(滑蛋豬肉) 25', 'sauteed pork (醬燒豬肉) 25', 'garlic white meat(蒜泥白肉) 25', 'braised pork(滷豬肉) 25', 'curry chicken(咖哩雞) 25', 'spicy chicken(辣雞丁) 25', 'three cup chicken(三杯雞) 25', 'scallion chicken(蔥油雞) 25', 'sausage(香腸) 25', 'kara chicken leg steak

In [234]:
side_dishes = [item.split('(')[0] for item in side_dishes]
rices = [item.split('(')[0] for item in rices]
containers = [item.split('(')[0] for item in containers]
main_dishes = [item.split('(')[0] for item in main_dishes]

In [235]:
print(side_dishes)
print(rices)
print(containers)
print(main_dishes)

['side dish']
['white rice', 'purple rice', 'brown rice']
['plate', 'box']
['grilled mackerel', 'salmon', 'braised fish fillet', 'steamed fish fillet', 'sweet and spicy fried pork chops', 'cuttlefish steak', 'fried cod steak', 'sweet and spicy chicken steak', 'chicken steak', 'fried pork chops', 'braised pork ribs', 'grilled chicken leg steak', 'honey grilled chicken leg', 'grilled chicken steak', 'big fried chicken leg', 'small fried chicken leg', 'big grilled chicken leg', 'small grilled chicken leg', 'stir-fried minced pork', 'pork with scrambled eggs', 'sauteed pork ', 'garlic white meat', 'braised pork', 'curry chicken', 'spicy chicken', 'three cup chicken', 'scallion chicken', 'sausage', 'kara chicken leg steak', 'fried spanish mackerel', 'fried chicken leg steak', 'lion head', 'big herbal chicken leg', 'fried fish fillet', 'braised fish', 'small herbal chicken leg', 'pig ear', 'fried shishamo', 'herbal chicken', 'shrimp rolls', 'pan fried fish fillet', 'shacha pork', 'thai pork'

In [236]:
import yaml

with open("./final project.v9i.yolov7pytorch/data.yaml", "r") as stream:
    try:
        data = yaml.safe_load(stream)
        print(data["nc"])
        id_to_name = data["names"]
    except yaml.YAMLError as exc:
        print(exc)


49


In [237]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def draw_bounding_box(x_center, y_center, width, height):
    # Calculate coordinates of the bounding box
    x_min = x_center - width / 2
    y_min = y_center - height / 2

    # Create figure and axis
    fig, ax = plt.subplots()

    # Create a rectangle patch
    rect = patches.Rectangle((x_min, y_min), width, height, linewidth=1, edgecolor='r', facecolor='none')

    # Add the rectangle to the axis
    ax.add_patch(rect)

    # Set axis limits
    #ax.set_xlim(x_min, x_min + width)
    #ax.set_ylim(y_min, y_min + height)
    ax.invert_yaxis()

    # Show the plot
    plt.show()


In [238]:
def calculate_area(width, height):
    return width * height

#print(len(side_dishes + rices + containers + main_dishes))
#print("sauted pork" in side_dishes + rices + containers + main_dishes)
col = side_dishes + rices + containers + main_dishes + ["side_dishes_n", "price", "fair price"]
training_data = pd.DataFrame(columns=col)
#display(training_data)

import os
for fileid, filename in enumerate(os.listdir("./final project.v9i.yolov7pytorch/train/labels")):
    #print(filename)

    df = pd.read_csv("./final project.v9i.yolov7pytorch/train/labels/" + filename, sep=" ", header=None, 
                     names=["class_id", "x_center", "y_center", "width", "height"])
    #display(df)
    df["class_name"] = df["class_id"].apply(lambda x: data["names"][x])
    #display(df)
    df["area"] = df.apply(lambda x: calculate_area(x["width"], x["height"]), axis=1)
    #display(df)
    # if has container, calculate the ratio
    if df[df["class_name"].isin(containers)]["area"].sum() != 0:
        df["area_ratio"] = df["area"] / df[df["class_name"].isin(containers)]["area"].sum()
    else: # else calculate the ratio of all
        df["area_ratio"] = df["area"] / df["area"].sum()
    #display(df)
    
    # get the price
    price = prices_df[prices_df["filename"] == filename.split("_jpg")[0]]["price"].values[0]
    
        
    # get the fair price
    fair_price = prices_df[prices_df["filename"] == filename.split("_jpg")[0]]["fair price"].values[0]
    
    # check if price is nan
    if pd.isna(price):
        price = fair_price
        
    #print(price)
    #print(fair_price)

    #display(training_data)

    dic = {}
    for index, row in df.iterrows():
        dic[row["class_name"]] = dic.get(row["class_name"], 0) + row["area_ratio"]
        # if(fileid == 606 or fileid == 607 or fileid == 608):
        #     display(row)
    
    dic["side_dishes_n"] = len(df[df["class_name"].isin(side_dishes)])
    dic["price"] = price
    dic["fair price"] = fair_price
    
    # if(fileid == 606 or fileid == 607 or fileid == 608):
    #     print(filename)
    #     print(dic)
    
    # concat, first future warning empty
    training_data = pd.concat([training_data, pd.DataFrame([dic])], ignore_index=True).fillna(0)
    
display(training_data.head())
# locate inf
training_data[(training_data == np.inf) | (training_data == -np.inf)].dropna(how='all')

  training_data = pd.concat([training_data, pd.DataFrame([dic])], ignore_index=True).fillna(0)


Unnamed: 0,side dish,white rice,purple rice,brown rice,plate,box,grilled mackerel,salmon,braised fish fillet,steamed fish fillet,...,pan fried fish fillet,shacha pork,thai pork,white sauce chicken,fried chicken rolls,unknown square main dish,sauteed pork,side_dishes_n,price,fair price
0,0.316974,0.0,0.0,0.11776,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,130.0,80.0
1,0.316974,0.0,0.0,0.11776,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,130.0,80.0
2,0.317202,0.0,0.0,0.117985,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,130.0,80.0
3,0.362907,0.139174,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,75.0,75.0
4,0.363115,0.139414,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,75.0,75.0


Unnamed: 0,side dish,white rice,purple rice,brown rice,plate,box,grilled mackerel,salmon,braised fish fillet,steamed fish fillet,...,pan fried fish fillet,shacha pork,thai pork,white sauce chicken,fried chicken rolls,unknown square main dish,sauteed pork,side_dishes_n,price,fair price


In [239]:
# apply linear regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

model = LinearRegression(fit_intercept=False) # otherwise, intercept is 80...
model.fit(training_data.drop(["price", "fair price"], axis=1), training_data["price"])
model.coef_

array([ 9.20155345e+00, -2.12590582e+01, -1.47868113e+01, -2.94990416e-01,
        4.25757570e+01,  3.26299237e+01,  8.69814168e+01,  1.41809264e+02,
        7.45315257e+01,  6.31020182e+01,  1.67688086e-12,  1.09466089e+02,
        5.68543741e+01,  1.17942626e+02,  7.57796035e+01,  4.33471191e+01,
       -5.32907052e-14,  8.64758589e+01,  7.84195168e+01,  7.13262074e+01,
        1.43356245e+02,  7.88008443e+01,  7.86462986e+01,  1.11875828e+02,
        9.43140301e+01,  1.02077896e+02, -2.27373675e-13,  9.86157105e+01,
        7.65454728e+01,  9.10939942e+01,  7.47949025e+01,  7.63811230e+01,
        1.01280594e+02,  1.76477588e+02,  6.81441701e+01,  6.42112726e+01,
        1.42108547e-13,  1.70107544e+02,  1.25581601e+02, -4.26325641e-14,
        1.41778174e+02,  5.16080609e+01,  6.39939536e+01,  8.19702104e+01,
        3.48004122e+01,  5.30684332e+01,  1.83154136e+02,  7.44742879e+01,
        6.56576231e+01,  1.09128738e+02,  1.19930681e+02,  0.00000000e+00,
        9.31028868e+01,  