In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

In [21]:
def extract_integer_part(string):
    integer_part = ''.join(filter(str.isdigit, string))
    return int(integer_part)

In [22]:
data = pd.read_csv('../Data/fitted_ellipse_parameter.csv')
volume_data = pd.read_csv('../Data/apple_volume_mapper.csv')
data['actual_volume'] = pd.Series(dtype=float)

for index, row in data.iterrows():    
    apple_label = row['apple_label']
    apple_number = extract_integer_part(apple_label)
    apple_volume = volume_data.loc[volume_data['Apple No.'] == apple_number, 'Volume'].values[0]
    data.at[index, 'actual_volume'] = apple_volume

data

Unnamed: 0,apple_label,semi_major_axis,semi_minor_axis,xc,yc,theta,area,perimeter,eccentricity,actual_volume
0,1a.jpg,382.60,325.89,808.68,1460.16,2.02,391706.02,2229.34,0.52,80.0
1,1b.jpg,399.04,363.09,720.77,1344.52,2.28,455177.67,2395.63,0.41,80.0
2,3a.jpg,420.33,404.27,829.28,1584.75,0.77,533844.40,2590.81,0.27,130.0
3,3b.jpg,429.52,410.53,944.84,1594.39,2.91,553959.56,2639.43,0.29,130.0
4,3c.jpg,416.05,396.16,959.69,1544.08,0.71,517808.35,2552.02,0.31,130.0
...,...,...,...,...,...,...,...,...,...,...
1967,200g.jpg,471.73,460.43,967.42,1232.12,0.34,682343.59,2928.56,0.22,130.0
1968,200h.jpg,492.06,453.88,914.85,1225.88,0.37,701630.38,2972.97,0.39,130.0
1969,200i.jpg,496.51,456.95,965.57,1173.64,0.20,712770.83,2996.68,0.39,130.0
1970,200j.jpg,457.88,443.95,947.72,1182.16,1.71,638607.86,2833.35,0.24,130.0


In [23]:
X = data[['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']]
Y = data['actual_volume'].values.reshape(-1,)  
X,Y

(      semi_major_axis  semi_minor_axis       area  perimeter  eccentricity
 0              382.60           325.89  391706.02    2229.34          0.52
 1              399.04           363.09  455177.67    2395.63          0.41
 2              420.33           404.27  533844.40    2590.81          0.27
 3              429.52           410.53  553959.56    2639.43          0.29
 4              416.05           396.16  517808.35    2552.02          0.31
 ...               ...              ...        ...        ...           ...
 1967           471.73           460.43  682343.59    2928.56          0.22
 1968           492.06           453.88  701630.38    2972.97          0.39
 1969           496.51           456.95  712770.83    2996.68          0.39
 1970           457.88           443.95  638607.86    2833.35          0.24
 1971           471.44           461.58  683637.14    2931.26          0.20
 
 [1972 rows x 5 columns],
 array([ 80.,  80., 130., ..., 130., 130., 130.]))

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_test,y_test

(      semi_major_axis  semi_minor_axis       area  perimeter  eccentricity
 752            528.45           489.09  811974.27    3197.89          0.38
 765            442.58           438.94  610306.06    2769.39          0.13
 1656           504.33           493.22  781463.63    3134.01          0.21
 1288           443.08           431.99  601315.60    2749.21          0.22
 1102           436.37           395.96  542807.70    2616.35          0.42
 ...               ...              ...        ...        ...           ...
 1891           476.52           443.37  663750.75    2890.88          0.37
 1359           410.12           384.24  495065.64    2496.22          0.35
 1063           483.13           398.87  605399.51    2777.20          0.56
 332            436.75           418.77  574588.83    2687.99          0.28
 1938           459.23           446.18  643714.03    2844.59          0.24
 
 [395 rows x 5 columns],
 array([200., 150., 175., 150., 105., 100., 230., 100., 170.,

In [26]:
kernel_types = ['linear', 'poly', 'rbf', 'sigmoid']

results = {}
for kernel in kernel_types:
    svr_model = SVR(kernel=kernel)
    svr_model.fit(X_train, y_train)
    
    y_pred = svr_model.predict(X_test)
    
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    results[kernel] = {'mse': mse, 'r2': r2}

print("Kernel Type\t\tMean Squared Error\t\tR-squared")
for kernel, result in results.items():
    print(f"{kernel}\t\t\t{result['mse']}\t\t{result['r2']}")

Kernel Type		Mean Squared Error		R-squared
linear			173035668.40611073		-103665.94875136238
poly			402.08593542158906		0.7591073421743061
rbf			461.49269635528475		0.723516312313665
sigmoid			2649.915206544544		-0.5875837995873445
